From a35d66c606c7fc9f55dce71447f1465fec5830b6 Mon Sep 17 00:00:00 2001 From: Samuel Beaulieu Date: Wed, 2 Sep 2020 11:12:29 -0500 Subject: [PATCH 1/2] (POOLER-184) Pool manager retry and exit on failure Adding a reconnect retry for redis, which by default would retry 10 times, for a total wait time of ~80 seconds --- bin/vmpooler | 3 ++- lib/vmpooler.rb | 9 +++++---- spec/unit/generic_connection_pool_spec.rb | 7 +++++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/bin/vmpooler b/bin/vmpooler index 139d79c6c..960988383 100755 --- a/bin/vmpooler +++ b/bin/vmpooler @@ -9,6 +9,7 @@ redis_port = config[:redis]['port'] redis_password = config[:redis]['password'] redis_connection_pool_size = config[:redis]['connection_pool_size'] redis_connection_pool_timeout = config[:redis]['connection_pool_timeout'] +redis_reconnect_attempts = config[:redis]['reconnect_attempts'] || 10 logger_file = config[:config]['logfile'] logger = Vmpooler::Logger.new logger_file @@ -43,7 +44,7 @@ if torun.include? :manager Vmpooler::PoolManager.new( config, logger, - Vmpooler.redis_connection_pool(redis_host, redis_port, redis_password, redis_connection_pool_size, redis_connection_pool_timeout, metrics), + Vmpooler.redis_connection_pool(redis_host, redis_port, redis_password, redis_connection_pool_size, redis_connection_pool_timeout, metrics, redis_reconnect_attempts), metrics ).execute! end diff --git a/lib/vmpooler.rb b/lib/vmpooler.rb index 276bfa7e8..729ba4744 100644 --- a/lib/vmpooler.rb +++ b/lib/vmpooler.rb @@ -164,7 +164,7 @@ def self.load_pools_from_redis(redis) pools end - def self.redis_connection_pool(host, port, password, size, timeout, metrics) + def self.redis_connection_pool(host, port, password, size, timeout, metrics, redis_reconnect_attempts = 0) Vmpooler::PoolManager::GenericConnectionPool.new( metrics: metrics, connpool_type: 'redis_connection_pool', @@ -173,13 +173,14 @@ def self.redis_connection_pool(host, port, password, size, timeout, metrics) timeout: timeout ) do connection = Concurrent::Hash.new - redis = new_redis(host, port, password) + redis = new_redis(host, port, password, redis_reconnect_attempts) connection['connection'] = redis end end - def self.new_redis(host = 'localhost', port = nil, password = nil) - Redis.new(host: host, port: port, password: password) + def self.new_redis(host = 'localhost', port = nil, password = nil, redis_reconnect_attempts = 10) + Redis.new(host: host, port: port, password: password, reconnect_attempts: redis_reconnect_attempts, reconnect_delay: 1.5, + reconnect_delay_max: 10.0) end def self.pools(conf) diff --git a/spec/unit/generic_connection_pool_spec.rb b/spec/unit/generic_connection_pool_spec.rb index 5c2421578..4777795c9 100644 --- a/spec/unit/generic_connection_pool_spec.rb +++ b/spec/unit/generic_connection_pool_spec.rb @@ -25,6 +25,13 @@ connection: 'connection' }} + it 'should error out when connection pool could not establish no nothing' do + newsub = Vmpooler.redis_connection_pool("foo,bar", "1234", "fuba", 1, 1, metrics, 0) + expect { newsub.with_metrics do |conn_pool_object| + conn_pool_object.srem('foo', "bar") + end }.to raise_error Redis::CannotConnectError + end + it 'should return a connection object when grabbing one from the pool' do subject.with_metrics do |conn_pool_object| expect(conn_pool_object).to be(connection_object) From 1c53625fd8348cf46b5f326adcb8d5b61fde9b5a Mon Sep 17 00:00:00 2001 From: Samuel Beaulieu Date: Thu, 3 Sep 2020 08:49:45 -0500 Subject: [PATCH 2/2] add the default in the standard location, also adding support for an ENV var. Update the example yaml file with the new config --- bin/vmpooler | 2 +- lib/vmpooler.rb | 81 +++++++++++++++++++++---------------------- vmpooler.yaml.example | 6 ++++ 3 files changed, 47 insertions(+), 42 deletions(-) diff --git a/bin/vmpooler b/bin/vmpooler index 960988383..4d8af4556 100755 --- a/bin/vmpooler +++ b/bin/vmpooler @@ -9,7 +9,7 @@ redis_port = config[:redis]['port'] redis_password = config[:redis]['password'] redis_connection_pool_size = config[:redis]['connection_pool_size'] redis_connection_pool_timeout = config[:redis]['connection_pool_timeout'] -redis_reconnect_attempts = config[:redis]['reconnect_attempts'] || 10 +redis_reconnect_attempts = config[:redis]['reconnect_attempts'] logger_file = config[:config]['logfile'] logger = Vmpooler::Logger.new logger_file diff --git a/lib/vmpooler.rb b/lib/vmpooler.rb index 729ba4744..d6e55224e 100644 --- a/lib/vmpooler.rb +++ b/lib/vmpooler.rb @@ -58,59 +58,58 @@ def self.config(filepath = 'vmpooler.yaml') end # Set some configuration defaults - parsed_config[:config]['task_limit'] = string_to_int(ENV['TASK_LIMIT']) || parsed_config[:config]['task_limit'] || 10 - parsed_config[:config]['ondemand_clone_limit'] = string_to_int(ENV['ONDEMAND_CLONE_LIMIT']) || parsed_config[:config]['ondemand_clone_limit'] || 10 + parsed_config[:config]['task_limit'] = string_to_int(ENV['TASK_LIMIT']) || parsed_config[:config]['task_limit'] || 10 + parsed_config[:config]['ondemand_clone_limit'] = string_to_int(ENV['ONDEMAND_CLONE_LIMIT']) || parsed_config[:config]['ondemand_clone_limit'] || 10 parsed_config[:config]['max_ondemand_instances_per_request'] = string_to_int(ENV['MAX_ONDEMAND_INSTANCES_PER_REQUEST']) || parsed_config[:config]['max_ondemand_instances_per_request'] || 10 - parsed_config[:config]['migration_limit'] = string_to_int(ENV['MIGRATION_LIMIT']) if ENV['MIGRATION_LIMIT'] - parsed_config[:config]['vm_checktime'] = string_to_int(ENV['VM_CHECKTIME']) || parsed_config[:config]['vm_checktime'] || 1 - parsed_config[:config]['vm_lifetime'] = string_to_int(ENV['VM_LIFETIME']) || parsed_config[:config]['vm_lifetime'] || 24 - parsed_config[:config]['max_lifetime_upper_limit'] = string_to_int(ENV['MAX_LIFETIME_UPPER_LIMIT']) || parsed_config[:config]['max_lifetime_upper_limit'] - parsed_config[:config]['ready_ttl'] = string_to_int(ENV['READY_TTL']) || parsed_config[:config]['ready_ttl'] || 60 - parsed_config[:config]['ondemand_request_ttl'] = string_to_int(ENV['ONDEMAND_REQUEST_TTL']) || parsed_config[:config]['ondemand_request_ttl'] || 5 - parsed_config[:config]['prefix'] = ENV['PREFIX'] || parsed_config[:config]['prefix'] || '' - - parsed_config[:config]['logfile'] = ENV['LOGFILE'] if ENV['LOGFILE'] - - parsed_config[:config]['site_name'] = ENV['SITE_NAME'] if ENV['SITE_NAME'] - parsed_config[:config]['domain'] = ENV['DOMAIN'] if ENV['DOMAIN'] - parsed_config[:config]['clone_target'] = ENV['CLONE_TARGET'] if ENV['CLONE_TARGET'] - parsed_config[:config]['timeout'] = string_to_int(ENV['TIMEOUT']) if ENV['TIMEOUT'] - parsed_config[:config]['vm_lifetime_auth'] = string_to_int(ENV['VM_LIFETIME_AUTH']) if ENV['VM_LIFETIME_AUTH'] - parsed_config[:config]['max_tries'] = string_to_int(ENV['MAX_TRIES']) if ENV['MAX_TRIES'] - parsed_config[:config]['retry_factor'] = string_to_int(ENV['RETRY_FACTOR']) if ENV['RETRY_FACTOR'] - parsed_config[:config]['create_folders'] = true?(ENV['CREATE_FOLDERS']) if ENV['CREATE_FOLDERS'] - parsed_config[:config]['create_template_delta_disks'] = ENV['CREATE_TEMPLATE_DELTA_DISKS'] if ENV['CREATE_TEMPLATE_DELTA_DISKS'] + parsed_config[:config]['migration_limit'] = string_to_int(ENV['MIGRATION_LIMIT']) if ENV['MIGRATION_LIMIT'] + parsed_config[:config]['vm_checktime'] = string_to_int(ENV['VM_CHECKTIME']) || parsed_config[:config]['vm_checktime'] || 1 + parsed_config[:config]['vm_lifetime'] = string_to_int(ENV['VM_LIFETIME']) || parsed_config[:config]['vm_lifetime'] || 24 + parsed_config[:config]['max_lifetime_upper_limit'] = string_to_int(ENV['MAX_LIFETIME_UPPER_LIMIT']) || parsed_config[:config]['max_lifetime_upper_limit'] + parsed_config[:config]['ready_ttl'] = string_to_int(ENV['READY_TTL']) || parsed_config[:config]['ready_ttl'] || 60 + parsed_config[:config]['ondemand_request_ttl'] = string_to_int(ENV['ONDEMAND_REQUEST_TTL']) || parsed_config[:config]['ondemand_request_ttl'] || 5 + parsed_config[:config]['prefix'] = ENV['PREFIX'] || parsed_config[:config]['prefix'] || '' + parsed_config[:config]['logfile'] = ENV['LOGFILE'] if ENV['LOGFILE'] + parsed_config[:config]['site_name'] = ENV['SITE_NAME'] if ENV['SITE_NAME'] + parsed_config[:config]['domain'] = ENV['DOMAIN'] if ENV['DOMAIN'] + parsed_config[:config]['clone_target'] = ENV['CLONE_TARGET'] if ENV['CLONE_TARGET'] + parsed_config[:config]['timeout'] = string_to_int(ENV['TIMEOUT']) if ENV['TIMEOUT'] + parsed_config[:config]['vm_lifetime_auth'] = string_to_int(ENV['VM_LIFETIME_AUTH']) if ENV['VM_LIFETIME_AUTH'] + parsed_config[:config]['max_tries'] = string_to_int(ENV['MAX_TRIES']) if ENV['MAX_TRIES'] + parsed_config[:config]['retry_factor'] = string_to_int(ENV['RETRY_FACTOR']) if ENV['RETRY_FACTOR'] + parsed_config[:config]['create_folders'] = true?(ENV['CREATE_FOLDERS']) if ENV['CREATE_FOLDERS'] + parsed_config[:config]['experimental_features'] = ENV['EXPERIMENTAL_FEATURES'] if ENV['EXPERIMENTAL_FEATURES'] + parsed_config[:config]['purge_unconfigured_folders'] = ENV['PURGE_UNCONFIGURED_FOLDERS'] if ENV['PURGE_UNCONFIGURED_FOLDERS'] + parsed_config[:config]['usage_stats'] = ENV['USAGE_STATS'] if ENV['USAGE_STATS'] + parsed_config[:config]['request_logger'] = ENV['REQUEST_LOGGER'] if ENV['REQUEST_LOGGER'] + parsed_config[:config]['create_template_delta_disks'] = ENV['CREATE_TEMPLATE_DELTA_DISKS'] if ENV['CREATE_TEMPLATE_DELTA_DISKS'] set_linked_clone(parsed_config) - parsed_config[:config]['experimental_features'] = ENV['EXPERIMENTAL_FEATURES'] if ENV['EXPERIMENTAL_FEATURES'] - parsed_config[:config]['purge_unconfigured_folders'] = ENV['PURGE_UNCONFIGURED_FOLDERS'] if ENV['PURGE_UNCONFIGURED_FOLDERS'] - parsed_config[:config]['usage_stats'] = ENV['USAGE_STATS'] if ENV['USAGE_STATS'] - parsed_config[:config]['request_logger'] = ENV['REQUEST_LOGGER'] if ENV['REQUEST_LOGGER'] - - parsed_config[:redis] = parsed_config[:redis] || {} - parsed_config[:redis]['server'] = ENV['REDIS_SERVER'] || parsed_config[:redis]['server'] || 'localhost' - parsed_config[:redis]['port'] = string_to_int(ENV['REDIS_PORT']) if ENV['REDIS_PORT'] - parsed_config[:redis]['password'] = ENV['REDIS_PASSWORD'] if ENV['REDIS_PASSWORD'] - parsed_config[:redis]['data_ttl'] = string_to_int(ENV['REDIS_DATA_TTL']) || parsed_config[:redis]['data_ttl'] || 168 - parsed_config[:redis]['connection_pool_size'] = string_to_int(ENV['REDIS_CONNECTION_POOL_SIZE']) || parsed_config[:redis]['connection_pool_size'] || 10 + + parsed_config[:redis] = parsed_config[:redis] || {} + parsed_config[:redis]['server'] = ENV['REDIS_SERVER'] || parsed_config[:redis]['server'] || 'localhost' + parsed_config[:redis]['port'] = string_to_int(ENV['REDIS_PORT']) if ENV['REDIS_PORT'] + parsed_config[:redis]['password'] = ENV['REDIS_PASSWORD'] if ENV['REDIS_PASSWORD'] + parsed_config[:redis]['data_ttl'] = string_to_int(ENV['REDIS_DATA_TTL']) || parsed_config[:redis]['data_ttl'] || 168 + parsed_config[:redis]['connection_pool_size'] = string_to_int(ENV['REDIS_CONNECTION_POOL_SIZE']) || parsed_config[:redis]['connection_pool_size'] || 10 parsed_config[:redis]['connection_pool_timeout'] = string_to_int(ENV['REDIS_CONNECTION_POOL_TIMEOUT']) || parsed_config[:redis]['connection_pool_timeout'] || 5 + parsed_config[:redis]['reconnect_attempts'] = string_to_int(ENV['REDIS_RECONNECT_ATTEMPTS']) || parsed_config[:redis]['reconnect_attempts'] || 10 - parsed_config[:statsd] = parsed_config[:statsd] || {} if ENV['STATSD_SERVER'] + parsed_config[:statsd] = parsed_config[:statsd] || {} if ENV['STATSD_SERVER'] parsed_config[:statsd]['server'] = ENV['STATSD_SERVER'] if ENV['STATSD_SERVER'] parsed_config[:statsd]['prefix'] = ENV['STATSD_PREFIX'] if ENV['STATSD_PREFIX'] - parsed_config[:statsd]['port'] = string_to_int(ENV['STATSD_PORT']) if ENV['STATSD_PORT'] + parsed_config[:statsd]['port'] = string_to_int(ENV['STATSD_PORT']) if ENV['STATSD_PORT'] - parsed_config[:graphite] = parsed_config[:graphite] || {} if ENV['GRAPHITE_SERVER'] + parsed_config[:graphite] = parsed_config[:graphite] || {} if ENV['GRAPHITE_SERVER'] parsed_config[:graphite]['server'] = ENV['GRAPHITE_SERVER'] if ENV['GRAPHITE_SERVER'] parsed_config[:graphite]['prefix'] = ENV['GRAPHITE_PREFIX'] if ENV['GRAPHITE_PREFIX'] - parsed_config[:graphite]['port'] = string_to_int(ENV['GRAPHITE_PORT']) if ENV['GRAPHITE_PORT'] + parsed_config[:graphite]['port'] = string_to_int(ENV['GRAPHITE_PORT']) if ENV['GRAPHITE_PORT'] parsed_config[:auth] = parsed_config[:auth] || {} if ENV['AUTH_PROVIDER'] if parsed_config.key? :auth - parsed_config[:auth]['provider'] = ENV['AUTH_PROVIDER'] if ENV['AUTH_PROVIDER'] - parsed_config[:auth][:ldap] = parsed_config[:auth][:ldap] || {} if parsed_config[:auth]['provider'] == 'ldap' - parsed_config[:auth][:ldap]['host'] = ENV['LDAP_HOST'] if ENV['LDAP_HOST'] - parsed_config[:auth][:ldap]['port'] = string_to_int(ENV['LDAP_PORT']) if ENV['LDAP_PORT'] - parsed_config[:auth][:ldap]['base'] = ENV['LDAP_BASE'] if ENV['LDAP_BASE'] + parsed_config[:auth]['provider'] = ENV['AUTH_PROVIDER'] if ENV['AUTH_PROVIDER'] + parsed_config[:auth][:ldap] = parsed_config[:auth][:ldap] || {} if parsed_config[:auth]['provider'] == 'ldap' + parsed_config[:auth][:ldap]['host'] = ENV['LDAP_HOST'] if ENV['LDAP_HOST'] + parsed_config[:auth][:ldap]['port'] = string_to_int(ENV['LDAP_PORT']) if ENV['LDAP_PORT'] + parsed_config[:auth][:ldap]['base'] = ENV['LDAP_BASE'] if ENV['LDAP_BASE'] parsed_config[:auth][:ldap]['user_object'] = ENV['LDAP_USER_OBJECT'] if ENV['LDAP_USER_OBJECT'] end diff --git a/vmpooler.yaml.example b/vmpooler.yaml.example index cd53faf8c..6194167e5 100644 --- a/vmpooler.yaml.example +++ b/vmpooler.yaml.example @@ -204,6 +204,12 @@ # - redis_connection_pool_timeout # How long a task should wait (in seconds) for a redis connection when all connections are in use. # (default: 5) +# +# - reconnect_attempts +# How many times to retry one redis connection, for example if the host:port is not available +# The time between attempts starts at 1.5s and increases up to 10s, in such a way that 10 attempts +# takes about 80s, at which point an error is returned. +# (default: 10) # Example: