diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 8901c05f1..ede2bb83f 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -1,5 +1,9 @@ module Vmpooler class PoolManager + CHECK_LOOP_DELAY_MIN_DEFAULT = 5 + CHECK_LOOP_DELAY_MAX_DEFAULT = 60 + CHECK_LOOP_DELAY_DECAY_DEFAULT = 2.0 + def initialize(config, logger, redis, metrics) $config = config @@ -523,17 +527,36 @@ def migrate_vm_and_record_timing(vm_name, pool_name, source_host_name, dest_host finish end - def check_pool(pool, maxloop = 0, loop_delay = 5) + def check_pool(pool, + maxloop = 0, + loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT, + loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT, + loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT) $logger.log('d', "[*] [#{pool['name']}] starting worker thread") + # Use the pool setings if they exist + loop_delay_min = pool['check_loop_delay_min'] unless pool['check_loop_delay_min'].nil? + loop_delay_max = pool['check_loop_delay_max'] unless pool['check_loop_delay_max'].nil? + loop_delay_decay = pool['check_loop_delay_decay'] unless pool['check_loop_delay_decay'].nil? + + loop_delay_decay = 2.0 if loop_delay_decay <= 1.0 + loop_delay_max = loop_delay_min if loop_delay_max.nil? || loop_delay_max < loop_delay_min + $threads[pool['name']] = Thread.new do begin loop_count = 1 + loop_delay = loop_delay_min provider = get_provider_for_pool(pool['name']) raise("Could not find provider '#{pool['provider']}") if provider.nil? loop do - _check_pool(pool, provider) + result = _check_pool(pool, provider) + if result[:cloned_vms] > 0 || result[:checked_pending_vms] > 0 || result[:discovered_vms] > 0 + loop_delay = loop_delay_min + else + loop_delay = (loop_delay * loop_delay_decay).to_i + loop_delay = loop_delay_max if loop_delay > loop_delay_max + end sleep(loop_delay) unless maxloop.zero? @@ -549,6 +572,15 @@ def check_pool(pool, maxloop = 0, loop_delay = 5) end def _check_pool(pool, provider) + pool_check_response = { + discovered_vms: 0, + checked_running_vms: 0, + checked_ready_vms: 0, + checked_pending_vms: 0, + destroyed_vms: 0, + migrated_vms: 0, + cloned_vms: 0 + } # INVENTORY inventory = {} begin @@ -561,6 +593,7 @@ def _check_pool(pool, provider) (! $redis.sismember('vmpooler__discovered__' + pool['name'], vm['name'])) && (! $redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])) + pool_check_response[:discovered_vms] += 1 $redis.sadd('vmpooler__discovered__' + pool['name'], vm['name']) $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue") @@ -577,6 +610,7 @@ def _check_pool(pool, provider) if inventory[vm] begin vm_lifetime = $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12 + pool_check_response[:checked_running_vms] += 1 check_running_vm(vm, pool['name'], vm_lifetime, provider) rescue => err $logger.log('d', "[!] [#{pool['name']}] _check_pool with an error while evaluating running VMs: #{err}") @@ -588,6 +622,7 @@ def _check_pool(pool, provider) $redis.smembers("vmpooler__ready__#{pool['name']}").each do |vm| if inventory[vm] begin + pool_check_response[:checked_ready_vms] += 1 check_ready_vm(vm, pool['name'], pool['ready_ttl'] || 0, provider) rescue => err $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating ready VMs: #{err}") @@ -600,6 +635,7 @@ def _check_pool(pool, provider) pool_timeout = pool['timeout'] || $config[:config]['timeout'] || 15 if inventory[vm] begin + pool_check_response[:checked_pending_vms] += 1 check_pending_vm(vm, pool['name'], pool_timeout, provider) rescue => err $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating pending VMs: #{err}") @@ -613,6 +649,7 @@ def _check_pool(pool, provider) $redis.smembers("vmpooler__completed__#{pool['name']}").each do |vm| if inventory[vm] begin + pool_check_response[:destroyed_vms] += 1 destroy_vm(vm, pool['name'], provider) rescue => err $redis.srem("vmpooler__completed__#{pool['name']}", vm) @@ -650,6 +687,7 @@ def _check_pool(pool, provider) $redis.smembers("vmpooler__migrating__#{pool['name']}").each do |vm| if inventory[vm] begin + pool_check_response[:migrated_vms] += 1 migrate_vm(vm, pool['name'], provider) rescue => err $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}") @@ -680,6 +718,7 @@ def _check_pool(pool, provider) if $redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i begin $redis.incr('vmpooler__tasks__clone') + pool_check_response[:cloned_vms] += 1 clone_vm(pool, provider) rescue => err $logger.log('s', "[!] [#{pool['name']}] clone failed during check_pool with an error: #{err}") @@ -689,6 +728,8 @@ def _check_pool(pool, provider) end end end + + pool_check_response rescue => err $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error: #{err}") raise @@ -734,6 +775,12 @@ def execute!(maxloop = 0, loop_delay = 1) end end + # Get pool loop settings + $config[:config] = {} if $config[:config].nil? + check_loop_delay_min = $config[:config]['check_loop_delay_min'] || CHECK_LOOP_DELAY_MIN_DEFAULT + check_loop_delay_max = $config[:config]['check_loop_delay_max'] || CHECK_LOOP_DELAY_MAX_DEFAULT + check_loop_delay_decay = $config[:config]['check_loop_delay_decay'] || CHECK_LOOP_DELAY_DECAY_DEFAULT + # Create the providers $config[:pools].each do |pool| provider_name = pool['provider'] @@ -786,7 +833,7 @@ def execute!(maxloop = 0, loop_delay = 1) check_pool(pool) elsif ! $threads[pool['name']].alive? $logger.log('d', "[!] [#{pool['name']}] worker thread died, restarting") - check_pool(pool) + check_pool(pool, check_loop_delay_min, check_loop_delay_max, check_loop_delay_decay) end end diff --git a/spec/unit/pool_manager_spec.rb b/spec/unit/pool_manager_spec.rb index f642f6ae3..b9ee982ea 100644 --- a/spec/unit/pool_manager_spec.rb +++ b/spec/unit/pool_manager_spec.rb @@ -1912,18 +1912,61 @@ end context 'with dead pool thread' do - let(:pool_thread) { double('thread', :alive? => false) } - before(:each) do - # Reset the global variable - Note this is a code smell - $threads = {} - $threads[pool] = pool_thread + context 'without check_loop_delay_xxx settings' do + let(:pool_thread) { double('thread', :alive? => false) } + let(:default_check_loop_delay_min) { 5 } + let(:default_check_loop_delay_max) { nil } + let(:default_check_loop_delay_decay) { 2.0 } + before(:each) do + # Reset the global variable - Note this is a code smell + $threads = {} + $threads[pool] = pool_thread + end + + it 'should run the check_pool method and log a message' do + expect(subject).to receive(:check_pool).with(a_pool_with_name_of(pool), + default_check_loop_delay_min, + default_check_loop_delay_max, + default_check_loop_delay_decay) + expect(logger).to receive(:log).with('d', "[!] [#{pool}] worker thread died, restarting") + + subject.execute!(1,0) + end end - it 'should run the check_pool method and log a message' do - expect(subject).to receive(:check_pool).with(a_pool_with_name_of(pool)) - expect(logger).to receive(:log).with('d', "[!] [#{pool}] worker thread died, restarting") + context 'with check_loop_delay_xxx settings' do + let(:pool_thread) { double('thread', :alive? => false) } + let(:check_loop_delay_min) { 7 } + let(:check_loop_delay_max) { 20 } + let(:check_loop_delay_decay) { 2.1 } - subject.execute!(1,0) + let(:config) { + YAML.load(<<-EOT +--- +:config: + check_loop_delay_min: #{check_loop_delay_min} + check_loop_delay_max: #{check_loop_delay_max} + check_loop_delay_decay: #{check_loop_delay_decay} +:pools: + - name: #{pool} +EOT + ) + } + before(:each) do + # Reset the global variable - Note this is a code smell + $threads = {} + $threads[pool] = pool_thread + end + + it 'should run the check_pool method and log a message' do + expect(subject).to receive(:check_pool).with(a_pool_with_name_of(pool), + check_loop_delay_min, + check_loop_delay_max, + check_loop_delay_decay) + expect(logger).to receive(:log).with('d', "[!] [#{pool}] worker thread died, restarting") + + subject.execute!(1,0) + end end end @@ -1995,6 +2038,15 @@ } let(:pool_object) { config[:pools][0] } + let(:check_pool_response) {{ + :discovered_vms => 0, + :checked_running_vms => 0, + :checked_ready_vms => 0, + :checked_pending_vms => 0, + :destroyed_vms => 0, + :migrated_vms => 0, + :cloned_vms => 0, + }} before do expect(subject).not_to be_nil @@ -2004,7 +2056,7 @@ context 'on startup' do before(:each) do - allow(subject).to receive(:_check_pool) + allow(subject).to receive(:_check_pool).and_return(check_pool_response) expect(logger).to receive(:log).with('d', "[*] [#{pool}] starting worker thread") end @@ -2032,8 +2084,7 @@ before(:each) do allow(logger).to receive(:log) - # Note the Vmpooler::VsphereHelper is not mocked - allow(subject).to receive(:_check_pool) + allow(subject).to receive(:_check_pool).and_return(check_pool_response) end after(:each) do @@ -2048,13 +2099,145 @@ end end + context 'delays between loops configured in the pool configuration' do + let(:maxloop) { 2 } + let(:loop_delay) { 1 } + let(:pool_loop_delay) { 2 } + let(:config) { + YAML.load(<<-EOT +--- +:pools: + - name: #{pool} + provider: #{provider_name} + check_loop_delay_min: #{pool_loop_delay} +EOT + ) + } + + before(:each) do + allow(logger).to receive(:log) + allow(subject).to receive(:_check_pool).and_return(check_pool_response) + end + + after(:each) do + # Reset the global variable - Note this is a code smell + $threads = nil + end + + it 'when a non-default loop delay is specified' do + expect(subject).to receive(:sleep).with(pool_loop_delay).exactly(maxloop).times + + subject.check_pool(pool_object,maxloop,loop_delay) + end + end + + context 'delays between loops with a specified min and max value' do + let(:maxloop) { 5 } + let(:loop_delay_min) { 1 } + let(:loop_delay_max) { 60 } + # Note a maxloop of zero can not be tested as it never terminates + + before(:each) do + allow(logger).to receive(:log) + allow(subject).to receive(:_check_pool).and_return(check_pool_response) + end + + after(:each) do + # Reset the global variable - Note this is a code smell + $threads = nil + end + + [:checked_pending_vms, :discovered_vms, :cloned_vms].each do |testcase| + describe "when #{testcase} is greater than zero" do + it "delays the minimum delay time" do + expect(subject).to receive(:sleep).with(loop_delay_min).exactly(maxloop).times + check_pool_response[testcase] = 1 + + subject.check_pool(pool_object,maxloop,loop_delay_min,loop_delay_max) + end + end + end + + [:checked_running_vms, :checked_ready_vms, :destroyed_vms, :migrated_vms].each do |testcase| + describe "when #{testcase} is greater than zero" do + let(:loop_decay) { 3.0 } + it "delays increases with a decay" do + expect(subject).to receive(:sleep).with(3).once + expect(subject).to receive(:sleep).with(9).once + expect(subject).to receive(:sleep).with(27).once + expect(subject).to receive(:sleep).with(60).twice + check_pool_response[testcase] = 1 + + subject.check_pool(pool_object,maxloop,loop_delay_min,loop_delay_max,loop_decay) + end + end + end + end + + context 'delays between loops with a specified min and max value configured in the pool configuration' do + let(:maxloop) { 5 } + let(:loop_delay_min) { 1 } + let(:loop_delay_max) { 60 } + let(:loop_decay) { 3.0 } + let(:pool_loop_delay_min) { 3 } + let(:pool_loop_delay_max) { 70 } + let(:pool_loop_delay_decay) { 2.5 } + let(:config) { + YAML.load(<<-EOT +--- +:pools: + - name: #{pool} + provider: #{provider_name} + check_loop_delay_min: #{pool_loop_delay_min} + check_loop_delay_max: #{pool_loop_delay_max} + check_loop_delay_decay: #{pool_loop_delay_decay} +EOT + ) + } + + before(:each) do + allow(logger).to receive(:log) + allow(subject).to receive(:_check_pool).and_return(check_pool_response) + end + + after(:each) do + # Reset the global variable - Note this is a code smell + $threads = nil + end + + [:checked_pending_vms, :discovered_vms, :cloned_vms].each do |testcase| + describe "when #{testcase} is greater than zero" do + it "delays the minimum delay time" do + expect(subject).to receive(:sleep).with(pool_loop_delay_min).exactly(maxloop).times + check_pool_response[testcase] = 1 + + subject.check_pool(pool_object,maxloop,loop_delay_min,loop_delay_max,loop_decay) + end + end + end + + [:checked_running_vms, :checked_ready_vms, :destroyed_vms, :migrated_vms].each do |testcase| + describe "when #{testcase} is greater than zero" do + it "delays increases with a decay" do + expect(subject).to receive(:sleep).with(7).once + expect(subject).to receive(:sleep).with(17).once + expect(subject).to receive(:sleep).with(42).once + expect(subject).to receive(:sleep).with(70).twice + check_pool_response[testcase] = 1 + + subject.check_pool(pool_object,maxloop,loop_delay_min,loop_delay_max,loop_decay) + end + end + end + end + + context 'loops specified number of times (5)' do let(:maxloop) { 5 } # Note a maxloop of zero can not be tested as it never terminates before(:each) do allow(logger).to receive(:log) - # Note the Vmpooler::VsphereHelper is not mocked - allow(subject).to receive(:_check_pool) + allow(subject).to receive(:_check_pool).and_return(check_pool_response) end after(:each) do @@ -2200,6 +2383,12 @@ subject._check_pool(pool_object,provider) end + it 'should return the number of discoverd of VMs' do + result = subject._check_pool(pool_object,provider) + + expect(result[:discovered_vms]).to be(1) + end + it 'should add undiscovered VMs to the completed queue' do allow(logger).to receive(:log).with('s', "[?] [#{pool}] '#{new_vm}' added to 'discovered' queue") @@ -2259,6 +2448,12 @@ subject._check_pool(pool_object,provider) end + it 'should return the number of checked running VMs' do + result = subject._check_pool(pool_object,provider) + + expect(result[:checked_running_vms]).to be(1) + end + it 'should use the VM lifetime in preference to defaults' do big_lifetime = 2000 @@ -2305,6 +2500,12 @@ create_ready_vm(pool,vm,token) end + it 'should return the number of checked ready VMs' do + result = subject._check_pool(pool_object,provider) + + expect(result[:checked_ready_vms]).to be(1) + end + it 'should log an error if one occurs' do expect(subject).to receive(:check_ready_vm).and_raise(RuntimeError,'MockError') expect(logger).to receive(:log).with('d', "[!] [#{pool}] _check_pool failed with an error while evaluating ready VMs: MockError") @@ -2351,6 +2552,12 @@ create_pending_vm(pool,vm,token) end + it 'should return the number of checked pending VMs' do + result = subject._check_pool(pool_object,provider) + + expect(result[:checked_pending_vms]).to be(1) + end + it 'should log an error if one occurs' do expect(subject).to receive(:check_pending_vm).and_raise(RuntimeError,'MockError') expect(logger).to receive(:log).with('d', "[!] [#{pool}] _check_pool failed with an error while evaluating pending VMs: MockError") @@ -2427,6 +2634,12 @@ subject._check_pool(pool_object,provider) end + it 'should return the number of destroyed VMs' do + result = subject._check_pool(pool_object,provider) + + expect(result[:destroyed_vms]).to be(1) + end + context 'with an error during destroy_vm' do before(:each) do expect(subject).to receive(:destroy_vm).and_raise(RuntimeError,"MockError") @@ -2533,6 +2746,13 @@ create_migrating_vm(vm,pool) end + it 'should return the number of migrated VMs' do + allow(subject).to receive(:migrate_vm).with(vm,pool,provider) + result = subject._check_pool(pool_object,provider) + + expect(result[:migrated_vms]).to be(1) + end + it 'should log an error if one occurs' do expect(subject).to receive(:migrate_vm).and_raise(RuntimeError,'MockError') expect(logger).to receive(:log).with('s', "[x] [#{pool}] '#{vm}' failed to migrate: MockError") @@ -2618,7 +2838,16 @@ context 'when number of VMs is less than the pool size' do before(:each) do - expect(provider).to receive(:vms_in_pool).with(pool).and_return([]) + expect(provider).to receive(:vms_in_pool).with(pool).and_return([]) + end + + it 'should return the number of cloned VMs' do + pool_size = 5 + config[:pools][0]['size'] = pool_size + + result = subject._check_pool(pool_object,provider) + + expect(result[:cloned_vms]).to be(pool_size) end it 'should call clone_vm to populate the pool' do diff --git a/vmpooler.yaml.example b/vmpooler.yaml.example index 6b0d4c616..fab1b3545 100644 --- a/vmpooler.yaml.example +++ b/vmpooler.yaml.example @@ -395,6 +395,27 @@ # This is particularly useful for instances with a large number of pools # to prevent a thundering herd when retrying connections. # (optional; default: 10) +# +# - check_loop_delay_min (optional; default: 5) seconds +# - check_loop_delay_max (optional; default: 60) seconds +# - check_loop_delay_decay (optional; default: 2.0) Must be greater than 1.0 +# Each pool is polled on a schedule to check whether there are any tasks to perform, for example, provision +# new VMs to fill a pool, or destroy VMs which are no longer required. By default this value is every 5 seconds. +# However, with a large number of pools, this can cause the provider to be issuing many, many requests which +# can cause performance problems, for example, vSphere recommends no more than 100 active connections per vCenter +# instance. But if you increase the check interval to a large number, then vmpooler will appear to be slow to +# perform tasks. These settings can be used to tune how often the provider is polling for changes to a pool. +# This is done by increasing the polling period when the pool is stable or when unimportant tasks are being +# performed. The polling period is decreased back to the minimum when important tasks happen (newly discovered VMs, +# creating new VMs and checking for VMs that have completed cloning but not ready). This means the pools are +# checked appropriately during important events, but as the pool stabilizes it does not need to be checked as often. +# - The check_loop_delay_min setting determines the smallest period of time between polls, in seconds. +# - The check_loop_delay_max setting determines the longest period of time between polls, in seconds. +# Must be greater than or equal to check_loop_delay_min or it will be set to check_loop_delay_min. +# - The check_loop_delay_decay setting determines how quickly the delay moves from minimum to maximum. So a value +# of 2.0 means each time the pool is checked and nothing important happens, the loop delay is multiplied by 2.0, +# for example, the first time is 2 seconds, then 4, 8, 16 etc. until it reaches check_loop_delay_max. +# This value must be greater than 1.0. # Example: @@ -455,6 +476,11 @@ # How long (in minutes) to keep VMs in 'ready' queues before destroying. # (optional; default: no limit) # +# - check_loop_delay_min (optional; default: 5) seconds +# - check_loop_delay_max (optional; default: same as check_loop_delay_min) seconds +# - check_loop_delay_decay (optional; default: 2.0) Must be greater than 1.0 +# See the :config: section for information about these settings +# # Provider specific pool settings # vSphere provider # - folder