Skip to content

Commit

Permalink
(POOLER-26) Fix lost VMs getting stuck in pending
Browse files Browse the repository at this point in the history
This commit updates vmpooler to understand how to resolve a situation
where a pending VM does not exist. Without this change a pending VM that
does not exist in vmware inventory gets stuck in the pending state,
preventing the pool from ever reaching its target capacity.

As a part of this change the find_vm method is updated to perform a
light, then heavy search each time find_vm is called and all usage of
find_vm || find_vm_heavy is replaced. This makes find_vm usage
consistent across pool_manager.

Additionally, open_socket method is updated to resolve an incorrect
reference to the host name.
  • Loading branch information
mattkirby committed Nov 22, 2016
1 parent a6c8c76 commit 02327df
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 18 deletions.
30 changes: 14 additions & 16 deletions lib/vmpooler/pool_manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ def _check_pending_vm(vm, pool, timeout, vsphere)
fail_pending_vm(vm, pool, timeout)
end

def remove_nonexistent_vm(vm, pool)
$redis.srem("vmpooler__pending__#{pool}", vm)
$logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.")
end

def fail_pending_vm(vm, pool, timeout, exists=true)
clone_stamp = $redis.hget("vmpooler__vm__#{vm}", 'clone')
return if ! clone_stamp
Expand Down Expand Up @@ -108,8 +113,7 @@ def check_ready_vm(vm, pool, ttl, vsphere)

$redis.hset('vmpooler__vm__' + vm, 'check', Time.now)

host = vsphere.find_vm(vm) ||
vsphere.find_vm_heavy(vm)[vm]
host = vsphere.find_vm(vm)

if host
if
Expand Down Expand Up @@ -288,8 +292,7 @@ def destroy_vm(vm, pool, vsphere)
# Auto-expire metadata key
$redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))

host = vsphere.find_vm(vm) ||
vsphere.find_vm_heavy(vm)[vm]
host = vsphere.find_vm(vm)

if host
start = Time.now
Expand Down Expand Up @@ -319,8 +322,7 @@ def create_vm_disk(vm, disk_size, vsphere)
end

def _create_vm_disk(vm, disk_size, vsphere)
host = vsphere.find_vm(vm) ||
vsphere.find_vm_heavy(vm)[vm]
host = vsphere.find_vm(vm)

if (host) && ((! disk_size.nil?) && (! disk_size.empty?) && (disk_size.to_i > 0))
$logger.log('s', "[ ] [disk_manager] '#{vm}' is attaching a #{disk_size}gb disk")
Expand Down Expand Up @@ -360,8 +362,7 @@ def create_vm_snapshot(vm, snapshot_name, vsphere)
end

def _create_vm_snapshot(vm, snapshot_name, vsphere)
host = vsphere.find_vm(vm) ||
vsphere.find_vm_heavy(vm)[vm]
host = vsphere.find_vm(vm)

if (host) && ((! snapshot_name.nil?) && (! snapshot_name.empty?))
$logger.log('s', "[ ] [snapshot_manager] '#{vm}' is being snapshotted")
Expand Down Expand Up @@ -390,8 +391,7 @@ def revert_vm_snapshot(vm, snapshot_name, vsphere)
end

def _revert_vm_snapshot(vm, snapshot_name, vsphere)
host = vsphere.find_vm(vm) ||
vsphere.find_vm_heavy(vm)[vm]
host = vsphere.find_vm(vm)

if host
snapshot = vsphere.find_snapshot(host, snapshot_name)
Expand Down Expand Up @@ -473,10 +473,6 @@ def _check_snapshot_queue(vsphere)
end
end

def find_vsphere_pool_vm(pool, vm, vsphere)
vsphere.find_vm(vm) || vsphere.find_vm_heavy(vm)[vm]
end

def migration_limit(migration_limit)
# Returns migration_limit setting when enabled
return false if migration_limit == 0 || ! migration_limit
Expand All @@ -492,7 +488,7 @@ def migrate_vm(vm, pool, vsphere)
def _migrate_vm(vm, pool, vsphere)
begin
$redis.srem('vmpooler__migrating__' + pool, vm)
vm_object = find_vsphere_pool_vm(pool, vm, vsphere)
vm_object = vsphere.find_vm(vm)
parent_host, parent_host_name = get_vm_host_info(vm_object)
migration_limit = migration_limit $config[:config]['migration_limit']
migration_count = $redis.scard('vmpooler__migration')
Expand Down Expand Up @@ -607,12 +603,14 @@ def _check_pool(pool, vsphere)

# PENDING
$redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
pool_timeout = pool['timeout'] || $config[:config]['timeout'] || 15
if inventory[vm]
begin
pool_timeout = pool['timeout'] || $config[:config]['timeout'] || 15
check_pending_vm(vm, pool['name'], pool_timeout, vsphere)
rescue
end
else
fail_pending_vm(vm, pool['name'], pool_timeout, false)
end
end

Expand Down
5 changes: 5 additions & 0 deletions lib/vmpooler/vsphere_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,11 @@ def find_snapshot(vm, snapshotname)

def find_vm(vmname)
ensure_connected @connection, $credentials
find_vm_light(vmname) || find_vm_heavy(vmname)[vmname]
end

def find_vm_light(vmname)
ensure_connected @connection, $credentials

@connection.searchIndex.FindByDnsName(vmSearch: true, dnsName: vmname)
end
Expand Down
1 change: 0 additions & 1 deletion spec/vmpooler/pool_manager_migration_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
create_migrating_vm vm['name'], pool, redis
allow(vsphere).to receive(:find_vm).and_return(vm)
allow(pooler).to receive(:get_vm_host_info).and_return([{'name' => 'host1'}, 'host1'])
expect(vsphere).to receive(:find_vm).with(vm['name'])
end

it 'logs VM host when migration is disabled' do
Expand Down
1 change: 0 additions & 1 deletion spec/vmpooler/pool_manager_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
it 'calls fail_pending_vm' do
allow(vsphere).to receive(:find_vm).and_return(nil)
allow(redis).to receive(:hget)
expect(redis).to receive(:hget).with(String, 'clone').once
subject._check_pending_vm(vm, pool, timeout, vsphere)
end
end
Expand Down

0 comments on commit 02327df

Please sign in to comment.