Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make vm_id assignment more robust #714

Merged
merged 1 commit into from
Oct 31, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 10 additions & 25 deletions src/aleph/vm/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,13 @@


class VmPool:
"""Pool of VMs already started and used to decrease response time.
"""Pool of existing VMs

For function VMs, we keep each VM for a while after it has run, so that we can reuse it and thus decrease response time.
After running, a VM is saved for future reuse from the same function during a
configurable duration.

The counter is used by the VMs to set their tap interface name and the corresponding
IPv4 subnet.
"""

counter: int # Used to provide distinct ids to network interfaces
executions: dict[ItemHash, VmExecution]
message_cache: dict[str, ExecutableMessage]
network: Network | None
Expand All @@ -45,7 +43,6 @@
creation_lock: asyncio.Lock

def __init__(self, loop: asyncio.AbstractEventLoop):
self.counter = settings.START_ID_INDEX
self.executions = {}
self.message_cache = {}

Expand Down Expand Up @@ -150,25 +147,13 @@
This identifier is used to name the network interface and in the IPv4 range
dedicated to the VM.
"""
_, network_range = settings.IPV4_ADDRESS_POOL.split("/")
available_bits = int(network_range) - settings.IPV4_NETWORK_PREFIX_LENGTH
self.counter += 1
if self.counter < 2**available_bits:
# In common cases, use the counter itself as the vm_id. This makes it
# easier to debug.
return self.counter
else:
# The value of the counter is too high and some functions such as the
# IPv4 range dedicated to the VM do not support such high values.
#
# We therefore recycle vm_id values from executions that are not running
# anymore.
currently_used_vm_ids = {execution.vm_id for execution in self.executions.values()}
for i in range(settings.START_ID_INDEX, 255**2):
if i not in currently_used_vm_ids:
return i
msg = "No available value for vm_id."
raise ValueError(msg)
# Take the first id that is not already taken
currently_used_vm_ids = {execution.vm_id for execution in self.executions.values()}

Check warning on line 151 in src/aleph/vm/pool.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/pool.py#L151

Added line #L151 was not covered by tests
for i in range(settings.START_ID_INDEX, 255**2):
if i not in currently_used_vm_ids:
return i
msg = "No available value for vm_id."
raise ValueError(msg)

Check warning on line 156 in src/aleph/vm/pool.py

View check run for this annotation

Codecov / codecov/patch

src/aleph/vm/pool.py#L154-L156

Added lines #L154 - L156 were not covered by tests

def get_running_vm(self, vm_hash: ItemHash) -> VmExecution | None:
"""Return a running VM or None. Disables the VM expiration task."""
Expand Down