Skip to content

Commit

Permalink
Add min-available runners to scale-config
Browse files Browse the repository at this point in the history
This change allows the minimum number of available runners to be configured
per runner type, with the single global setting kept as the fallback default.

Issue: pytorch/ci-infra#275
Signed-off-by: Thanh Ha <thanh.ha@linuxfoundation.org>
  • Loading branch information
zxiiro committed Oct 7, 2024
1 parent 46e0ca7 commit 140d43b
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ export async function getRunnerTypes(
is_ephemeral: runner_type.is_ephemeral || false,
/* istanbul ignore next */
labels: runner_type.labels?.map((label: string) => label.trim()),
min_available: runner_type.min_available || Config.Instance.minAvailableRunners,
max_available: runner_type.max_available,
os: runner_type.os,
runnerTypeName: prop,
Expand Down Expand Up @@ -401,6 +402,7 @@ export async function getRunnerTypes(
['linux', 'windows'].includes(runnerType.os) &&
(runnerType.labels?.every((label) => typeof label === 'string' && alphaNumericStr.test(label)) ?? true) &&
(typeof runnerType.disk_size === 'number' || runnerType.disk_size === undefined) &&
(typeof runnerType.min_available === 'number' || runnerType.min_available === undefined) &&
(typeof runnerType.max_available === 'number' || runnerType.max_available === undefined) &&
(typeof runnerType.ami === 'string' || runnerType.ami === undefined) &&
(typeof runnerType.ami_experiment?.ami === 'string' || runnerType.ami_experiment === undefined) &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export interface RunnerTypeOptional {
instance_type?: string;
is_ephemeral?: boolean;
labels?: Array<string>;
min_available?: number;
max_available?: number;
os?: string;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ export async function scaleDown(): Promise<void> {
// We only limit the number of removed instances here for the reason: while sorting and getting info
// on getRunner[Org|Repo] we send statistics that are relevant for monitoring
if (
ghRunnersRemovable.length - removedRunners <= Config.Instance.minAvailableRunners &&
ghRunnersRemovable.length - removedRunners <= (await minRunners(ec2runner, metrics)) &&
ghRunner !== undefined &&
ec2runner.applicationDeployDatetime == Config.Instance.datetimeDeploy &&
!(await isEphemeralRunner(ec2runner, metrics))
Expand Down Expand Up @@ -387,6 +387,26 @@ export async function isEphemeralRunner(ec2runner: RunnerInfo, metrics: ScaleDow
return runnerTypes.get(ec2runner.runnerType)?.is_ephemeral ?? false;
}

/**
 * Looks up the `min_available` value configured for the runner type of `ec2runner`.
 *
 * The runner types are loaded with `getRunnerTypes` from:
 * - when `Config.Instance.enableOrganizationRunners` is set: the
 *   `Config.Instance.scaleConfigRepo` repository under `ec2runner.org`, or, if
 *   `org` is undefined, under the owner returned by `getRepo(ec2runner.repo)`;
 * - otherwise: the repository returned by `getRepo(ec2runner.repo)`.
 *
 * @param ec2runner - Runner whose `runnerType`, `org` and `repo` fields are read.
 * @param metrics - Metrics object forwarded unchanged to `getRunnerTypes`.
 * @returns The runner type's `min_available`, or `Config.Instance.minAvailableRunners`
 *   when the runner has no `runnerType`, the type is not present in the loaded
 *   runner types, or the type's `min_available` is null/undefined.
 */
export async function minRunners(ec2runner: RunnerInfo, metrics: ScaleDownMetrics): Promise<number> {
  const runnerTypeName = ec2runner.runnerType;
  if (runnerTypeName === undefined) {
    return Config.Instance.minAvailableRunners;
  }

  // Work out which repository the runner type definitions are read from.
  let scaleConfigSource: Repo;
  if (Config.Instance.enableOrganizationRunners) {
    const owner =
      ec2runner.org === undefined ? getRepo(ec2runner.repo as string).owner : (ec2runner.org as string);
    scaleConfigSource = { owner, repo: Config.Instance.scaleConfigRepo };
  } else {
    scaleConfigSource = getRepo(ec2runner.repo as string);
  }

  const configuredTypes = await getRunnerTypes(scaleConfigSource, metrics);
  const configuredMinimum = configuredTypes.get(runnerTypeName)?.min_available;

  // `??` (not `||`) so that an explicit 0 is honoured instead of falling back to the global value.
  return configuredMinimum ?? Config.Instance.minAvailableRunners;
}

export function isRunnerRemovable(
ghRunner: GhRunner | undefined,
ec2runner: RunnerInfo,
Expand Down

0 comments on commit 140d43b

Please sign in to comment.