Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

[Rest Server] Use list pods with label selector in getting job detail #4270

Merged
merged 4 commits into from
Mar 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions src/rest-server/src/models/kubernetes/kubernetes.js
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,15 @@ const getNodes = async () => {
return res.data;
};

const getPods = async (options = {}) => {
const {namespace} = options;
const getPods = async (options = {}, headers={}) => {
const {namespace, ...params} = options;
const client = getClient();

const requestOptions = Object.assign({}, options);
delete requestOptions.namespace;
let url = '/api/v1/pods';
if (namespace) {
url = `/api/v1/namespaces/${namespace}/pods`;
}
const res = await client.get(url, {params: requestOptions});
const res = await client.get(url, {params, headers});
return res.data;
};

Expand Down
41 changes: 29 additions & 12 deletions src/rest-server/src/models/v2/job/k8s.js
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ const convertFrameworkSummary = (framework) => {
};
};

const convertTaskDetail = async (taskStatus, ports, userName, jobName, taskRoleName) => {
const convertTaskDetail = async (taskStatus, ports, userName, jobName, taskRoleName, pod, gpuNumber) => {
// get container ports
const containerPorts = {};
if (ports) {
Expand All @@ -192,13 +192,6 @@ const convertTaskDetail = async (taskStatus, ports, userName, jobName, taskRoleN
// get container gpus
let containerGpus = null;
try {
const response = await k8sModel.getClient().get(
launcherConfig.podPath(taskStatus.attemptStatus.podName),
{
headers: launcherConfig.requestHeaders,
}
);
const pod = response.data;
if (launcherConfig.enabledHived) {
const hivedSpec = yaml.load(pod.metadata.annotations['hivedscheduler.microsoft.com/pod-scheduling-spec']);
if (hivedSpec && hivedSpec.affinityGroup && hivedSpec.affinityGroup.name) {
Expand All @@ -209,7 +202,6 @@ const convertTaskDetail = async (taskStatus, ports, userName, jobName, taskRoleN
const isolation = pod.metadata.annotations['hivedscheduler.microsoft.com/pod-gpu-isolation'];
containerGpus = isolation.split(',').reduce((attr, id) => attr + Math.pow(2, id), 0);
} else {
const gpuNumber = k8s.atoi(pod.spec.containers[0].resources.limits['nvidia.com/gpu']);
// mock GPU ids from 0 to (gpuNumber - 1)
containerGpus = Math.pow(2, gpuNumber) - 1;
}
Expand Down Expand Up @@ -295,21 +287,46 @@ const convertFrameworkDetail = async (framework) => {
taskRoles: {},
};
const ports = {};
const gpuNumbers = {};
for (let taskRoleSpec of framework.spec.taskRoles) {
ports[taskRoleSpec.name] = taskRoleSpec.task.pod.metadata.annotations['rest-server/port-scheduling-spec'];
gpuNumbers[taskRoleSpec.name] = k8s.atoi(
taskRoleSpec.task.pod.spec.containers[0].resources.limits['nvidia.com/gpu']
);
}

const userName = framework.metadata.labels ? framework.metadata.labels.userName : 'unknown';
const jobName = decodeName(framework.metadata.name, framework.metadata.annotations);
let pods = {};
try {
const podList = await k8sModel.getPods({
namespace: 'default',
labelSelector: `FC_FRAMEWORK_NAME=${framework.metadata.name}`,
}, {
accept: 'application/json;as=PartialObjectMetadataList;g=meta.k8s.io;v=v1beta1',
});
pods = _.keyBy(podList.items, (obj) => obj.metadata.name);
} catch (err) {
pods = {};
}

for (let taskRoleStatus of framework.status.attemptStatus.taskRoleStatuses) {
const taskStatuses = await Promise.all(taskRoleStatus.taskStatuses.map(
async (status) => await convertTaskDetail(
status,
ports[taskRoleStatus.name],
userName,
jobName,
taskRoleStatus.name,
pods[status.attemptStatus.podName],
gpuNumbers[taskRoleStatus.name]
)
));
detail.taskRoles[taskRoleStatus.name] = {
taskRoleStatus: {
name: taskRoleStatus.name,
},
taskStatuses: await Promise.all(taskRoleStatus.taskStatuses.map(
async (status) => await convertTaskDetail(status, ports[taskRoleStatus.name], userName, jobName, taskRoleStatus.name))
),
taskStatuses: taskStatuses,
};
}

Expand Down