Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Commit

Permalink
generate an application token for every job (#5282)
Browse files Browse the repository at this point in the history
- create application token in rest-server, get a k8s secret definition
- create a token object in db, (add a column in table Framework named tokenSecretDef...), let dbc create the secret with the def
- mount the token-secrets to initContainers & job containers
- revoke the token in dbc (remove the token from DB): implemented in src/dbc
  • Loading branch information
suiguoxin authored Feb 2, 2021
1 parent 06eb934 commit 30f21b3
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 6 deletions.
3 changes: 2 additions & 1 deletion src/database-controller/sdk/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ class DatabaseModel {
type: Sequelize.DATE,
allowNull: false,
},
// `dockerSecretDef`, `configSecretDef`, and `priorityClassDef` is the definition of job add-ons.
// `dockerSecretDef`, `configSecretDef`, `tokenSecretDef`, and `priorityClassDef` are the definitions of job add-ons.
// They are generated by rest-server and recorded into database by write-merger.
// These add-ons are created by poller or the short-cut in write-merger.
dockerSecretDef: Sequelize.TEXT,
configSecretDef: Sequelize.TEXT,
tokenSecretDef: Sequelize.TEXT,
priorityClassDef: Sequelize.TEXT,
retries: Sequelize.INTEGER,
retryDelayTime: Sequelize.INTEGER,
Expand Down
30 changes: 29 additions & 1 deletion src/database-controller/src/common/framework.js
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,13 @@ class Snapshot {
}

// Class Add-ons handles creation/patching/deletion of job add-ons.
// Currently there are 3 types of add-ons: configSecret, priorityClass, and dockerSecret.
// Currently there are 4 types of add-ons: configSecret, priorityClass, dockerSecret, and tokenSecret.
class AddOns {
constructor(
configSecretDef = null,
priorityClassDef = null,
dockerSecretDef = null,
tokenSecretDef = null,
) {
if (configSecretDef !== null && !(configSecretDef instanceof Object)) {
this._configSecretDef = JSON.parse(configSecretDef);
Expand All @@ -315,6 +316,11 @@ class AddOns {
} else {
this._dockerSecretDef = dockerSecretDef;
}
if (tokenSecretDef !== null && !(tokenSecretDef instanceof Object)) {
this._tokenSecretDef = JSON.parse(tokenSecretDef);
} else {
this._tokenSecretDef = tokenSecretDef;
}
}

async create() {
Expand Down Expand Up @@ -357,6 +363,19 @@ class AddOns {
}
}
}
if (this._tokenSecretDef) {
try {
await k8s.createSecret(this._tokenSecretDef);
} catch (err) {
if (err.response && err.response.statusCode === 409) {
logger.warn(
`Secret ${this._tokenSecretDef.metadata.name} already exists.`,
);
} else {
throw err;
}
}
}
}

silentPatch(frameworkResponse) {
Expand All @@ -369,6 +388,10 @@ class AddOns {
k8s
.patchSecretOwnerToFramework(this._dockerSecretDef, frameworkResponse)
.catch(logError);
this._tokenSecretDef &&
k8s
.patchSecretOwnerToFramework(this._tokenSecretDef, frameworkResponse)
.catch(logError);
}

silentDelete() {
Expand All @@ -381,6 +404,8 @@ class AddOns {
.catch(logError);
this._dockerSecretDef &&
k8s.deleteSecret(this._dockerSecretDef.metadata.name).catch(logError);
this._tokenSecretDef &&
k8s.deleteSecret(this._tokenSecretDef.metadata.name).catch(logError);
}

getUpdate() {
Expand All @@ -394,6 +419,9 @@ class AddOns {
if (this._dockerSecretDef) {
update.dockerSecretDef = JSON.stringify(this._dockerSecretDef);
}
if (this._tokenSecretDef) {
update.tokenSecretDef = JSON.stringify(this._tokenSecretDef);
}
return update;
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/database-controller/src/poller/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ async function poll() {
'configSecretDef',
'priorityClassDef',
'dockerSecretDef',
'tokenSecretDef',
'snapshot',
'subState',
'requestSynced',
Expand All @@ -132,6 +133,7 @@ async function poll() {
framework.configSecretDef,
framework.priorityClassDef,
framework.dockerSecretDef,
framework.tokenSecretDef,
);
if (framework.subState === 'Completed') {
deleteHandler(snapshot, pollingTs);
Expand Down
7 changes: 6 additions & 1 deletion src/database-controller/src/write-merger/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ async function patchFrameworkRequest(req, res, next) {
'configSecretDef',
'priorityClassDef',
'dockerSecretDef',
'tokenSecretDef',
],
where: { name: frameworkName },
});
Expand All @@ -203,6 +204,7 @@ async function patchFrameworkRequest(req, res, next) {
oldFramework.configSecretDef,
oldFramework.priorityClassDef,
oldFramework.dockerSecretDef,
oldFramework.tokenSecretDef,
);
return onModifyFrameworkRequest(oldSnapshot, snapshot, addOns);
}
Expand All @@ -216,7 +218,7 @@ async function patchFrameworkRequest(req, res, next) {
async function putFrameworkRequest(req, res, next) {
// The handler to handle PUT /frameworkRequest.
// PUT means provide a full spec of framework request, and the corresponding request will be created or updated.
// Along with the framework request, user must provide other job add-ons, e.g. configSecretDef, priorityClassDef, dockerSecretDef.
// Along with the framework request, user must provide other job add-ons, e.g. configSecretDef, priorityClassDef, dockerSecretDef, tokenSecretDef.
// If the framework doesn't exist in database, the record will be created.
// If the framework already exists, the record will be updated, and all job add-ons will be ignored. (Job add-ons can't be changed).
// If the framework request JSON is changed (or created), we will mark it as requestSynced=false.
Expand All @@ -228,6 +230,7 @@ async function putFrameworkRequest(req, res, next) {
configSecretDef,
priorityClassDef,
dockerSecretDef,
tokenSecretDef,
} = req.body;
const frameworkName = _.get(frameworkRequest, 'metadata.name');
if (!frameworkName) {
Expand Down Expand Up @@ -259,6 +262,7 @@ async function putFrameworkRequest(req, res, next) {
configSecretDef,
priorityClassDef,
dockerSecretDef,
tokenSecretDef,
);
return onCreateFrameworkRequest(snapshot, submissionTime, addOns);
} else {
Expand All @@ -269,6 +273,7 @@ async function putFrameworkRequest(req, res, next) {
oldFramework.configSecretDef,
oldFramework.priorityClassDef,
oldFramework.dockerSecretDef,
oldFramework.tokenSecretDef,
);
return onModifyFrameworkRequest(oldSnapshot, snapshot, addOns);
}
Expand Down
2 changes: 2 additions & 0 deletions src/rest-server/deploy/rest-server.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ spec:
{%- else %}
value: "{{ cluster_cfg['pylon']['uri']}}"
{%- endif %}
- name: REST_SERVER_URI
value: "{{ cluster_cfg['rest-server']['uri']}}"
{% if not cluster_cfg['authentication']['OIDC'] %}
- name: AUTHN_METHOD
value: basic
Expand Down
2 changes: 2 additions & 0 deletions src/rest-server/src/config/launcher.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const Joi = require('joi');
const k8sLauncherConfigSchema = Joi.object()
.keys({
hivedWebserviceUri: Joi.string().uri().required(),
restServerUri: Joi.string().uri().required(),
enabledPriorityClass: Joi.boolean().required(),
apiVersion: Joi.string().required(),
podGracefulDeletionTimeoutSec: Joi.number()
Expand Down Expand Up @@ -55,6 +56,7 @@ const launcherType = process.env.LAUNCHER_TYPE;
if (launcherType === 'k8s') {
launcherConfig = {
hivedWebserviceUri: process.env.HIVED_WEBSERVICE_URI,
restServerUri: process.env.REST_SERVER_URI,
enabledPriorityClass: process.env.LAUNCHER_PRIORITY_CLASS === 'true',
apiVersion: 'frameworkcontroller.microsoft.com/v1',
podGracefulDeletionTimeoutSec: 600,
Expand Down
50 changes: 47 additions & 3 deletions src/rest-server/src/models/v2/job/k8s.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const launcherConfig = require('@pai/config/launcher');
const createError = require('@pai/utils/error');
const protocolSecret = require('@pai/utils/protocolSecret');
const userModel = require('@pai/models/v2/user');
const tokenModel = require('@pai/models/token');
const storageModel = require('@pai/models/v2/storage');
const logger = require('@pai/config/logger');
const { apiserver } = require('@pai/config/kubernetes');
Expand Down Expand Up @@ -442,6 +443,10 @@ const generateTaskRole = (
name: 'KUBE_APISERVER_ADDRESS',
value: apiserver.uri,
},
{
name: 'REST_SERVER_URI',
value: launcherConfig.restServerUri,
},
{
name: 'GANG_ALLOCATION',
value: gangAllocation,
Expand Down Expand Up @@ -749,6 +754,7 @@ const generateFrameworkDescription = (
taskRoleDescription.task.pod.spec.priorityClassName =
'pai-job-minimal-priority';
}
// mount job secrets to initContainers & job container if they exist
if (config.secrets) {
taskRoleDescription.task.pod.spec.volumes.push({
name: 'job-secrets',
Expand All @@ -765,6 +771,21 @@ const generateFrameworkDescription = (
mountPath: '/usr/local/pai/secrets',
});
}
// mount token-secrets to initContainers & job container
taskRoleDescription.task.pod.spec.volumes.push({
name: 'token-secrets',
secret: {
secretName: `${encodeName(frameworkName)}-tokencred`,
},
});
taskRoleDescription.task.pod.spec.initContainers[0].volumeMounts.push({
name: 'token-secrets',
mountPath: '/usr/local/pai/token-secrets',
});
taskRoleDescription.task.pod.spec.containers[0].volumeMounts.push({
name: 'token-secrets',
mountPath: '/usr/local/pai/token-secrets',
});
frameworkDescription.spec.taskRoles.push(taskRoleDescription);
}
frameworkDescription.metadata.annotations.totalGpuNumber = `${totalGpuNumber}`;
Expand Down Expand Up @@ -830,6 +851,22 @@ const getConfigSecretDef = (frameworkName, secrets) => {
};
};

/**
 * Build the Kubernetes Secret definition that carries a job's application token.
 *
 * The secret is named `<encodeName(frameworkName)>-tokencred`, which is the same
 * name the pod spec references when mounting the `token-secrets` volume.
 *
 * @param {string} frameworkName - Framework name; its encoded form prefixes the secret name.
 * @param {string} token - Token value handed to `Buffer.from` (presumably a string — confirm
 *   against the token model) and stored base64-encoded under the `token` key.
 * @returns {object} A `v1` Secret definition of type `Opaque` in the `default` namespace.
 */
const getTokenSecretDef = (frameworkName, token) => {
  // Secret `data` values are base64-encoded.
  const encodedToken = Buffer.from(token).toString('base64');
  const secretName = `${encodeName(frameworkName)}-tokencred`;

  const secretDef = {
    apiVersion: 'v1',
    kind: 'Secret',
    metadata: { name: secretName, namespace: 'default' },
    data: { token: encodedToken },
    type: 'Opaque',
  };
  return secretDef;
};

const list = async (
attributes,
filters,
Expand Down Expand Up @@ -1053,19 +1090,25 @@ const put = async (frameworkName, config, rawConfig) => {
config,
rawConfig,
);
// generate image pull secret
// generate the image pull secret definition
const auths = Object.values(config.prerequisites.dockerimage)
.filter((dockerimage) => dockerimage.auth != null)
.map((dockerimage) => dockerimage.auth);
const dockerSecretDef = auths.length
? getDockerSecretDef(frameworkName, auths)
: null;

// generate job config secret
// generate the job config secret definition
const configSecretDef = config.secrets
? getConfigSecretDef(frameworkName, config.secrets)
: null;

// create an application token
// TODO: need a mechanism to label this token as job specific token and revoke it if job is stopped / failed
const token = await tokenModel.create(userName, true);
// generate the application token secret definition
const tokenSecretDef = getTokenSecretDef(frameworkName, token);

// calculate pod priority
// reference: https://github.com/microsoft/pai/issues/3704
// Truncate submissionTime to multiple of 1000.
Expand All @@ -1088,7 +1131,7 @@ const put = async (frameworkName, config, rawConfig) => {
priorityClassDef = getPriorityClassDef(frameworkName, podPriority);
}

// send request to framework controller
// send request to DB controller
let response;
try {
response = await axios({
Expand All @@ -1103,6 +1146,7 @@ const put = async (frameworkName, config, rawConfig) => {
configSecretDef: configSecretDef,
priorityClassDef: priorityClassDef,
dockerSecretDef: dockerSecretDef,
tokenSecretDef: tokenSecretDef,
},
headers: {
'Content-Type': 'application/json',
Expand Down

0 comments on commit 30f21b3

Please sign in to comment.