-
Notifications
You must be signed in to change notification settings - Fork 205
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #777 from aws-quickstart/feature/neuron-addon
Neuron Device Plugin Addon
- Loading branch information
Showing
10 changed files
with
220 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Neuron Device Plugin Addon | ||
|
||
[AWS Neuron](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/) is the SDK used to run deep learning workloads on AWS Inferentia and AWS Trainium based instances. This add-on installs the Neuron Device Plugin, which is required to run workloads on those instances in Amazon EKS clusters (including clusters provisioned with EKS Blueprints). Note that you **must** use *inf1, inf2, trn1,* or *trn1n* instances. | ||
|
||
## Usage | ||
|
||
#### **`index.ts`** | ||
```typescript | ||
import 'source-map-support/register'; | ||
import * as cdk from 'aws-cdk-lib'; | ||
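import * as ec2 from 'aws-cdk-lib/aws-ec2'; | ||
import { KubernetesVersion } from 'aws-cdk-lib/aws-eks'; | ||
import * as blueprints from '@aws-quickstart/eks-blueprints'; | ||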
|
||
const app = new cdk.App(); | ||
|
||
const addOn = new blueprints.addons.NeuronPluginAddOn(); | ||
|
||
const clusterProvider = new blueprints.GenericClusterProvider({ | ||
version: KubernetesVersion.V1_27, | ||
managedNodeGroups: [ | ||
inferentiaNodeGroup() | ||
] | ||
}); | ||
|
||
function inferentiaNodeGroup(): blueprints.ManagedNodeGroup { | ||
return { | ||
id: "mng1", | ||
instanceTypes: [new ec2.InstanceType('inf1.2xlarge')], | ||
desiredSize: 1, | ||
maxSize: 2, | ||
nodeGroupSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS }, | ||
}; | ||
} | ||
|
||
const blueprint = blueprints.EksBlueprint.builder() | ||
.clusterProvider(clusterProvider) | ||
.addOns(addOn) | ||
.build(app, 'my-stack-name'); | ||
``` | ||
|
||
Once deployed, you can see the plugin daemonset in the `kube-system` namespace. | ||
|
||
```sh | ||
$ kubectl get daemonset neuron-device-plugin-daemonset -n kube-system | ||
|
||
NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE | ||
neuron-device-plugin-daemonset 1 1 1 1 1 <none> 24m | ||
``` | ||
|
||
## Functionality | ||
|
||
1. Deploys the plugin daemonset in `kube-system` namespace by default. | ||
2. Provides a device plugin that lets workloads in the blueprint cluster use Inferentia or Trainium instances through the Neuron SDK. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import { Construct } from "constructs"; | ||
|
||
import { ClusterAddOn, ClusterInfo } from "../../spi"; | ||
import { KubectlProvider, ManifestDeployment } from "../helm-addon/kubectl-provider"; | ||
import { loadExternalYaml } from "../../utils/yaml-utils"; | ||
|
||
/** Location of the Neuron device plugin manifest on the `master` branch of aws-neuron-sdk. */
const PLUGIN_URL = "https://raw.githubusercontent.com/aws-neuron/aws-neuron-sdk/master/src/k8/k8s-neuron-device-plugin.yml";
/** Location of the RBAC manifest that accompanies the Neuron device plugin (same repository and branch). */
const RBAC_URL = "https://raw.githubusercontent.com/aws-neuron/aws-neuron-sdk/master/src/k8/k8s-neuron-device-plugin-rbac.yml";

/**
 * Cluster add-on that applies the Neuron device plugin manifests to a cluster.
 *
 * Two manifests are loaded from {@link RBAC_URL} and {@link PLUGIN_URL} and
 * registered through a `KubectlProvider`; the plugin manifest is made
 * dependent on the RBAC manifest.
 */
export class NeuronPluginAddOn implements ClusterAddOn {
    /**
     * Registers the RBAC and plugin manifests for the given cluster.
     *
     * @param clusterInfo - cluster context handed to the `KubectlProvider`.
     * @returns a resolved promise carrying the construct created for the plugin manifest.
     */
    deploy(clusterInfo: ClusterInfo): Promise<Construct> {
        const provider = new KubectlProvider(clusterInfo);

        // RBAC manifest first: it is loaded before the plugin manifest.
        const rbacDeployment: ManifestDeployment = {
            name: "neuron-rbac-manifest",
            // NOTE(review): empty namespace; presumably the RBAC documents are
            // cluster-scoped or declare their own namespace; confirm.
            namespace: "",
            manifest: loadExternalYaml(RBAC_URL),
            values: {}
        };

        const pluginDeployment: ManifestDeployment = {
            name: "neuron-plugin-manifest",
            namespace: "kube-system",
            manifest: loadExternalYaml(PLUGIN_URL),
            values: {}
        };

        const rbacResource = provider.addManifest(rbacDeployment);
        const pluginResource = provider.addManifest(pluginDeployment);

        // The plugin must not be applied until its RBAC resources exist.
        pluginResource.node.addDependency(rbacResource);

        return Promise.resolve(pluginResource);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
--- | ||
kind: ClusterRole | ||
--- | ||
kind: Deployment | ||
--- | ||
kind: Pod |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
apiVersion: apps/v1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import * as yaml from "../../lib/utils/yaml-utils"; | ||
|
||
/**
 * Jest suite covering the helpers exported by `lib/utils/yaml-utils`:
 * reading a single document, serializing an object, loading a multi-document
 * file, and loading a document from a URL.
 */
describe('Unit tests for yaml utils', () => {

    test("The YAML Document file is read correctly", () => {
        const contents = yaml.readYamlDocument(`${__dirname}/yaml-test.yaml`);

        expect(contents).toBe("apiVersion: apps/v1");
    });

    test("The YAML Document file is serialized correctly", () => {
        const input = { "apiVersion": "apps/v1", "resource": "Deployment" };

        const output = yaml.serializeYaml(input);

        // Only the length is pinned; presumably two `key: value` lines, each
        // terminated by a newline (19 + 1 + 20 + 1); confirm against serializeYaml.
        expect(output.length).toBe(41);
    });

    test("The YAML Document with multiple resources is read correctly", () => {
        const resources = yaml.loadMultiResourceYaml(`${__dirname}/multi-yaml-test.yaml`);

        // Four entries are expected for three resources; index 0 is not
        // asserted (presumably produced by the fixture's leading `---`).
        expect(resources.length).toBe(4);

        const expectedKinds = ["ClusterRole", "Deployment", "Pod"];
        expectedKinds.forEach((kind, position) => {
            expect(resources[position + 1]).toStrictEqual({ "kind": kind });
        });
    });

    test("External YAML Document is read correctly", () => {
        // NOTE(review): the input is an https URL on a `master` branch, so this
        // test presumably needs network access and a stable upstream file.
        const resources = yaml.loadExternalYaml('https://raw.githubusercontent.com/kubernetes/examples/master/guestbook/legacy/frontend-controller.yaml');

        const expected = {
            apiVersion: "v1",
            kind: "ReplicationController",
            metadata: { name: "frontend" },
            spec: {
                replicas: 3,
                template: {
                    metadata: {
                        labels: { app: "guestbook", tier: "frontend" }
                    },
                    spec: {
                        containers: [
                            {
                                name: "php-redis",
                                image: "gcr.io/google_samples/gb-frontend:v4",
                                resources: {
                                    requests: { cpu: "100m", memory: "100Mi" }
                                },
                                env: [{ name: "GET_HOSTS_FROM", value: "dns" }],
                                ports: [{ containerPort: 80 }]
                            }
                        ]
                    }
                }
            }
        };

        expect(resources.length).toBe(1);
        expect(resources[0]).toStrictEqual(expected);
    });
});