feat(sagemaker): add EndpointConfig L2 construct

This is the second of three PRs to complete the implementation of RFC 431: aws/aws-cdk-rfcs#431 related to aws#2809 Co-authored-by: Matt McClean <mmcclean@amazon.com> Co-authored-by: Long Yao <yl1984108@gmail.com> Co-authored-by: Drew Jetter <60628154+jetterdj@users.noreply.github.com> Co-authored-by: Murali Ganesh <59461079+foxpro24@users.noreply.github.com> Co-authored-by: Abilash Rangoju <988529+rangoju@users.noreply.github.com>
petermeansrock · Nov 10, 2022 · 8c8a4f9 · 8c8a4f9
1 parent 6fe034c
commit 8c8a4f9
Show file tree

Hide file tree

Showing 21 changed files with 3,811 additions and 0 deletions.
diff --git a/packages/@aws-cdk/aws-sagemaker/README.md b/packages/@aws-cdk/aws-sagemaker/README.md
@@ -156,3 +156,42 @@ import * as sagemaker from '@aws-cdk/aws-sagemaker';
 const bucket = new s3.Bucket(this, 'MyBucket');
 const modelData = sagemaker.ModelData.fromBucket(bucket, 'path/to/artifact/file.tar.gz');
 ```
+
+## Model Hosting
+
+Amazon SageMaker provides model hosting services for model deployment. Amazon SageMaker provides an
+HTTPS endpoint where your machine learning model is available to provide inferences.
+
+### Endpoint Configuration
+
+By using the `EndpointConfig` construct, you can define an endpoint configuration that can be
+used to provision one or more endpoints. In this configuration, you identify one or more models to
+deploy and the resources that you want Amazon SageMaker to provision. You define one or more
+production variants, each of which identifies a model. Each production variant also describes the
+resources that you want Amazon SageMaker to provision. If you are hosting multiple models, you also
+assign a variant weight to specify how much traffic you want to allocate to each model. For example,
+suppose that you want to host two models, A and B, and you assign traffic weight 2 for model A and 1
+for model B. Amazon SageMaker distributes two-thirds of the traffic to model A and one-third to
+model B:
+
+```typescript
+import * as sagemaker from '@aws-cdk/aws-sagemaker';
+
+declare const modelA: sagemaker.Model;
+declare const modelB: sagemaker.Model;
+
+const endpointConfig = new sagemaker.EndpointConfig(this, 'EndpointConfig', {
+  instanceProductionVariants: [
+    {
+      model: modelA,
+      variantName: 'modelA',
+      initialVariantWeight: 2.0,
+    },
+    {
+      model: modelB,
+      variantName: 'variantB',
+      initialVariantWeight: 1.0,
+    },
+  ]
+});
+```
diff --git a/packages/@aws-cdk/aws-sagemaker/lib/accelerator-type.ts b/packages/@aws-cdk/aws-sagemaker/lib/accelerator-type.ts
@@ -0,0 +1,64 @@
+import * as cdk from '@aws-cdk/core';
+
+/**
+ * Supported Elastic Inference (EI) accelerator types for SageMaker instance-based production
+ * variants. EI instances provide on-demand GPU computing for inference.
+ *
+ * Use one of the predefined constants, or `AcceleratorType.of()` to wrap an arbitrary
+ * accelerator type string or a token.
+ */
+export class AcceleratorType {
+  /**
+   * ml.eia1.large
+   */
+  public static readonly EIA1_LARGE = AcceleratorType.of('ml.eia1.large');
+
+  /**
+   * ml.eia1.medium
+   */
+  public static readonly EIA1_MEDIUM = AcceleratorType.of('ml.eia1.medium');
+
+  /**
+   * ml.eia1.xlarge
+   */
+  public static readonly EIA1_XLARGE = AcceleratorType.of('ml.eia1.xlarge');
+
+  /**
+   * ml.eia2.large
+   */
+  public static readonly EIA2_LARGE = AcceleratorType.of('ml.eia2.large');
+
+  /**
+   * ml.eia2.medium
+   */
+  public static readonly EIA2_MEDIUM = AcceleratorType.of('ml.eia2.medium');
+
+  /**
+   * ml.eia2.xlarge
+   */
+  public static readonly EIA2_XLARGE = AcceleratorType.of('ml.eia2.xlarge');
+
+  /**
+   * Builds an AcceleratorType from a given string or token (such as a CfnParameter).
+   * @param acceleratorType An accelerator type as string
+   * @returns A strongly typed AcceleratorType
+   * @throws Error if the value is a resolved string that does not start with 'ml.'
+   */
+  public static of(acceleratorType: string): AcceleratorType {
+    return new AcceleratorType(acceleratorType);
+  }
+
+  /**
+   * The raw accelerator type string (or token) wrapped by this instance.
+   */
+  private readonly acceleratorTypeIdentifier: string;
+
+  /**
+   * Wraps an accelerator type string after validating its prefix.
+   * @param acceleratorType An accelerator type as string, or an unresolved token
+   * @throws Error if the value is a resolved string that does not start with 'ml.'
+   */
+  constructor(acceleratorType: string) {
+    // Unresolved tokens are accepted as-is; only concrete strings are checked for the 'ml.' prefix.
+    if (!cdk.Token.isUnresolved(acceleratorType) && !acceleratorType.startsWith('ml.')) {
+      throw new Error(`accelerator type must start with 'ml.'; (got ${acceleratorType})`);
+    }
+    this.acceleratorTypeIdentifier = acceleratorType;
+  }
+
+  /**
+   * Return the accelerator type as a string
+   * @returns The accelerator type as a string
+   */
+  public toString(): string {
+    return this.acceleratorTypeIdentifier;
+  }
+}