Merge pull request #85 from awslabs/feature/documentation_imprmvt

feat(userfeedback): updated construct documentation and s3 bucket name
awslabs · Nov 9, 2023 · a155387 · a155387
2 parents 3939869 + 9248486
commit a155387
Show file tree

Hide file tree

Showing 7 changed files with 167 additions and 47 deletions.
diff --git a/src/patterns/gen-ai/aws-qa-appsync-opensearch/README.md b/src/patterns/gen-ai/aws-qa-appsync-opensearch/README.md
@@ -73,8 +73,43 @@ const rag_source = new QaAppsyncOpensearch(
       }
     )
 ```
+After deploying the CDK stack, the QA process can be invoked using GraphQL APIs. The API schema details are present here - resources/gen-ai/aws-qa-appsync-opensearch/schema.graphql.
 
-The code below provides an example of a mutation call and associated subscription to trigger a question and get response notifications:
+The code below provides an example of a mutation call and associated subscription to trigger a question and get response notifications. The subscription call waits for the mutation request before it sends the notifications.
+
+Subscription call to get notifications about the question answering process:
+
+```
+subscription MySubscription {
+  updateQAJobStatus(jobid: "123") {
+    sources
+    question
+    answer
+    jobstatus
+  }
+}
+____________________________________________________________________
+Expected response:
+
+{
+  "data": {
+    "updateQAJobStatus": {
+      "sources": [
+        ""
+      ],
+      "question": "<base 64 encoded question>",
+      "answer": "<base 64 encoded answer>",
+      "jobstatus": "Succeed"
+    }
+  }
+}
+```
+
+Where:
+- jobid: id which can be used to filter subscriptions on client side
+- answer: response to the question from the Large Language Model as a base64 encoded string
+- sources: sources from the knowledge base used as context to answer the question
+- jobstatus: status update of the question answering process for the file specified
 
 Mutation call to trigger the question:
 
@@ -89,6 +124,21 @@ mutation MyMutation {
     verbose
   }
 }
+____________________________________________________________________
+Expected response:
+
+{
+  "data": {
+    "postQuestion": {
+      "answer": null,
+      "jobid": null,
+      "jobstatus": null,
+      "max_docs": null,
+      "question": null,
+      "verbose": null
+    }
+  }
+}
 ```
 
 Where:
@@ -100,24 +150,7 @@ Where:
 - streaming: boolean indicating if the streaming capability of Bedrock is used. If set to true, tokens will be sent back to the subscriber as they are generated. If set to false, the entire response will be sent back to the subscriber once generated.
 - filename: optional. Name of the file stored in the input S3 bucket, in txt format.
 
-Subscription call to get notifications about the question answering process:
-
-```
-subscription MySubscription {
-  updateQAJobStatus(jobid: "123") {
-    sources
-    question
-    answer
-    jobstatus
-  }
-}
-```
 
-Where:
-- jobid: id which can be used to filter subscriptions on client side
-- answer: response to the question from the Large Language Model as a base64 encoded string
-- sources: sources from the knowledge base used as context to answer the question
-- jobstatus: status update of the question answering process for the file specified
 
 ## Initializer
 

diff --git a/src/patterns/gen-ai/aws-qa-appsync-opensearch/index.ts b/src/patterns/gen-ai/aws-qa-appsync-opensearch/index.ts
@@ -227,14 +227,14 @@ export class QaAppsyncOpensearch extends Construct {
     if (!props.existingInputAssetsBucketObj) {
       let tmpBucket: s3.Bucket;
       if (!props.bucketInputsAssetsProps) {
-        tmpBucket = new s3.Bucket(this, 'inputAssetsBucket'+stage,
+        tmpBucket = new s3.Bucket(this, 'inputAssetsQABucket'+stage,
           {
             blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
             encryption: s3.BucketEncryption.S3_MANAGED,
-            bucketName: 'input-assets-bucket'+stage+'-'+Aws.ACCOUNT_ID,
+            bucketName: 'input-asset-qa-bucket'+stage+'-'+Aws.ACCOUNT_ID,
           });
       } else {
-        tmpBucket = new s3.Bucket(this, 'InputAssetsBucket'+stage, props.bucketInputsAssetsProps);
+        tmpBucket = new s3.Bucket(this, 'InputAssetsQABucket'+stage, props.bucketInputsAssetsProps);
       }
       inputAssetsBucket = tmpBucket;
       this.s3InputAssetsBucket = tmpBucket;

diff --git a/src/patterns/gen-ai/aws-rag-appsync-stepfn-opensearch/README.md b/src/patterns/gen-ai/aws-rag-appsync-stepfn-opensearch/README.md
@@ -66,7 +66,8 @@ import * as os from 'aws-cdk-lib/aws-opensearchservice';
 import * as cognito from 'aws-cdk-lib/aws-cognito';
 import { RagApiGatewayOpensearch, RagApiGatewayOpensearchProps } from '@awslabs/generative-ai-cdk-constructs';
 
-// get an existing OpenSearch provisioned cluster
+// get an existing OpenSearch provisioned cluster in the same VPC as the RagAppsyncStepfnOpensearch construct
+// The security group for the existing OpenSearch cluster should allow traffic on port 443.
 const osDomain = os.Domain.fromDomainAttributes(this, 'osdomain', {
     domainArn: 'arn:aws:es:us-east-1:XXXXXX',
     domainEndpoint: 'https://XXXXX.us-east-1.es.amazonaws.com'
@@ -105,6 +106,7 @@ Where:
 - ingestionjobid: id which can be used to filter subscriptions on client side
 - ignore_existing: boolean indicating if existing transformed files in the output bucket should be ignored. If true, the input document will be re-transformed (txt format), overwriting any existing transformed file for that document.
 
+
 Subscription call to get notifications about the ingestion process:
 
 ```
@@ -116,13 +118,58 @@ subscription MySubscription {
     }
   }
 }
+_________________________________________________
+Expected response:
+
+{
+  "data": {
+    "updateIngestionJobStatus": {
+      "files": [
+        {
+          "name": "a.pdf",
+          "status": "succeed"
+        },
+        {
+          "name": "b.pdf",
+          "status": "succeed"
+        }
+      ]
+    }
+  }
+}
 ```
 Where:
 - ingestionjobid: id which can be used to filter subscriptions on client side
 The subscription will display the status and name for each file 
 - files.status: status update of the ingestion for the file specified
 - files.name: name of the file stored in the input S3 bucket
 
+Mutation call to trigger the ingestion process:
+
+```
+mutation MyMutation {
+  ingestDocuments(ingestioninput: {files: [{status: "", name: "a.pdf"}, {status: "", name: "b.pdf"}], ingestionjobid: "123"}) {
+    ingestionjobid
+  }
+}
+_________________________________________________
+Expected response:
+
+{
+  "data": {
+    "ingestDocuments": {
+      "ingestionjobid": null
+    }
+  }
+}
+```
+Where:
+- files.status: this field will be used by the subscription to update the status of the ingestion for the file specified
+- files.name: name of the file stored in the input S3 bucket
+- ingestionjobid: id which can be used to filter subscriptions on client side
+
+
+
 ## Initializer
 
 ```

diff --git a/src/patterns/gen-ai/aws-summarization-appsync-stepfn/README.md b/src/patterns/gen-ai/aws-summarization-appsync-stepfn/README.md
@@ -78,7 +78,7 @@ For optional props like redis cluster set cfnCacheClusterProps.
 
 ```
 const cfnCacheClusterProps: elasticache.CfnCacheClusterProps = {
-      cacheNodeType: 'cache.r6g.xlarge',
+      cacheNodeType: 'cache.m4.large',
       engine: 'redis',
       numCacheNodes: 1,
     };
@@ -91,45 +91,82 @@ If file transformation is required set isFileTransformationRequired to 'True'
 
 For existing resource like Amazon VPC , Amazon S3 buckets use props like existingVpc, existingInputAssetsBucketObj and existingProcessedAssetsBucketObj.
 
-The code below provides an example of a mutation call and associated subscription to trigger the summarization workflow and get response notifications:
+After deploying the CDK stack, the document summarization workflow can be invoked using GraphQL APIs. The API schema details are present here - resources/gen-ai/aws-summarization-appsync-stepfn/schema.graphql.
 
-Mutation call to trigger the question:
+The code below provides an example of a subscription call and associated mutation to trigger the summarization workflow and get response notifications. The subscription call waits for the mutation request before it sends the notifications.
+
+Subscription call to receive notifications:
 
 ```
-mutation MyMutation {
-  generateSummary(summaryInput:{files:[{name: "document1.txt", status: ""}], summary_job_id:"81", ignore_existing: false}) {
+subscription MySubscription {
+  updateSummaryJobStatus(name: "document1.txt", summary_job_id: "81") {
     name
     status
     summary
     summary_job_id
   }
 }
+_______________________________________
+
+Expected response:
+
+{
+  "data": {
+    "updateSummaryJobStatus": {
+      "files": [
+        {
+          "name": "document1.txt",
+          "status": "Completed",
+          "summary": "<base 64 encoded summary>"
+        }
+      ],
+      "summary_job_id": "81"
+    }
+  }
+}
 ```
 
 Where:
 - summary_job_id: id which can be used to filter subscriptions on client side
-- status: this field will be used by the subscription to update the status of the summarization process for the file(s) specified
-- name: Two formats are supported for files to be summarized. If the file is in text format, it needs to be stored in the trasformed S3 bucket, no file transformation is required. 
-If pdf format is selected, the file needs to be in the input S3 bucket and the construct prop ```isFileTransformationRequired``` needs to be set to true. The file will be transformed to text format.
-- ignore_existing: boolean indicating if existing summaries in the cache should be ignored. If true, the input document will be re-summarized, overwriting any existing cached summary for that document.
+- status: status update of the summarization process for the file(s) specified
+- name: name of the file stored in the input S3 bucket, same name + extension as passed to the mutation call.
+- summary: summary returned by the Large Language Model for the document specified, as a base64 encoded string
+
+Mutation call to trigger the summarization:
 
-Subscription call to get notifications about the summarization process:
 ```
-subscription MySubscription {
-  updateSummaryJobStatus(name: "document1.txt", summary_job_id: "81") {
+mutation MyMutation {
+  generateSummary(summaryInput:{files:[{name: "document1.txt", status: ""}], summary_job_id:"81", ignore_existing: false}) {
     name
     status
     summary
     summary_job_id
   }
 }
+
+_______________________________________
+
+Expected response: The mutation invokes an asynchronous summarization process, so the response notifications are sent on the subscription channel.
+
+{
+  "data": {
+    "generateSummary": {
+      "files": null,
+      "summary_job_id": null
+    }
+  }
+}
+
 ```
 
 Where:
 - summary_job_id: id which can be used to filter subscriptions on client side
-- status: status update of the summarization process for the file(s) specified
-- name: name of the file stored in the input S3 bucket, same name + extension as passed to the previous mutation call.
-- summary: summary returned by the Large Language Model for the document specified, as a base64 encoded string
+- status: this field will be used by the subscription to update the status of the summarization process for the file(s) specified
+- name: Two formats are supported for files to be summarized. If the file is in text format, it needs to be stored in the transformed S3 bucket; no file transformation is required.
+If pdf format is selected, the file needs to be in the input S3 bucket and the construct prop ```isFileTransformationRequired``` needs to be set to true. The file will be transformed to text format.
+- ignore_existing: boolean indicating if existing summaries in the cache should be ignored. If true, the input document will be re-summarized, overwriting any existing cached summary for that document.
+
+
 
 ```If multiple files are requested for summarization , then the client should filter response based on summary_job_id and name for each file. ```
 

diff --git a/src/patterns/gen-ai/aws-summarization-appsync-stepfn/index.ts b/src/patterns/gen-ai/aws-summarization-appsync-stepfn/index.ts
@@ -286,10 +286,10 @@ export class SummarizationAppsyncStepfn extends Construct {
       this.inputAssetBucket = props.existingInputAssetsBucketObj;
     } else if (props?.bucketInputsAssetsProps) {
       this.inputAssetBucket = new s3.Bucket(this,
-        'inputAssetsBucket'+stage, props.bucketInputsAssetsProps);
+        'inputAssetsSummaryBucket'+stage, props.bucketInputsAssetsProps);
     } else {
-      const bucketName= 'input-assets-bucket'+stage+'-'+cdk.Aws.ACCOUNT_ID;
-      this.inputAssetBucket = new s3.Bucket(this, 'inputAssetsBucket'+stage,
+      const bucketName= 'input-assets-summary-bucket'+stage+'-'+cdk.Aws.ACCOUNT_ID;
+      this.inputAssetBucket = new s3.Bucket(this, 'inputAssetsSummaryBucket'+stage,
         {
           blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
           encryption: s3.BucketEncryption.S3_MANAGED,
@@ -307,11 +307,11 @@ export class SummarizationAppsyncStepfn extends Construct {
       this.processedAssetBucket = props.existingProcessedAssetsBucketObj;
     } else if (props?.bucketProcessedAssetsProps) {
       this.processedAssetBucket = new s3.Bucket(this,
-        'processedAssetsBucket'+stage, props.bucketProcessedAssetsProps);
+        'processedAssetsSummaryBucket'+stage, props.bucketProcessedAssetsProps);
     } else {
-      const bucketName= 'processed-assets-bucket'+stage+'-'+cdk.Aws.ACCOUNT_ID;
+      const bucketName= 'processed-assets-summary-bucket'+stage+'-'+cdk.Aws.ACCOUNT_ID;
 
-      this.processedAssetBucket = new s3.Bucket(this, 'processedAssetsBucket'+stage,
+      this.processedAssetBucket = new s3.Bucket(this, 'processedAssetsSummaryBucket'+stage,
         {
           blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
           encryption: s3.BucketEncryption.S3_MANAGED,

diff --git a/test/patterns/gen-ai/aws-qa-appsync-opensearch/aws-qa-appsync-opensearch.test.ts b/test/patterns/gen-ai/aws-qa-appsync-opensearch/aws-qa-appsync-opensearch.test.ts
@@ -74,7 +74,7 @@ describe('QA Appsync Open search construct', () => {
               'GraphQLUrl',
             ],
           },
-          INPUT_BUCKET: { Ref: Match.stringLikeRegexp('testinputAssetsBucketdev') },
+          INPUT_BUCKET: { Ref: Match.stringLikeRegexp('testinputAssetsQABucketdev') },
           OPENSEARCH_DOMAIN_ENDPOINT: 'osendppint.amazon.aws.com',
           OPENSEARCH_INDEX: 'demoindex',
           OPENSEARCH_SECRET_ID: 'OSSecretId',

diff --git a/...patterns/gen-ai/aws-summarization-appsync-stepfn/aws-summarization-appsync-stepfn.test.ts b/...patterns/gen-ai/aws-summarization-appsync-stepfn/aws-summarization-appsync-stepfn.test.ts
@@ -133,11 +133,11 @@ describe('Summarization Appsync Stepfn construct', () => {
               'GraphQLUrl',
             ],
           },
-          INPUT_ASSET_BUCKET: { Ref: Match.stringLikeRegexp('testinputAssetsBucketdev') },
+          INPUT_ASSET_BUCKET: { Ref: Match.stringLikeRegexp('testinputAssetsSummaryBucketdev') },
           IS_FILE_TRANSFORMED: 'false',
           REDIS_HOST: { 'Fn::GetAtt': [Match.stringLikeRegexp('testredisCluster'), 'RedisEndpoint.Address'] },
           REDIS_PORT: { 'Fn::GetAtt': [Match.stringLikeRegexp('testredisCluster'), 'RedisEndpoint.Port'] },
-          TRANSFORMED_ASSET_BUCKET: { Ref: Match.stringLikeRegexp('testprocessedAssetsBucket') },
+          TRANSFORMED_ASSET_BUCKET: { Ref: Match.stringLikeRegexp('testprocessedAssetsSummaryBucket') },
         },
       },
     });
@@ -146,7 +146,10 @@ describe('Summarization Appsync Stepfn construct', () => {
       FunctionName: Match.stringLikeRegexp('summary_generator-dev'),
       Environment: {
         Variables: {
-          ASSET_BUCKET_NAME: { Ref: 'testprocessedAssetsBucketdevF293824A' },
+          ASSET_BUCKET_NAME: {
+            Ref: Match.stringLikeRegexp
+            ('testprocessedAssetsSummaryBucket'),
+          },
           GRAPHQL_URL: {
             'Fn::GetAtt': [
               Match.stringLikeRegexp('summaryMergedapi'),