From e83c86576aefd2efc047ca1007e439efa8006ac2 Mon Sep 17 00:00:00 2001 From: Andrea Amorosi Date: Mon, 4 Mar 2024 15:42:33 +0100 Subject: [PATCH] docs(jmespath): documentation & tests --- .../extractDataFromBuiltinEnvelope.json | 20 ++ .../extractDataFromBuiltinEnvelope.ts | 21 +++ .../jmespath/extractDataFromEnvelope.json | 8 + .../jmespath/extractDataFromEnvelope.ts | 31 ++++ docs/snippets/tsconfig.json | 6 +- docs/utilities/jmespath.md | 173 ++++++++++++++++++ packages/commons/package.json | 14 +- packages/commons/src/fromBase64.ts | 15 ++ packages/jmespath/package.json | 8 + packages/jmespath/src/PowertoolsFunctions.ts | 34 ++++ packages/jmespath/src/envelopes.ts | 103 +++++++++++ packages/jmespath/tests/unit/index.test.ts | 10 + 12 files changed, 439 insertions(+), 4 deletions(-) create mode 100644 docs/snippets/jmespath/extractDataFromBuiltinEnvelope.json create mode 100644 docs/snippets/jmespath/extractDataFromBuiltinEnvelope.ts create mode 100644 docs/snippets/jmespath/extractDataFromEnvelope.json create mode 100644 docs/snippets/jmespath/extractDataFromEnvelope.ts create mode 100644 docs/utilities/jmespath.md create mode 100644 packages/commons/src/fromBase64.ts create mode 100644 packages/jmespath/src/PowertoolsFunctions.ts create mode 100644 packages/jmespath/src/envelopes.ts diff --git a/docs/snippets/jmespath/extractDataFromBuiltinEnvelope.json b/docs/snippets/jmespath/extractDataFromBuiltinEnvelope.json new file mode 100644 index 0000000000..9357e9d4b6 --- /dev/null +++ b/docs/snippets/jmespath/extractDataFromBuiltinEnvelope.json @@ -0,0 +1,20 @@ +{ + "Records": [ + { + "messageId": "19dd0b57-b21e-4ac1-bd88-01bbb068cb78", + "receiptHandle": "MessageReceiptHandle", + "body": 
"{\"customerId\":\"dd4649e6-2484-4993-acb8-0f9123103394\",\"booking\":{\"id\":\"5b2c4803-330b-42b7-811a-c68689425de1\",\"reference\":\"ySz7oA\",\"outboundFlightId\":\"20c0d2f2-56a3-4068-bf20-ff7703db552d\"},\"payment\":{\"receipt\":\"https://pay.stripe.com/receipts/acct_1Dvn7pF4aIiftV70/ch_3JTC14F4aIiftV700iFq2CHB/rcpt_K7QsrFln9FgFnzUuBIiNdkkRYGxUL0X\",\"amount\":100}}", + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1523232000000", + "SenderId": "123456789012", + "ApproximateFirstReceiveTimestamp": "1523232000001" + }, + "messageAttributes": {}, + "md5OfBody": "7b270e59b47ff90a553787216d55d91d", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-1:123456789012:MyQueue", + "awsRegion": "us-east-1" + } + ] +} diff --git a/docs/snippets/jmespath/extractDataFromBuiltinEnvelope.ts b/docs/snippets/jmespath/extractDataFromBuiltinEnvelope.ts new file mode 100644 index 0000000000..6cd3102f7a --- /dev/null +++ b/docs/snippets/jmespath/extractDataFromBuiltinEnvelope.ts @@ -0,0 +1,21 @@ +import { + extractDataFromEnvelope, + SQS, +} from '@aws-lambda-powertools/jmespath/envelopes'; +import { Logger } from '@aws-lambda-powertools/logger'; +import type { SQSEvent } from 'aws-lambda'; + +const logger = new Logger(); + +type MessageBody = { + customerId: string; +}; + +export const handler = async (event: SQSEvent): Promise => { + const records = extractDataFromEnvelope>(event, SQS); + for (const record of records) { + // records is now a list containing the deserialized body of each message + const { customerId } = record; + logger.appendKeys({ customerId }); + } +}; diff --git a/docs/snippets/jmespath/extractDataFromEnvelope.json b/docs/snippets/jmespath/extractDataFromEnvelope.json new file mode 100644 index 0000000000..a802778bf7 --- /dev/null +++ b/docs/snippets/jmespath/extractDataFromEnvelope.json @@ -0,0 +1,8 @@ +{ + "body": "{\"customerId\":\"dd4649e6-2484-4993-acb8-0f9123103394\"}", + "deeplyNested": [ + { + "someData": [1, 2, 
3] + } + ] +} diff --git a/docs/snippets/jmespath/extractDataFromEnvelope.ts b/docs/snippets/jmespath/extractDataFromEnvelope.ts new file mode 100644 index 0000000000..2d0f9bccf5 --- /dev/null +++ b/docs/snippets/jmespath/extractDataFromEnvelope.ts @@ -0,0 +1,31 @@ +import { extractDataFromEnvelope } from '@aws-lambda-powertools/jmespath/envelopes'; + +type MyEvent = { + body: string; // "{\"customerId\":\"dd4649e6-2484-4993-acb8-0f9123103394\"}" + deeplyNested: Array<{ someData: number[] }>; +}; + +type MessageBody = { + customerId: string; +}; + +export const handler = async (event: MyEvent): Promise => { + const payload = extractDataFromEnvelope( + event, + 'powertools_json(body)' + ); + const { customerId } = payload; // now deserialized + + // also works for fetching and flattening deeply nested data + const someData = extractDataFromEnvelope( + event, + 'deeplyNested[*].someData[]' + ); + + return { + customerId, + message: 'success', + context: someData, + statusCode: 200, + }; +}; diff --git a/docs/snippets/tsconfig.json b/docs/snippets/tsconfig.json index 1a3fe8b171..d6aec30ce7 100644 --- a/docs/snippets/tsconfig.json +++ b/docs/snippets/tsconfig.json @@ -27,7 +27,11 @@ "@aws-lambda-powertools/idempotency/middleware": [ "../../packages/idempotency/lib/middleware" ], - "@aws-lambda-powertools/batch": ["../../packages/batch/lib"] + "@aws-lambda-powertools/batch": ["../../packages/batch/lib"], + "@aws-lambda-powertools/jmespath": ["../../packages/jmespath/lib"], + "@aws-lambda-powertools/jmespath/envelopes": [ + "../../packages/jmespath/lib/envelopes" + ] } } } diff --git a/docs/utilities/jmespath.md b/docs/utilities/jmespath.md new file mode 100644 index 0000000000..63933f2e95 --- /dev/null +++ b/docs/utilities/jmespath.md @@ -0,0 +1,173 @@ +--- +title: JMESPath Functions +description: Utility +--- + +???+ tip + JMESPath is a query language for JSON used by tools like the AWS CLI and Powertools for AWS Lambda (TypeScript). 
+ +Built-in [JMESPath](https://jmespath.org/){target="_blank" rel="nofollow"} Functions to easily deserialize common encoded JSON payloads in Lambda functions. + +## Key features + +* Deserialize JSON from JSON strings, base64, and compressed data +* Use JMESPath to extract and combine data recursively +* Provides commonly used JMESPath expressions for popular event sources + +## Getting started + +You might have events that contain encoded JSON payloads as strings, base64, or even in compressed format. It is a common use case to decode and extract them partially or fully as part of your Lambda function invocation. + +Powertools for AWS Lambda (TypeScript) also has utilities like [idempotency](idempotency.md){target="_blank"} where you might need to extract a portion of your data before using them. + +???+ info "Terminology" + **Envelope** is the terminology we use for the **JMESPath expression** to extract your JSON object from your data input. We might use those two terms interchangeably. + +### Extracting data + +You can use the `extractDataFromEnvelope` function with any [JMESPath expression](https://jmespath.org/tutorial.html){target="_blank" rel="nofollow"}. + +???+ tip + Another common use case is to fetch deeply nested data, filter, flatten, and more. + +=== "extractDataFromEnvelope.ts" + ```typescript hl_lines="1 13 20" + --8<-- "docs/snippets/jmespath/extractDataFromEnvelope.ts" + ``` + +=== "extractDataFromEnvelope.json" + + ```json + --8<-- "docs/snippets/jmespath/extractDataFromEnvelope.json" + ``` + +### Built-in envelopes + +We provide built-in envelopes for popular AWS Lambda event sources to easily decode and/or deserialize JSON objects. 
+ +=== "extractDataFromBuiltinEnvelope.ts" + ```typescript hl_lines="2-3 15" + --8<-- "docs/snippets/jmespath/extractDataFromBuiltinEnvelope.ts" + ``` + +=== "extractDataFromBuiltinEnvelope.json" + + ```json hl_lines="6 15" + --8<-- "docs/snippets/jmespath/extractDataFromBuiltinEnvelope.json" + ``` + +These are all built-in envelopes you can use along with their expression as a reference: + +| Envelope | JMESPath expression | +| --------------------------------- | ----------------------------------------------------------------------------------------- | +| **`API_GATEWAY_HTTP`** | `powertools_json(body)` | +| **`API_GATEWAY_REST`** | `powertools_json(body)` | +| **`CLOUDWATCH_EVENTS_SCHEDULED`** | `detail` | +| **`CLOUDWATCH_LOGS`** | `awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]` | +| **`EVENTBRIDGE`** | `detail` | +| **`KINESIS_DATA_STREAM`** | `Records[*].kinesis.powertools_json(powertools_base64(data))` | +| **`S3_EVENTBRIDGE_SQS`** | `Records[*].powertools_json(body).detail` | +| **`S3_KINESIS_FIREHOSE`** | `records[*].powertools_json(powertools_base64(data)).Records[0]` | +| **`S3_SNS_KINESIS_FIREHOSE`** | `records[*].powertools_json(powertools_base64(data)).powertools_json(Message).Records[0]` | +| **`S3_SNS_SQS`** | `Records[*].powertools_json(body).powertools_json(Message).Records[0]` | +| **`S3_SQS`** | `Records[*].powertools_json(body).Records[0]` | +| **`SNS`** | `Records[0].Sns.Message | powertools_json(@)` | +| **`SQS`** | `Records[*].powertools_json(body)` | + +???+ tip "Using SNS?" + If you don't require SNS metadata, enable [raw message delivery](https://docs.aws.amazon.com/sns/latest/dg/sns-large-payload-raw-message-delivery.html). It will reduce multiple payload layers and size, when using SNS in combination with other services (_e.g., SQS, S3, etc_). + +## Advanced + +### Built-in JMESPath functions + +You can use our built-in JMESPath functions within your envelope expression. 
They handle deserialization for common data formats found in AWS Lambda event sources such as JSON strings, base64, and gzip-compressed data. + +#### powertools_json function + +Use the `powertools_json` function to decode any JSON string anywhere a JMESPath expression is allowed. + +> **Idempotency scenario** + +This sample will deserialize the JSON string within the `body` key before [Idempotency](./idempotency.md){target="_blank"} processes it. + +=== "powertools_json_idempotency_jmespath.py" + + ```python hl_lines="16" + --8<-- "examples/jmespath_functions/src/powertools_json_idempotency_jmespath.py" + ``` + +=== "powertools_json_idempotency_jmespath.json" + + ```json hl_lines="28" + --8<-- "examples/jmespath_functions/src/powertools_json_idempotency_jmespath.json" + ``` + +#### powertools_base64 function + +Use the `powertools_base64` function to decode any base64 data. + +This sample will decode the base64 value within the `data` key, and deserialize the JSON string before processing. + +=== "powertools_base64_jmespath_function.py" + + ```python hl_lines="7 10 37 49 53 55 57" + --8<-- "examples/jmespath_functions/src/powertools_base64_jmespath_function.py" + ``` + +=== "powertools_base64_jmespath_schema.py" + + ```python hl_lines="7 8 10 12 17 19 24 26 31 33 38 40" + --8<-- "examples/jmespath_functions/src/powertools_base64_jmespath_schema.py" + ``` + +=== "powertools_base64_jmespath_payload.json" + + ```json + --8<-- "examples/jmespath_functions/src/powertools_base64_jmespath_payload.json" + ``` + +#### powertools_base64_gzip function + +Use the `powertools_base64_gzip` function to decode base64 data and decompress its gzip content. + +This sample will decode and decompress base64 data from CloudWatch Logs, then use a JMESPath pipeline expression to pass the result for decoding its JSON string. 
+ +=== "powertools_base64_gzip_jmespath_function.py" + + ```python hl_lines="6 10 15 29 31 33 35" + --8<-- "examples/jmespath_functions/src/powertools_base64_gzip_jmespath_function.py" + ``` + +=== "powertools_base64_gzip_jmespath_schema.py" + + ```python hl_lines="7-15 17 19 24 26 31 33 38 40" + --8<-- "examples/jmespath_functions/src/powertools_base64_gzip_jmespath_schema.py" + ``` + +=== "powertools_base64_gzip_jmespath_payload.json" + + ```json + --8<-- "examples/jmespath_functions/src/powertools_base64_gzip_jmespath_payload.json" + ``` + +### Bring your own JMESPath function + +???+ warning + This should only be used for advanced use cases where you have special formats not covered by the built-in functions. + +For special binary formats that you want to decode before processing, you can bring your own [JMESPath function](https://github.com/jmespath/jmespath.py#custom-functions){target="_blank" rel="nofollow"} and any additional options via the `options` parameter of `extractDataFromEnvelope`. To keep the Powertools for AWS Lambda (TypeScript) built-in functions available, you can extend the `PowertoolsFunctions` class. 
+ +Here is an example of how to decompress messages using [zlib](https://docs.python.org/3/library/zlib.html){target="_blank" rel="nofollow"}: + +=== "powertools_custom_jmespath_function.py" + + ```python hl_lines="9 14 17-18 23 34 39 41 43" + --8<-- "examples/jmespath_functions/src/powertools_custom_jmespath_function.py" + ``` + +=== "powertools_custom_jmespath_function.json" + + ```json + --8<-- "examples/jmespath_functions/src/powertools_custom_jmespath_function.json" + ``` \ No newline at end of file diff --git a/packages/commons/package.json b/packages/commons/package.json index 3256503909..b2a5bba1f5 100644 --- a/packages/commons/package.json +++ b/packages/commons/package.json @@ -40,6 +40,10 @@ "default": "./lib/esm/index.js" } }, + "./utils/base64": { + "import": "./lib/esm/fromBase64.js", + "require": "./lib/cjs/fromBase64.js" + }, "./typeutils": { "import": "./lib/esm/typeUtils.js", "require": "./lib/cjs/typeUtils.js" @@ -51,13 +55,17 @@ }, "typesVersions": { "*": { - "types": [ - "lib/cjs/types/index.d.ts", - "lib/esm/types/index.d.ts" + "utils/base64": [ + "lib/cjs/fromBase64.d.ts", + "lib/esm/fromBase64.d.ts" ], "typeutils": [ "lib/cjs/typeUtils.d.ts", "lib/esm/typeUtils.d.ts" + ], + "types": [ + "lib/cjs/types/index.d.ts", + "lib/esm/types/index.d.ts" ] } }, diff --git a/packages/commons/src/fromBase64.ts b/packages/commons/src/fromBase64.ts new file mode 100644 index 0000000000..b3af876113 --- /dev/null +++ b/packages/commons/src/fromBase64.ts @@ -0,0 +1,15 @@ +const BASE64_REGEX = /^[A-Za-z0-9+/]*={0,2}$/; + +const fromBase64 = (input: string, encoding?: BufferEncoding): Uint8Array => { + if ((input.length * 3) % 4 !== 0) { + throw new TypeError(`Incorrect padding on base64 string.`); + } + if (!BASE64_REGEX.exec(input)) { + throw new TypeError(`Invalid base64 string.`); + } + const buffer = encoding ? 
Buffer.from(input, encoding) : Buffer.from(input); + + return new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength); +}; + +export { fromBase64 }; diff --git a/packages/jmespath/package.json b/packages/jmespath/package.json index a43a68e817..c4c9645563 100644 --- a/packages/jmespath/package.json +++ b/packages/jmespath/package.json @@ -39,6 +39,10 @@ "default": "./lib/esm/index.js" } }, + "./envelopes": { + "import": "./lib/esm/envelopes.js", + "require": "./lib/cjs/envelopes.js" + }, "./types": { "import": "./lib/esm/types.js", "require": "./lib/cjs/types.js" @@ -49,6 +53,10 @@ "types": [ "lib/cjs/types.d.ts", "lib/esm/types.d.ts" + ], + "envelopes": [ + "lib/cjs/envelopes.d.ts", + "lib/esm/envelopes.d.ts" ] } }, diff --git a/packages/jmespath/src/PowertoolsFunctions.ts b/packages/jmespath/src/PowertoolsFunctions.ts new file mode 100644 index 0000000000..15d4ab6b3d --- /dev/null +++ b/packages/jmespath/src/PowertoolsFunctions.ts @@ -0,0 +1,34 @@ +import zlib from 'node:zlib'; +import type { JSONValue } from '@aws-lambda-powertools/commons/types'; +import { fromBase64 } from '@aws-lambda-powertools/commons/utils/base64'; +import { Functions } from './Functions.js'; + +const decoder = new TextDecoder('utf-8'); + +class PowertoolsFunctions extends Functions { + @Functions.signature({ + argumentsSpecs: [['string']], + }) + public funcPowertoolsBase64(value: string): string { + return decoder.decode(fromBase64(value, 'base64')); + } + + @Functions.signature({ + argumentsSpecs: [['string']], + }) + public funcPowertoolsBase64Gzip(value: string): string { + const encoded = fromBase64(value, 'base64'); + const uncompressed = zlib.gunzipSync(encoded); + + return uncompressed.toString(); + } + + @Functions.signature({ + argumentsSpecs: [['string']], + }) + public funcPowertoolsJson(value: string): JSONValue { + return JSON.parse(value); + } +} + +export { PowertoolsFunctions }; diff --git a/packages/jmespath/src/envelopes.ts 
b/packages/jmespath/src/envelopes.ts new file mode 100644 index 0000000000..84cf06c383 --- /dev/null +++ b/packages/jmespath/src/envelopes.ts @@ -0,0 +1,103 @@ +import { search } from './search.js'; +import { PowertoolsFunctions } from './PowertoolsFunctions.js'; +import type { ParsingOptions, JSONObject } from './types.js'; + +/** + * Searches and extracts data using JMESPath + * + * The envelope is the JMESPath expression used to extract the data you're after + * + * Built-in JMESPath functions include: `powertools_json`, `powertools_base64`, `powertools_base64_gzip` + * + * @example + * ```typescript + * import { extractDataFromEnvelope } from '@aws-lambda-powertools/jmespath/envelopes'; + * + * type CustomEvent = { + * body: string; // "{\"customerId\":\"dd4649e6-2484-4993-acb8-0f9123103394\"}" + * }; + * + * type EventBody = { + * customerId: string; + * }; + * + * export const handler = async (event: CustomEvent): Promise<void> => { + * const payload = extractDataFromEnvelope<EventBody>(event, "powertools_json(body)"); + * const { customerId } = payload; // now deserialized + * // ... + * }; + * ``` + * + * We provide built-in envelopes for popular AWS Lambda event sources to easily decode and/or deserialize JSON objects. 
+ * + * @example + * ```typescript + * import { + * extractDataFromEnvelope, + * SQS, + * } from '@aws-lambda-powertools/jmespath/envelopes'; + * import type { SQSEvent } from 'aws-lambda'; + * + * type MessageBody = { + * customerId: string; + * }; + * + * export const handler = async (event: SQSEvent): Promise<void> => { + * const records = extractDataFromEnvelope<Array<MessageBody>>(event, SQS); + * for (const record of records) { // records is now a list containing the deserialized body of each message + * const { customerId } = record; + * } + * }; + * ``` + * + * @param data The JSON object to search + * @param envelope The JMESPath expression to use + * @param options The parsing options to use; when omitted, `{ customFunctions: new PowertoolsFunctions() }` is used + */ +const extractDataFromEnvelope = ( + data: JSONObject, + envelope: string, + options?: ParsingOptions +): T => { + if (!options) { + options = { customFunctions: new PowertoolsFunctions() }; + } + + return search(envelope, data, options) as T; +}; + +const API_GATEWAY_REST = 'powertools_json(body)'; +const API_GATEWAY_HTTP = 'powertools_json(body)'; +const SQS = 'Records[*].powertools_json(body)'; +const SNS = 'Records[0].Sns.Message | powertools_json(@)'; +const EVENTBRIDGE = 'detail'; +const CLOUDWATCH_EVENTS_SCHEDULED = 'detail'; +const KINESIS_DATA_STREAM = + 'Records[*].kinesis.powertools_json(powertools_base64(data))'; +const CLOUDWATCH_LOGS = + 'awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]'; +const S3_SNS_SQS = + 'Records[*].powertools_json(body).powertools_json(Message).Records[0]'; +const S3_SQS = 'Records[*].powertools_json(body).Records[0]'; +const S3_SNS_KINESIS_FIREHOSE = + 'records[*].powertools_json(powertools_base64(data)).powertools_json(Message).Records[0]'; +const S3_KINESIS_FIREHOSE = + 'records[*].powertools_json(powertools_base64(data)).Records[0]'; +const S3_EVENTBRIDGE_SQS = 'Records[*].powertools_json(body).detail'; + +export { + 
extractDataFromEnvelope, + API_GATEWAY_REST, + API_GATEWAY_HTTP, + SQS, + SNS, + EVENTBRIDGE, + 
CLOUDWATCH_EVENTS_SCHEDULED, + KINESIS_DATA_STREAM, + CLOUDWATCH_LOGS, + S3_SNS_SQS, + S3_SQS, + S3_SNS_KINESIS_FIREHOSE, + S3_KINESIS_FIREHOSE, + S3_EVENTBRIDGE_SQS, +}; diff --git a/packages/jmespath/tests/unit/index.test.ts b/packages/jmespath/tests/unit/index.test.ts index 896b23459f..0569fe73ce 100644 --- a/packages/jmespath/tests/unit/index.test.ts +++ b/packages/jmespath/tests/unit/index.test.ts @@ -3,6 +3,7 @@ * * @group unit/jmespath/coverage */ +import { JSONValue } from '@aws-lambda-powertools/commons/types'; import { search, EmptyExpressionError, @@ -16,6 +17,15 @@ import { Parser } from '../../src/Parser.js'; import { TreeInterpreter } from '../../src/TreeInterpreter.js'; describe('Coverage tests', () => { + it('does stuff', () => { + class Test {} + const test = new Test(); + + const result = search('foo', test as unknown as JSONValue); + + expect(result).toBe(test); + }); + // These expressions tests are not part of the compliance suite, but are added to ensure coverage describe('expressions', () => { it('throws an error if the expression is not a string', () => {