Skip to content

Commit

Permalink
docs: relocate image examples into a dedicated directory
Browse files Browse the repository at this point in the history
  • Loading branch information
HQarroum committed Jul 26, 2024
1 parent 95610fa commit 8f8318b
Show file tree
Hide file tree
Showing 44 changed files with 2,402 additions and 0 deletions.
18 changes: 18 additions & 0 deletions examples/simple-pipelines/image-processing-pipelines/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# 🖼️ Image Processing Pipelines

This directory provides several examples showcasing how to process and transform images on AWS using the different middlewares provided by Project Lakechain.

## 🌟 Examples

Below is a list of the different examples available in this directory.

Pipeline | Description
--- | ---
[Image Background Removal](image-background-removal) | A pipeline demonstrating automatic image background removal using [Rembg](https://github.com/danielgatis/rembg).
[Image Captioning Pipeline](image-captioning-pipeline) | A pipeline demonstrating image captioning using the [BLIP2 model](https://huggingface.co/docs/transformers/main/model_doc/blip-2).
[Image Hashing Pipeline](image-hashing-pipeline) | An example showcasing how to compute the hash of images.
[Image Moderation Pipeline](image-moderation-pipeline) | A pipeline demonstrating how to classify moderated images.
[Image Resize Pipeline](image-resize-pipeline) | A pipeline showcasing how to resize images to multiple sizes.
[Image Transforms Pipeline](image-transforms-pipeline) | A pipeline showcasing how to transform images.
[Image Watermarking Pipeline](image-watermarking-pipeline) | A pipeline demonstrating how to watermark images.
[Laplacian Variance Pipeline](laplacian-variance-pipeline) | An example showcasing how to compute the Laplacian variance of images.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"app": "npx ts-node --prefer-ts-exts stack.ts",
"watch": {
"include": ["**"],
"exclude": [
"README.md",
"cdk*.json",
"**/*.d.ts",
"**/*.js",
"tsconfig.json",
"package*.json",
"yarn.lock",
"node_modules",
"test",
"**/*.zip"
]
},
"context": {
"@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
"@aws-cdk/core:stackRelativeExports": true,
"@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
"@aws-cdk/aws-lambda:recognizeVersionProps": true,
"@aws-cdk/aws-lambda:recognizeLayerVersion": true,
"@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true,
"@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
"@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
"@aws-cdk/core:checkSecretUsage": true,
"@aws-cdk/aws-iam:minimizePolicies": true,
"@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true,
"@aws-cdk/core:validateSnapshotRemovalPolicy": true,
"@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
"@aws-cdk/aws-s3:createDefaultLoggingPolicy": true,
"@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true,
"@aws-cdk/aws-apigateway:disableCloudWatchRole": true,
"@aws-cdk/core:enablePartitionLiterals": true,
"@aws-cdk/customresources:installLatestAwsSdkDefault": false,
"@aws-cdk/core:target-partitions": ["aws", "aws-cn"]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"name": "image-background-removal",
"description": "Builds a pipeline demonstrating automatic image background removal using Rembg.",
"version": "0.7.0",
"private": true,
"scripts": {
"build": "tsc",
"build-pkg": "npx lerna run build --scope=image-background-removal --include-dependencies",
"clean": "npx rimraf dist/ cdk.out/ node_modules/",
"audit": "npm audit && npm run synth --silent | cfn_nag",
"lint": "npx eslint .",
"synth": "npx --yes cdk synth",
"deploy": "npx --yes cdk deploy",
"hotswap": "npx --yes cdk deploy --hotswap",
"destroy": "npx --yes cdk destroy --all"
},
"author": {
"name": "Amazon Web Services",
"url": "https://aws.amazon.com"
},
"repository": {
"type": "git",
"url": "git://github.com/awslabs/project-lakechain"
},
"license": "Apache-2.0",
"devDependencies": {
"@types/node": "^20.8.10",
"esbuild": "0.21.5",
"ts-jest": "^29.0.0",
"ts-node": "^10.9.2"
},
"dependencies": {
"@project-lakechain/s3-event-trigger": "*",
"@project-lakechain/rembg-image-processor": "*",
"@project-lakechain/s3-storage-connector": "*"
},
"peerDependencies": {
"aws-cdk-lib": "2.150.0",
"constructs": "^10.3.0"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
#!/usr/bin/env node

/*
* Copyright (C) 2023 Amazon.com, Inc. or its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import * as cdk from 'aws-cdk-lib';
import * as s3 from 'aws-cdk-lib/aws-s3';
import * as ec2 from 'aws-cdk-lib/aws-ec2';

import { Construct } from 'constructs';
import { CacheStorage } from '@project-lakechain/core';
import { S3EventTrigger } from '@project-lakechain/s3-event-trigger';
import { RembgImageProcessor } from '@project-lakechain/rembg-image-processor';
import { S3StorageConnector } from '@project-lakechain/s3-storage-connector';

/**
* Example stack for automatic background removal.
* The pipeline looks as follows:
*
* ┌──────────────┐ ┌────────────────────────┐ ┌───────────────┐
* │ S3 Trigger ├──►│ Rembg Image Processor ├──►| Output Bucket |
* └──────────────┘ └────────────────────────┘ └───────────────┘
*
* @see https://github.com/danielgatis/rembg/tree/main
*/
export class ImageBackgroundRemovalStack extends cdk.Stack {

  /**
   * Assembles the storage, the network and the middlewares
   * that make up the background removal pipeline.
   * @param scope the construct under which the stack is created.
   * @param id the identifier of the stack.
   * @param env the stack properties. They are spread after the
   * default description, so a `description` supplied here wins.
   */
  constructor(scope: Construct, id: string, env: cdk.StackProps) {
    super(scope, id, {
      description: 'A pipeline demonstrating how to automatically remove image backgrounds using Rembg.',
      ...env
    });

    // Network handed over to the Rembg middleware.
    const vpc = this.createVpc('Vpc');

    ///////////////////////////////////////////
    ///////         S3 Storage          ///////
    ///////////////////////////////////////////

    // Bucket monitored by the trigger.
    const source = this.createBucket('Bucket');

    // Bucket receiving the pipeline results.
    const destination = this.createBucket('Destination');

    // Cache storage shared by every middleware of the pipeline.
    const cache = new CacheStorage(this, 'Cache', {});

    ///////////////////////////////////////////
    ///////     Lakechain Pipeline      ///////
    ///////////////////////////////////////////

    // Entry point of the pipeline, bound to the source bucket.
    const trigger = new S3EventTrigger.Builder()
      .withScope(this)
      .withIdentifier('Trigger')
      .withCacheStorage(cache)
      .withBucket(source)
      .build();

    // Middleware removing the background from images.
    const backgroundRemover = new RembgImageProcessor.Builder()
      .withScope(this)
      .withIdentifier('RembgImageProcessor')
      .withCacheStorage(cache)
      .withVpc(vpc)
      .build();
    const processed = trigger.pipe(backgroundRemover);

    // Middleware writing the results to the destination bucket.
    // It is built after the first `pipe` call, as in the chained form.
    const storage = new S3StorageConnector.Builder()
      .withScope(this)
      .withIdentifier('S3StorageConnector')
      .withCacheStorage(cache)
      .withDestinationBucket(destination)
      .build();
    processed.pipe(storage);

    // Expose the source bucket name as a stack output.
    new cdk.CfnOutput(this, 'SourceBucketName', {
      description: 'The name of the source bucket.',
      value: source.bucketName
    });

    // Expose the destination bucket name as a stack output.
    new cdk.CfnOutput(this, 'DestinationBucketName', {
      description: 'The name of the destination bucket.',
      value: destination.bucketName
    });
  }

  /**
   * @param id the bucket construct identifier.
   * @returns a new S3-managed encrypted bucket which blocks public
   * access, enforces SSL, and is emptied and destroyed with the stack.
   */
  private createBucket(id: string): s3.Bucket {
    return new s3.Bucket(this, id, {
      encryption: s3.BucketEncryption.S3_MANAGED,
      blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
      autoDeleteObjects: true,
      removalPolicy: cdk.RemovalPolicy.DESTROY,
      enforceSSL: true
    });
  }

  /**
   * @param id the VPC identifier.
   * @returns a new VPC limited to a single availability zone,
   * declaring one public, one private (with egress) and one
   * isolated subnet group for the pipeline.
   */
  private createVpc(id: string): ec2.IVpc {
    const subnetConfiguration: ec2.SubnetConfiguration[] = [
      // Used by NAT Gateways to provide Internet access
      // to the containers.
      { name: 'public', subnetType: ec2.SubnetType.PUBLIC, cidrMask: 28 },
      // Used by the containers.
      { name: 'private', subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS, cidrMask: 24 },
      // Used by EFS.
      { name: 'isolated', subnetType: ec2.SubnetType.PRIVATE_ISOLATED, cidrMask: 28 }
    ];

    return new ec2.Vpc(this, id, {
      enableDnsSupport: true,
      enableDnsHostnames: true,
      ipAddresses: ec2.IpAddresses.cidr('10.0.0.0/20'),
      maxAzs: 1,
      subnetConfiguration
    });
  }
}

// Instantiate the CDK application hosting the stack.
const app = new cdk.App();

// Resolve the deployment target from the environment, preferring
// the `CDK_DEFAULT_*` variables over the `AWS_DEFAULT_*` ones.
const target = {
  account: process.env.CDK_DEFAULT_ACCOUNT ?? process.env.AWS_DEFAULT_ACCOUNT,
  region: process.env.CDK_DEFAULT_REGION ?? process.env.AWS_DEFAULT_REGION
};

// Register the stack with the application.
new ImageBackgroundRemovalStack(app, 'ImageBackgroundRemovalStack', {
  env: target
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"extends": "../../../../tsconfig.json",
"compilerOptions": {
"outDir": "./dist"
},
"include": ["./*.ts"]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# :camera: Image Captioning Pipeline

> In this example, we use the [BLIP-2 image captioning model](https://huggingface.co/docs/transformers/main/model_doc/blip-2) in a Lakechain pipeline to automatically generate captions for images.

## :dna: Pipeline

```mermaid
flowchart LR
Input([Input Bucket]) -.-> S3[S3 Trigger]
S3 -. Image .-> Blip2[Blip2 Image Processor]
Blip2 -. Image + Metadata .-> S3Storage[S3 Storage Connector]
S3Storage -.-> Output[Output Bucket]
```

## ❓ What is Happening

The BLIP-2 model, introduced in the paper [BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models](https://arxiv.org/abs/2301.12597), presents a new approach to vision-language tasks. It makes it possible to generate captions for images using a pre-trained image encoder and a pre-trained language model.

The BLIP-2 Image Processor middleware makes it easy to deploy the BLIP-2 model on GPU containers and use it to generate captions for images within a Lakechain document processing pipeline. This is useful if you want to create descriptive labels for your images for later use in search engines, or if you want to generate captions for your images to make them more accessible to visually impaired people.

<br />
<p align="center">
<img width="400" src="assets/image-captioning.png">
</p>
<br />

The BLIP-2 Image Processor middleware will enrich the metadata of the document with the gathered captions and pass them to the next middleware in the pipeline.

> **Note**
> The BLIP-2 Image Processor middleware can take a few minutes to execute, as it runs the BLIP-2 model on an ECS cluster of GPU-powered instances, which need some time to spin up.

## 📝 Requirements

The following requirements are needed to deploy the infrastructure associated with this pipeline:

- You need access to a development AWS account.
- [AWS CDK](https://docs.aws.amazon.com/cdk/latest/guide/getting_started.html#getting_started_install) is required to deploy the infrastructure.
- [Docker](https://docs.docker.com/get-docker/) is required to be running to build middlewares.
- [Node.js](https://nodejs.org/en/download/) v18+ and NPM.
- [Python](https://www.python.org/downloads/) v3.8+ and [Pip](https://pip.pypa.io/en/stable/installation/).

## 🚀 Deploy

Head to the directory [`examples/simple-pipelines/image-processing-pipelines/image-captioning-pipeline`](/examples/simple-pipelines/image-processing-pipelines/image-captioning-pipeline) in the repository and run the following commands to build the example:

```bash
npm install
npm run build-pkg
```

You can then deploy the example to your account (ensure your AWS CDK is configured with the appropriate AWS credentials and AWS region):

```bash
npm run deploy
```

## 🧹 Clean up

Don't forget to clean up the resources created by this example by running the following command:

```bash
npm run destroy
```
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"app": "npx ts-node --prefer-ts-exts stack.ts",
"watch": {
"include": ["**"],
"exclude": [
"README.md",
"cdk*.json",
"**/*.d.ts",
"**/*.js",
"tsconfig.json",
"package*.json",
"yarn.lock",
"node_modules",
"test",
"**/*.zip"
]
},
"context": {
"@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
"@aws-cdk/core:stackRelativeExports": true,
"@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
"@aws-cdk/aws-lambda:recognizeVersionProps": true,
"@aws-cdk/aws-lambda:recognizeLayerVersion": true,
"@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true,
"@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
"@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
"@aws-cdk/core:checkSecretUsage": true,
"@aws-cdk/aws-iam:minimizePolicies": true,
"@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true,
"@aws-cdk/core:validateSnapshotRemovalPolicy": true,
"@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
"@aws-cdk/aws-s3:createDefaultLoggingPolicy": true,
"@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true,
"@aws-cdk/aws-apigateway:disableCloudWatchRole": true,
"@aws-cdk/core:enablePartitionLiterals": true,
"@aws-cdk/customresources:installLatestAwsSdkDefault": false,
"@aws-cdk/core:target-partitions": ["aws", "aws-cn"]
}
}
Loading

0 comments on commit 8f8318b

Please sign in to comment.