From 8360dce0fc7866105a6ae27d547972a1bf31de73 Mon Sep 17 00:00:00 2001 From: Flook Peter Date: Wed, 16 Oct 2024 17:24:39 +0800 Subject: [PATCH 1/5] Clean top-level directory files --- .github/workflows/check.yml | 4 +- Dockerfile | 2 +- README.md | 180 +++++------------- local-docker-build.sh | 20 -- CONTRIBUTING.md => misc/CONTRIBUTING.md | 0 .../compose/docker-compose.yaml | 0 .../basic_data_caterer_flow_medium.gif | Bin {design => misc/design}/high-level-design.png | Bin .../design}/high_level_flow-basic-flow.svg | 0 .../high_level_flow-run-config-basic-flow.svg | 0 .../design}/high_level_flow-run-config.svg | 0 misc/distribution/README.md | 29 +++ .../docker-image}/run-data-caterer.sh | 0 .../insta-integration/insta-integration.yaml | 0 run-docker.sh | 5 - 15 files changed, 81 insertions(+), 159 deletions(-) delete mode 100644 local-docker-build.sh rename CONTRIBUTING.md => misc/CONTRIBUTING.md (100%) rename docker-compose.yaml => misc/compose/docker-compose.yaml (100%) rename {design => misc/design}/basic_data_caterer_flow_medium.gif (100%) rename {design => misc/design}/high-level-design.png (100%) rename {design => misc/design}/high_level_flow-basic-flow.svg (100%) rename {design => misc/design}/high_level_flow-run-config-basic-flow.svg (100%) rename {design => misc/design}/high_level_flow-run-config.svg (100%) create mode 100644 misc/distribution/README.md rename {script => misc/docker-image}/run-data-caterer.sh (100%) rename insta-integration.yaml => misc/insta-integration/insta-integration.yaml (100%) delete mode 100644 run-docker.sh diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index bd1c9bc5..fbf76fe3 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -17,8 +17,8 @@ jobs: - name: Run integration tests id: tests uses: data-catering/insta-integration@v1 - env: - LOG_LEVEL: debug + with: + configuration_file: misc/insta-integration/insta-integration.yaml - name: Print results run: | echo "Records generated: ${{ steps.tests.outputs.num_records_generated }}" diff --git a/Dockerfile b/Dockerfile index c4c1cfda..d71502b0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ RUN addgroup -S app \ && apk add --no-cache libc6-compat bash \ && mkdir -p /opt/app /opt/DataCaterer/connection /opt/DataCaterer/plan /opt/DataCaterer/execution /opt/DataCaterer/report \ && chown -R app:app /opt/app /opt/DataCaterer/connection /opt/DataCaterer/plan /opt/DataCaterer/execution /opt/DataCaterer/report -COPY --chown=app:app script app/src/main/resources app/build/libs /opt/app/ +COPY --chown=app:app misc/docker-image app/src/main/resources app/build/libs /opt/app/ USER app WORKDIR /opt/app diff --git a/README.md b/README.md index 954497df..7c061778 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A test data management tool with automated data generation, validation and cleanup. -![Basic data flow for Data Caterer](design/high_level_flow-run-config-basic-flow.svg) +![Basic data flow for Data Caterer](misc/design/high_level_flow-run-config-basic-flow.svg) [Generate data](https://data.catering/setup/generator/data-generator/) for databases, files, messaging systems or HTTP requests via UI, Scala/Java SDK or YAML input and executed via Spark. 
Run @@ -34,21 +34,21 @@ and deep dive into issues [from the generated report](https://data.catering/samp - [Alerts to be notified of results](https://data.catering/setup/report/alert/) - [Run as GitHub Action](https://github.com/data-catering/insta-integration) -![Basic flow](design/basic_data_caterer_flow_medium.gif) +![Basic flow](misc/design/basic_data_caterer_flow_medium.gif) ## Quick start -1. [Mac download](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-mac.zip) -2. [Windows download](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-windows.zip) +1. [UI App: Mac download](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-mac.zip) +2. [UI App: Windows download](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-windows.zip) 1. After downloading, go to 'Downloads' folder and 'Extract All' from data-caterer-windows 2. Double-click 'DataCaterer-1.0.0' to install Data Caterer 3. Click on 'More info' then at the bottom, click 'Run anyway' 4. Go to '/Program Files/DataCaterer' folder and run DataCaterer application 5. If your browser doesn't open, go to [http://localhost:9898](http://localhost:9898) in your preferred browser -3. [Linux download](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-linux.zip) +3. [UI App: Linux download](https://nightly.link/data-catering/data-caterer/workflows/build/main/data-caterer-linux.zip) 4. Docker ```shell - docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone --name datacaterer datacatering/data-caterer-basic:0.11.9 + docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone --name datacaterer datacatering/data-caterer-basic:0.11.11 ``` [Open localhost:9898](http://localhost:9898). @@ -64,105 +64,61 @@ cd data-caterer-example && ./run.sh ### Supported data sources -Data Caterer supports the below data sources. Additional data sources can be added on a demand basis. [Check here for -the full roadmap](#roadmap). - -| Data Source Type | Data Source | Support | Free | -|------------------|------------------------------------|---------|------| -| Cloud Storage | AWS S3 | ✅ | ✅ | -| Cloud Storage | Azure Blob Storage | ✅ | ✅ | -| Cloud Storage | GCP Cloud Storage | ✅ | ✅ | -| Database | Cassandra | ✅ | ✅ | -| Database | MySQL | ✅ | ✅ | -| Database | Postgres | ✅ | ✅ | -| Database | Elasticsearch | ❌ | ✅ | -| Database | MongoDB | ❌ | ✅ | -| File | CSV | ✅ | ✅ | -| File | Delta Lake | ✅ | ✅ | -| File | JSON | ✅ | ✅ | -| File | Iceberg | ✅ | ✅ | -| File | ORC | ✅ | ✅ | -| File | Parquet | ✅ | ✅ | -| File | Hudi | ❌ | ✅ | -| HTTP | REST API | ✅ | ❌ | -| Messaging | Kafka | ✅ | ❌ | -| Messaging | Solace | ✅ | ❌ | -| Messaging | ActiveMQ | ❌ | ❌ | -| Messaging | Pulsar | ❌ | ❌ | -| Messaging | RabbitMQ | ❌ | ❌ | -| Metadata | Great Expectations | ✅ | ❌ | -| Metadata | Marquez | ✅ | ❌ | -| Metadata | OpenAPI/Swagger | ✅ | ❌ | -| Metadata | OpenMetadata | ✅ | ❌ | -| Metadata | Open Data Contract Standard (ODCS) | ✅ | ❌ | -| Metadata | Amundsen | ❌ | ❌ | -| Metadata | Datahub | ❌ | ❌ | -| Metadata | Data Contract CLI | ❌ | ❌ | -| Metadata | Solace Event Portal | ❌ | ❌ | - - -## Supported use cases - -1. Insert into single data sink -2. Insert into multiple data sinks - 1. Foreign keys associated between data sources - 2. Number of records per column value -3. Set random seed at column and whole data generation level -4. Generate real-looking data (via DataFaker) and edge cases - 1. 
Names, addresses, places etc. - 2. Edge cases for each data type (e.g. newline character in string, maximum integer, NaN, 0) - 3. Nullability -5. Send events progressively -6. Automatically insert data into data source - 1. Read metadata from data source and insert for all sub data sources (e.g. tables) - 2. Get statistics from existing data in data source if exists -7. Track and delete generated data -8. Extract data profiling and metadata from given data sources - 1. Calculate the total number of combinations -9. Validate data - 1. Basic column validations (not null, contains, equals, greater than) - 2. Aggregate validations (group by account_id and sum amounts should be less than 100, each account should have at - least one transaction) - 3. Upstream data source validations (generate data and then check same data is inserted in another data source with - potential transformations) - 4. Column name validations (check count and ordering of column names) -10. Data migration validations - 1. Ensure row counts are equal - 2. Check both data sources have same values for key columns +Data Caterer supports the below data sources. [Check here for the full roadmap](#roadmap). + +| Data Source Type | Data Source | Support | +|------------------|------------------------------------|---------| +| Cloud Storage | AWS S3 | ✅ | +| Cloud Storage | Azure Blob Storage | ✅ | +| Cloud Storage | GCP Cloud Storage | ✅ | +| Database | Cassandra | ✅ | +| Database | MySQL | ✅ | +| Database | Postgres | ✅ | +| Database | Elasticsearch | ❌ | +| Database | MongoDB | ❌ | +| File | CSV | ✅ | +| File | Delta Lake | ✅ | +| File | JSON | ✅ | +| File | Iceberg | ✅ | +| File | ORC | ✅ | +| File | Parquet | ✅ | +| File | Hudi | ❌ | +| HTTP | REST API | ✅ | +| Messaging | Kafka | ✅ | +| Messaging | Solace | ✅ | +| Messaging | ActiveMQ | ❌ | +| Messaging | Pulsar | ❌ | +| Messaging | RabbitMQ | ❌ | +| Metadata | Data Contract CLI | ✅ | +| Metadata | Great Expectations | ✅ | +| Metadata | Marquez | ✅ | +| Metadata | OpenAPI/Swagger | ✅ | +| Metadata | OpenMetadata | ✅ | +| Metadata | Open Data Contract Standard (ODCS) | ✅ | +| Metadata | Amundsen | ❌ | +| Metadata | Datahub | ❌ | +| Metadata | Solace Event Portal | ❌ | -## Run Configurations - -Different ways to run Data Caterer based on your use case: - -![Types of run configurations](design/high_level_flow-run-config.svg) ## Sponsorship -Data Caterer is set up under a sponsorware model where all features are available to sponsors. The core features -are available here in this project for all to use/fork/update/improve etc., as the open core. - -Sponsors have access to the following features: - -- All data sources (see [here for all data sources](https://data.catering/setup/connection/)) -- Batch and Event generation -- [Auto generation from data connections or metadata sources](https://data.catering/setup/guide/scenario/auto-generate-connection/) -- Suggest data validations -- [Clean up generated and consumed data](https://data.catering/setup/guide/scenario/delete-generated-data/) -- Run as many times as you want, not charged by usage -- Metadata discovery -- [Plus more to come](#roadmap) +Data Caterer is set up under a sponsorship model. If you require support or additional features from Data Caterer +as an enterprise, you are required to be a sponsor for the project. 
[Find out more details here to help with sponsorship.](https://data.catering/sponsor)
 
-This is inspired by the [mkdocs-material project](https://github.com/squidfunk/mkdocs-material) which
-[follows the same model](https://squidfunk.github.io/mkdocs-material/insiders/).
-
 ## Contributing
 
-[View details here about how you can contribute to the project.](CONTRIBUTING.md)
+[View details here about how you can contribute to the project.](misc/CONTRIBUTING.md)
 
 ## Additional Details
 
+### Run Configurations
+
+Different ways to run Data Caterer based on your use case:
+
+![Types of run configurations](misc/design/high_level_flow-run-config.svg)
+
 ### Design
 
 [Design motivations and details can be found here.](https://data.catering/setup/design)
@@ -170,41 +126,3 @@ This is inspired by the [mkdocs-material project](https://github.com/squidfunk/m
 ### Roadmap
 
 [Can check here for full list.](https://data.catering/use-case/roadmap/)
-
-#### UI
-
-1. Allow the application to run with UI enabled
-2. Runs as a long-lived app with UI that interacts with the existing app as a single container
-3. Ability to run as UI, Spark job or both
-4. Persist data in files or database (Postgres)
-5. UI will show the history of data generation/validation runs, delete generated data, create new scenarios, define data connections
-
-#### Distribution
-
-##### Docker
-
-```shell
-gradle clean :api:shadowJar :app:shadowJar
-docker build --build-arg "APP_VERSION=0.7.0" --build-arg "SPARK_VERSION=3.5.0" --no-cache -t datacatering/data-caterer:0.7.0 .
-docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone -v data-caterer-data:/opt/data-caterer --name datacaterer datacatering/data-caterer:0.7.0
-#open localhost:9898
-```
-
-##### Jpackage
-
-```bash
-JPACKAGE_BUILD=true gradle clean :api:shadowJar :app:shadowJar
-# Mac
-jpackage "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-mac.cfg"
-# Windows
-jpackage "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-windows.cfg"
-# Linux
-jpackage "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-linux.cfg"
-```
-
-##### Java 17 VM Options
-
-```shell
---add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED
-```
--Dlog4j.configurationFile=classpath:log4j2.properties
diff --git a/local-docker-build.sh b/local-docker-build.sh
deleted file mode 100644
index 1bb7a203..00000000
--- a/local-docker-build.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-version=$(grep version gradle.properties | cut -d= -f2)
-sparkVersion=$(grep sparkVersion gradle.properties | cut -d= -f2)
-
-echo "Creating API jar"
-gradle clean :api:shadowJar
-
-echo "Creating data caterer jar, version=$version"
-gradle build shadowJar -x test
-build_app=$?
-if [[ "$build_app" -ne 0 ]] ; then - echo "Failed to build app, exiting" - exit 1 -fi - -docker build \ - --build-arg "APP_VERSION=$version" \ - --build-arg "SPARK_VERSION=$sparkVersion" \ - -t datacatering/data-caterer-basic:$version . diff --git a/CONTRIBUTING.md b/misc/CONTRIBUTING.md similarity index 100% rename from CONTRIBUTING.md rename to misc/CONTRIBUTING.md diff --git a/docker-compose.yaml b/misc/compose/docker-compose.yaml similarity index 100% rename from docker-compose.yaml rename to misc/compose/docker-compose.yaml diff --git a/design/basic_data_caterer_flow_medium.gif b/misc/design/basic_data_caterer_flow_medium.gif similarity index 100% rename from design/basic_data_caterer_flow_medium.gif rename to misc/design/basic_data_caterer_flow_medium.gif diff --git a/design/high-level-design.png b/misc/design/high-level-design.png similarity index 100% rename from design/high-level-design.png rename to misc/design/high-level-design.png diff --git a/design/high_level_flow-basic-flow.svg b/misc/design/high_level_flow-basic-flow.svg similarity index 100% rename from design/high_level_flow-basic-flow.svg rename to misc/design/high_level_flow-basic-flow.svg diff --git a/design/high_level_flow-run-config-basic-flow.svg b/misc/design/high_level_flow-run-config-basic-flow.svg similarity index 100% rename from design/high_level_flow-run-config-basic-flow.svg rename to misc/design/high_level_flow-run-config-basic-flow.svg diff --git a/design/high_level_flow-run-config.svg b/misc/design/high_level_flow-run-config.svg similarity index 100% rename from design/high_level_flow-run-config.svg rename to misc/design/high_level_flow-run-config.svg diff --git a/misc/distribution/README.md b/misc/distribution/README.md new file mode 100644 index 00000000..390c71f4 --- /dev/null +++ b/misc/distribution/README.md @@ -0,0 +1,29 @@ +#### Distribution + +##### Docker + +```shell +gradle clean :api:shadowJar :app:shadowJar +docker build --build-arg "APP_VERSION=0.7.0" --build-arg "SPARK_VERSION=3.5.0" --no-cache -t datacatering/data-caterer:0.7.0 . 
+docker run -d -i -p 9898:9898 -e DEPLOY_MODE=standalone -v data-caterer-data:/opt/data-caterer --name datacaterer datacatering/data-caterer:0.7.0
+#open localhost:9898
+```
+
+##### Jpackage
+
+```bash
+JPACKAGE_BUILD=true gradle clean :api:shadowJar :app:shadowJar
+# Mac
+jpackage "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-mac.cfg"
+# Windows
+jpackage "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-windows.cfg"
+# Linux
+jpackage "@misc/jpackage/jpackage.cfg" "@misc/jpackage/jpackage-linux.cfg"
+```
+
+##### Java 17 VM Options
+
+```shell
+--add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED
+-Dlog4j.configurationFile=classpath:log4j2.properties
+```
diff --git a/script/run-data-caterer.sh b/misc/docker-image/run-data-caterer.sh
similarity index 100%
rename from script/run-data-caterer.sh
rename to misc/docker-image/run-data-caterer.sh
diff --git a/insta-integration.yaml b/misc/insta-integration/insta-integration.yaml
similarity index 100%
rename from insta-integration.yaml
rename to misc/insta-integration/insta-integration.yaml
diff --git a/run-docker.sh b/run-docker.sh
deleted file mode 100644
index 689e4ea7..00000000
--- a/run-docker.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/bash
-echo "======================"
-echo "Try to run basic image"
-echo "======================"
-docker run -e ENABLE_RECORD_TRACKING=true -v /tmp/datagen:/opt/app/data-caterer datacatering/data-caterer-basic:0.1
From 2f8c2aa4d6a4c1c7b4654ce744d02ff5f3be42d3 Mon Sep 17 00:00:00 2001
From: Flook Peter
Date: Wed, 16 Oct 2024 17:43:39 +0800
Subject: [PATCH 2/5] Fix directory for insta-integration, add in mildly quick
 start examples

---
 README.md                                     | 53 +++++++++++++++++++
 misc/insta-integration/insta-integration.yaml | 10 ++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 7c061778..59a51076 100644
--- a/README.md
+++ b/README.md
@@ -126,3 +126,56 @@
 ### Roadmap
 
 [Can check here for full list.](https://data.catering/use-case/roadmap/)
+
+### Mildly Quick Start
+
+#### I want to generate data in Postgres
+
+```scala
+postgres("customer_postgres", "jdbc:postgresql://localhost:5432/customer") //name and url
+```
+
+#### But I want `account_id` to follow a pattern
+
+```scala
+postgres("customer_postgres", "jdbc:postgresql://localhost:5432/customer")
+  .schema(field.name("account_id").regex("ACC[0-9]{10}"))
+```
+
+#### I also want to generate events
+
+```scala
+kafka("my_kafka", "localhost:29092")
+  .topic("account-topic")
+  .schema(...)
+```
+
+#### But I want the same `account_id` to show in Postgres and Kafka
+
+```scala
+val kafkaTask = kafka("my_kafka", "localhost:29092")
+  .topic("account-topic")
+  .schema(...)
+
+val postgresTask = postgres("customer_postgres", "jdbc:postgresql://localhost:5432/customer")
+  .schema(field.name("account_id").regex("ACC[0-9]{10}"))
+
+plan.addForeignKeyRelationship(
+  kafkaTask, List("account_id"),
+  List(postgresTask -> List("account_id"))
+)
+```
+
+#### I want to generate 5 transactions per `account_id`
+
+```scala
+postgres("customer_postgres", "jdbc:postgresql://localhost:5432/customer")
+  .count(count.recordsPerColumn(5, "account_id"))
+```
+
+#### But I want to generate 0 to 5 transactions per `account_id`
+
+```scala
+postgres("customer_postgres", "jdbc:postgresql://localhost:5432/customer")
+  .count(count.recordsPerColumnGenerator(generator.min(0).max(5), "account_id"))
+```
\ No newline at end of file
diff --git a/misc/insta-integration/insta-integration.yaml b/misc/insta-integration/insta-integration.yaml
index 84038e61..a9d03053 100644
--- a/misc/insta-integration/insta-integration.yaml
+++ b/misc/insta-integration/insta-integration.yaml
@@ -1,12 +1,12 @@
 services:
   - name: postgres
-    data: app/src/test/resources/sample/sql/postgres
+    data: ../../app/src/test/resources/sample/sql/postgres
 run:
-  - command: java -jar app/build/libs/data-caterer.jar
+  - command: java -jar ../../app/build/libs/data-caterer.jar
     env:
-      PLAN_FILE_PATH: app/src/test/resources/sample/plan/account-balance-transaction-plan.yaml
-      TASK_FOLDER_PATH: app/src/test/resources/sample/task
-      APPLICATION_CONFIG_PATH: app/src/main/resources/application.conf
+      PLAN_FILE_PATH: ../../app/src/test/resources/sample/plan/account-balance-transaction-plan.yaml
+      TASK_FOLDER_PATH: ../../app/src/test/resources/sample/task
+      APPLICATION_CONFIG_PATH: ../../app/src/main/resources/application.conf
     generateFirst: false
 test:
   validation:
From 9aac4f39a13f3182fbb8e4a6e6b83900b61cee9b Mon Sep 17 00:00:00 2001
From: Flook Peter
Date: Wed, 16 Oct 2024 17:47:16 +0800
Subject: [PATCH 3/5] Move insta-integration.yaml back to top level, use version 0.11.11

---
 .github/workflows/check.yml                          |  2 +-
 README.md                                            | 10 +++++-----
 .../insta-integration.yaml => insta-integration.yaml | 10 +++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)
 rename misc/insta-integration/insta-integration.yaml => insta-integration.yaml (63%)

diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
index fbf76fe3..6930ad9e 100644
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@@ -18,7 +18,7 @@ jobs:
       id: tests
       uses: data-catering/insta-integration@v1
       with:
-        configuration_file: misc/insta-integration/insta-integration.yaml
+        data_caterer_version: 0.11.11
     - name: Print results
       run: |
         echo "Records generated: ${{ steps.tests.outputs.num_records_generated }}"
diff --git a/README.md b/README.md
index 59a51076..94e2df70 100644
--- a/README.md
+++ b/README.md
@@ -153,16 +153,16 @@
 #### But I want the same `account_id` to show in Postgres and Kafka
 
 ```scala
+val postgresTask = postgres("customer_postgres", "jdbc:postgresql://localhost:5432/customer")
+  .schema(field.name("account_id").regex("ACC[0-9]{10}"))
+
 val kafkaTask = kafka("my_kafka", "localhost:29092")
   .topic("account-topic")
   .schema(...)
 
-val postgresTask = postgres("customer_postgres", "jdbc:postgresql://localhost:5432/customer")
-  .schema(field.name("account_id").regex("ACC[0-9]{10}"))
-
 plan.addForeignKeyRelationship(
-  kafkaTask, List("account_id"),
-  List(postgresTask -> List("account_id"))
+  postgresTask, List("account_id"),
+  List(kafkaTask -> List("account_id"))
 )
 ```
diff --git a/misc/insta-integration/insta-integration.yaml b/insta-integration.yaml
similarity index 63%
rename from misc/insta-integration/insta-integration.yaml
rename to insta-integration.yaml
index a9d03053..84038e61 100644
--- a/misc/insta-integration/insta-integration.yaml
+++ b/insta-integration.yaml
@@ -1,12 +1,12 @@
 services:
   - name: postgres
-    data: ../../app/src/test/resources/sample/sql/postgres
+    data: app/src/test/resources/sample/sql/postgres
 run:
-  - command: java -jar ../../app/build/libs/data-caterer.jar
+  - command: java -jar app/build/libs/data-caterer.jar
     env:
-      PLAN_FILE_PATH: ../../app/src/test/resources/sample/plan/account-balance-transaction-plan.yaml
-      TASK_FOLDER_PATH: ../../app/src/test/resources/sample/task
-      APPLICATION_CONFIG_PATH: ../../app/src/main/resources/application.conf
+      PLAN_FILE_PATH: app/src/test/resources/sample/plan/account-balance-transaction-plan.yaml
+      TASK_FOLDER_PATH: app/src/test/resources/sample/task
+      APPLICATION_CONFIG_PATH: app/src/main/resources/application.conf
     generateFirst: false
 test:
   validation:
From 3b9914cf8c2c0731ef1ce1f2767fa99b7e39dd71 Mon Sep 17 00:00:00 2001
From: Flook Peter
Date: Wed, 16 Oct 2024 17:50:11 +0800
Subject: [PATCH 4/5] Add in debug logs

---
 .github/workflows/check.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
index 6930ad9e..3e4518e7 100644
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@@ -17,6 +17,8 @@ jobs:
     - name: Run integration tests
       id: tests
       uses: data-catering/insta-integration@v1
+      env:
+        LOG_LEVEL: debug
       with:
         data_caterer_version: 0.11.11
     - name: Print results
From 867fcb31320173319ca4353a26d404ba1c9d82e2 Mon Sep 17 00:00:00 2001
From: Flook Peter
Date: Wed, 16 Oct 2024 17:53:37 +0800
Subject: [PATCH 5/5] Remove data_caterer_version input from check workflow

---
 .github/workflows/check.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
index 3e4518e7..bd1c9bc5 100644
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@@ -19,8 +19,6 @@ jobs:
       uses: data-catering/insta-integration@v1
       env:
         LOG_LEVEL: debug
-      with:
-        data_caterer_version: 0.11.11
     - name: Print results
       run: |
         echo "Records generated: ${{ steps.tests.outputs.num_records_generated }}"