Gitlab->Github phase 2 - extract_load (pre-merge)
Showing 10 changed files with 271 additions and 88 deletions.
This file was deleted.
@@ -0,0 +1,6 @@
ELT_ENVIRONMENT="gdp_dev"
DB_NAME="GDP_DEV"
GOODDATA_ENVIRONMENT_ID="development"
DB_USER="gdp_dev"
DB_WAREHOUSE="${SNOWFLAKE_WAREHOUSE}"
DB_ACCOUNT="${SNOWFLAKE_ACCOUNT}"
@@ -0,0 +1,16 @@
DBT_PROFILES_DIR="profile"
MELTANO_TARGET="target-snowflake"
DBT_TARGET="snowflake"
GOODDATA_PROFILES="demo_cicd"
# TODO - uncomment once the GitHub pipeline is fully ready
# GOODDATA_PROFILES="demo_cicd closed_beta closed_beta_se labs_se"
# Snowflake objects are upper-case by default. We use Snowflake in most jobs.
GOODDATA_UPPER_CASE="--gooddata-upper-case"
SNOWFLAKE_ACCOUNT="gooddata"
SNOWFLAKE_WAREHOUSE="DEMO_WH"
VERTICA_HOST="140.236.88.151"
VERTICA_PORT="5433"
VERTICA_USER="gooddata"
VERTICA_DBNAME="PartPub80DB"
OUTPUT_SCHEMA="cicd_output_stage"
MELTANO_STATE_AWS_BUCKET="jacek-blueprint-data-pipeline"
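These two variable files (judging from the paths referenced in the workflows below, likely .github/variables/elta_dev.env and elta_shared.env, though the filenames are not visible in this view) are consumed by a custom ./.github/actions/setvars action that is also not shown here. Purely as a sketch, assuming the action simply sources the listed files in order and re-exports their variables into the job environment (which would also resolve references such as ${SNOWFLAKE_WAREHOUSE}), it could look roughly like this; the real action may differ:

# Illustrative sketch only - the real ./.github/actions/setvars is not part of this view.
name: setvars
description: Load one or more .env-style variable files into the job environment
inputs:
  varFilePath:
    description: Space-separated list of variable files, loaded in order
    required: true
runs:
  using: composite
  steps:
    - name: Export variables
      shell: bash
      run: |
        # Source every file in order so that references such as ${SNOWFLAKE_WAREHOUSE}
        # in a later file resolve against variables defined in an earlier one.
        set -a
        for f in ${{ inputs.varFilePath }}; do
          source "$f"
        done
        set +a
        # Re-emit every variable name the files define into $GITHUB_ENV so that
        # subsequent steps of the calling job can read them through the env context.
        grep -hoE '^[A-Za-z_][A-Za-z0-9_]*' ${{ inputs.varFilePath }} | sort -u |
        while read -r name; do
          echo "$name=${!name}" >> "$GITHUB_ENV"
        done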
@@ -0,0 +1,58 @@
name: Extract, Load, Transform, and Analytics (Dev)
# TODO - add analytics

on:
  pull_request:
    branches:
      - main
    paths:
      # TODO - paths cannot be defined separately for extract_load and transform,
      # - because the INPUT_SCHEMA_* variables need to be defined only once
      # - they must be inputs because env vars are not evaluated in the matrix (used in the reusable workflow)
      - .github/workflows/elta_dev.yml
      # Extract load
      - data_pipeline/meltano.yml
      - data_pipeline/meltano_conf/**/*
      - data_pipeline/requirements-meltano.txt
      - .github/workflows/reusable_extract_load.yml
      - .github/variables/elta_dev.env
      - .github/variables/elta_shared.env
      # Transform
      - data_pipeline/macros/**/*
      - data_pipeline/models/**/*
      - data_pipeline/profile/**/*
      - data_pipeline/dbt_project.yml
      - data_pipeline/packages.yml
      - data_pipeline/requirements-dbt.txt
      - data_pipeline/requirements-gooddata.txt
      - .github/workflows/reusable_transform.yml

env:
  INPUT_SCHEMA_FAA: "faa_input_stage"
  INPUT_SCHEMA_GITHUB: "github_input_stage"
  INPUT_SCHEMA_EXCHANGERATEHOST: "exchangeratehost_input_stage"
  INPUT_SCHEMA_ECOMMERCE_DEMO: "ecommerce_demo_input_stage"
  INPUT_SCHEMA_DATA_SCIENCE: "data_science_input_stage"

jobs:
  extract-load-dev:
    uses: ./.github/workflows/reusable_extract_load.yml
    with:
      ENV_FILE: ./.github/variables/elta_dev.env
      INPUT_SCHEMA_FAA: ${{ env.INPUT_SCHEMA_FAA }}
      INPUT_SCHEMA_GITHUB: ${{ env.INPUT_SCHEMA_GITHUB }}
      INPUT_SCHEMA_EXCHANGERATEHOST: ${{ env.INPUT_SCHEMA_EXCHANGERATEHOST }}
      INPUT_SCHEMA_ECOMMERCE_DEMO: ${{ env.INPUT_SCHEMA_ECOMMERCE_DEMO }}
      INPUT_SCHEMA_DATA_SCIENCE: ${{ env.INPUT_SCHEMA_DATA_SCIENCE }}
    secrets: inherit
  transform-dev:
    needs: extract-load-dev
    uses: ./.github/workflows/reusable_transform.yml
    with:
      ENV_FILE: ./.github/variables/elta_dev.env
      INPUT_SCHEMA_FAA: ${{ env.INPUT_SCHEMA_FAA }}
      INPUT_SCHEMA_GITHUB: ${{ env.INPUT_SCHEMA_GITHUB }}
      INPUT_SCHEMA_EXCHANGERATEHOST: ${{ env.INPUT_SCHEMA_EXCHANGERATEHOST }}
      INPUT_SCHEMA_ECOMMERCE_DEMO: ${{ env.INPUT_SCHEMA_ECOMMERCE_DEMO }}
      INPUT_SCHEMA_DATA_SCIENCE: ${{ env.INPUT_SCHEMA_DATA_SCIENCE }}
    secrets: inherit
@@ -0,0 +1,73 @@
on:
  workflow_call:
    inputs:
      ENV_FILE:
        required: true
        type: string
      INPUT_SCHEMA_FAA:
        required: true
        type: string
      INPUT_SCHEMA_GITHUB:
        required: true
        type: string
      INPUT_SCHEMA_EXCHANGERATEHOST:
        required: true
        type: string
      INPUT_SCHEMA_ECOMMERCE_DEMO:
        required: true
        type: string
      INPUT_SCHEMA_DATA_SCIENCE:
        required: true
        type: string

jobs:
  reusable_extract_load:
    strategy:
      matrix:
        MELTANO_SOURCE: [tap-github-repo, tap-github-org, tap-s3-csv-faa, tap-s3-csv-ecommerce-demo, tap-s3-csv-data-science]
        include:
          - MELTANO_SOURCE: tap-github-repo
            TARGET_SCHEMA: "${{ inputs.INPUT_SCHEMA_GITHUB }}"
          - MELTANO_SOURCE: tap-github-org
            TARGET_SCHEMA: "${{ inputs.INPUT_SCHEMA_GITHUB }}"
          - MELTANO_SOURCE: tap-s3-csv-faa
            TARGET_SCHEMA: "${{ inputs.INPUT_SCHEMA_FAA }}"
          - MELTANO_SOURCE: tap-s3-csv-ecommerce-demo
            TARGET_SCHEMA: "${{ inputs.INPUT_SCHEMA_ECOMMERCE_DEMO }}"
          - MELTANO_SOURCE: tap-s3-csv-data-science
            TARGET_SCHEMA: "${{ inputs.INPUT_SCHEMA_DATA_SCIENCE }}"
          # TODO - uncomment once https://github.com/anelendata/tap-exchangeratehost/issues/3 is fixed
          # - MELTANO_SOURCE: tap-exchangeratehost
          #   TARGET_SCHEMA: "${{ inputs.INPUT_SCHEMA_EXCHANGERATEHOST }}"
    env:
      GIT_DEPTH: "0"
    runs-on: ubuntu-latest
    environment: ${{ github.ref == 'refs/heads/prod' && 'prod' || github.ref == 'refs/heads/master' && 'staging' || 'dev' }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Set Environment Variables
        uses: ./.github/actions/setvars
        with:
          varFilePath: "./.github/variables/elta_shared.env ${{ inputs.ENV_FILE }}"

      - name: Setup Environment
        run: |
          ln -s ${{ env.IMAGES_WORKDIR }}/.meltano .meltano
      - name: Run Extract and Load
        timeout-minutes: 15
        env:
          FR_ARG: ${{ env.FULL_REFRESH == 'true' && '--full-refresh' || '' }}
          TARGET_SCHEMA: "${{ matrix.TARGET_SCHEMA }}"
          DB_PASS: "${{ secrets.DB_PASS }}"
          MELTANO_STATE_AWS_ACCESS_KEY_ID: "${{ secrets.MELTANO_STATE_AWS_ACCESS_KEY_ID }}"
          MELTANO_STATE_AWS_SECRET_ACCESS_KEY: "${{ secrets.MELTANO_STATE_AWS_SECRET_ACCESS_KEY }}"
          TAP_GITHUB_AUTH_TOKEN: "${{ secrets.TAP_GITHUB_AUTH_TOKEN }}"
          # TODO - move it to the deploy_analytics workflow
          # GOODDATA_PROFILES_FILE: "${{ secrets.GOODDATA_PROFILES_FILE }}"
        run: |
          # TODO - move it to the deploy_analytics workflow
          # echo $GOODDATA_PROFILES_FILE | base64 --decode > ~/.gooddata/profiles.yaml
          meltano --environment ${{ env.ELT_ENVIRONMENT }} run ${{ matrix.MELTANO_SOURCE }} ${{ env.MELTANO_TARGET }} ${{ env.FR_ARG }}
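For one leg of the matrix, the final step above boils down to a single Meltano invocation. Purely as an illustration (the values come from the variable files shown earlier, and TARGET_SCHEMA plus the DB_*/MELTANO_STATE_* variables are presumably consumed by data_pipeline/meltano.yml, which is not shown in this view), the tap-s3-csv-faa leg on a dev run would execute roughly:

# Illustrative expansion of the "Run Extract and Load" step for one matrix leg.
# Secrets are shown as placeholders; the real values come from the GitHub environment.
export TARGET_SCHEMA="faa_input_stage"   # from inputs.INPUT_SCHEMA_FAA
export DB_PASS="***" MELTANO_STATE_AWS_ACCESS_KEY_ID="***" MELTANO_STATE_AWS_SECRET_ACCESS_KEY="***"
meltano --environment gdp_dev run tap-s3-csv-faa target-snowflake

When FULL_REFRESH is set to "true", the workflow appends --full-refresh to that command so the extracted state is ignored and the tables are rebuilt from scratch.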
@@ -0,0 +1,83 @@
on:
  workflow_call:
    inputs:
      ENV_FILE:
        required: true
        type: string
      INPUT_SCHEMA_FAA:
        required: true
        type: string
      INPUT_SCHEMA_GITHUB:
        required: true
        type: string
      INPUT_SCHEMA_EXCHANGERATEHOST:
        required: true
        type: string
      INPUT_SCHEMA_ECOMMERCE_DEMO:
        required: true
        type: string
      INPUT_SCHEMA_DATA_SCIENCE:
        required: true
        type: string

jobs:
  reusable_transform:
    env:
      GIT_DEPTH: "0"
    runs-on: ubuntu-latest
    environment: ${{ github.ref == 'refs/heads/prod' && 'prod' || github.ref == 'refs/heads/master' && 'staging' || 'dev' }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4

      - name: Set Environment Variables
        uses: ./.github/actions/setvars
        with:
          varFilePath: "./.github/variables/elta_shared.env ${{ inputs.ENV_FILE }}"

      - name: Setup Environment
        env:
          GOODDATA_PROFILES_FILE: "${{ secrets.GOODDATA_PROFILES_FILE }}"
        run: |
          mkdir -p ~/.gooddata
          cd $SRC_DATA_PIPELINE
          # dbt packages are installed into the workdir during the Docker image build
          ln -s ${IMAGES_WORKDIR}/dbt_packages dbt_packages
          echo $GOODDATA_PROFILES_FILE | base64 --decode > ~/.gooddata/profiles.yaml
      - name: Run Transform
        timeout-minutes: 15
        env:
          FR_ARG: ${{ env.FULL_REFRESH == 'true' && '--full-refresh' || '' }}
          # dbt Cloud insists that env variables have the DBT_ prefix, so we have to duplicate them here.
          # The dbt profiles.yml file in this repo relies on the DBT_ prefix,
          # which means that even jobs not running against dbt Cloud rely on it.
          # More variables are duplicated later in this file based on which database is used.
          DBT_OUTPUT_SCHEMA: "${{ env.OUTPUT_SCHEMA }}"
          DBT_INPUT_SCHEMA_GITHUB: "${{ inputs.INPUT_SCHEMA_GITHUB }}"
          DBT_INPUT_SCHEMA_FAA: "${{ inputs.INPUT_SCHEMA_FAA }}"
          DBT_INPUT_SCHEMA_EXCHANGERATEHOST: "${{ inputs.INPUT_SCHEMA_EXCHANGERATEHOST }}"
          DBT_INPUT_SCHEMA_ECOMMERCE_DEMO: "${{ inputs.INPUT_SCHEMA_ECOMMERCE_DEMO }}"
          DBT_INPUT_SCHEMA_DATA_SCIENCE: "${{ inputs.INPUT_SCHEMA_DATA_SCIENCE }}"
          DBT_DB_USER: "${{ env.DB_USER }}"
          DBT_DB_WAREHOUSE: "${{ env.DB_WAREHOUSE }}"
          DBT_DB_ACCOUNT: "${{ env.DB_ACCOUNT }}"
          DBT_DB_HOST: "${{ env.DB_HOST }}"
          DBT_DB_PORT: "${{ env.DB_PORT }}"
          DBT_DB_NAME: "${{ env.DB_NAME }}"
          DBT_DB_PASS: "${{ secrets.DB_PASS }}"
          # TODO - move this to a separate job dedicated to dbt Cloud.
          # Notify by sending a comment to the merge request
          # if the duration of a dbt model exceeds the average duration of the last X runs by DBT_ALLOWED_DEGRADATION percent.
          DBT_ALLOWED_DEGRADATION: 20
          DBT_INCREMENTAL_STRATEGY: "merge"
        run: |
          dbt run --profiles-dir $DBT_PROFILES_DIR --profile $ELT_ENVIRONMENT --target $DBT_TARGET $FR_ARG
          dbt test --profiles-dir $DBT_PROFILES_DIR --profile $ELT_ENVIRONMENT --target $DBT_TARGET
          gooddata-dbt provision_workspaces
          gooddata-dbt register_data_sources $GOODDATA_UPPER_CASE --profile $ELT_ENVIRONMENT --target $DBT_TARGET
          gooddata-dbt deploy_ldm $GOODDATA_UPPER_CASE --profile $ELT_ENVIRONMENT --target $DBT_TARGET
          # Invalidates GoodData caches
          gooddata-dbt upload_notification --profile $ELT_ENVIRONMENT --target $DBT_TARGET
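The DBT_*-prefixed duplication above only makes sense together with a profiles.yml (under data_pipeline/profile, not shown in this commit view) that reads those variables through dbt's env_var() function. Purely as an illustration of that wiring, and not the actual profile from the repo, a Snowflake output consistent with the variables set here could look roughly like:

# Hypothetical data_pipeline/profile/profiles.yml entry - for illustration only
gdp_dev:                     # profile name matching ELT_ENVIRONMENT, selected via --profile $ELT_ENVIRONMENT
  target: snowflake          # matching DBT_TARGET, selected via --target $DBT_TARGET
  outputs:
    snowflake:
      type: snowflake
      account: "{{ env_var('DBT_DB_ACCOUNT') }}"
      user: "{{ env_var('DBT_DB_USER') }}"
      password: "{{ env_var('DBT_DB_PASS') }}"
      warehouse: "{{ env_var('DBT_DB_WAREHOUSE') }}"
      database: "{{ env_var('DBT_DB_NAME') }}"
      schema: "{{ env_var('DBT_OUTPUT_SCHEMA') }}"
      threads: 4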