Skip to content

Commit

Permalink
iter: Gitlab->Github phase 2 - extract_load, transform (pre-merge)
Browse files Browse the repository at this point in the history
  • Loading branch information
jaceksan committed Jan 4, 2024
1 parent 4c0a81c commit 0fbc649
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 41 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ on:

jobs:
reusable_transform:
environment: ${{ inputs.ENVIRONMENT }}
env:
GIT_DEPTH: "0"
runs-on: ubuntu-latest
environment: ${{ github.ref == 'refs/heads/prod' && 'prod' || github.ref == 'refs/heads/master' && 'staging' || 'dev' }}
container: ghcr.io/$GITHUB_REPOSITORY/${{ env.DBT_CUSTOM_IMAGE }}
steps:
- name: Checkout Repository
uses: actions/checkout@v4
Expand Down
70 changes: 43 additions & 27 deletions .github/workflows/elta_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,58 @@ on:
# Shared
- .github/workflows/elta_dev.yml
- .github/variables/elta_shared.yml
# Build custom Meltano image
- data_pipeline/requirements-meltano.txt
- data_pipeline/meltano-plugins.yml
- data_pipeline/plugins/**/*.lock
- Dockerfile_meltano
# Extract load
- data_pipeline/meltano.yml
- data_pipeline/meltano_conf/**/*
- data_pipeline/requirements-meltano.txt
- .github/workflows/reusable_extract_load.yml
# Transform
- data_pipeline/macros/**/*
- data_pipeline/models/**/*
- data_pipeline/profile/**/*
- data_pipeline/dbt_project.yml
- data_pipeline/packages.yml
- data_pipeline/requirements-dbt.txt
- data_pipeline/requirements-gooddata.txt
- .github/workflows/reusable_transform.yml
# - data_pipeline/macros/**/*
# - data_pipeline/models/**/*
# - data_pipeline/profile/**/*
# - data_pipeline/dbt_project.yml
# - data_pipeline/packages.yml
# - data_pipeline/requirements-dbt.txt
# - data_pipeline/requirements-gooddata.txt
# - .github/workflows/reusable_transform.yml

jobs:
# build-and-push-custom-meltano-image:
# uses: ./.github/workflows/reusable_build.yml
# with:
# TAGS: |
# ghcr.io/${{ github.repository }}/${{ vars.MELTANO_CUSTOM_IMAGE_BASE }}:${{ vars.MELTANO_VERSION }}
# DOCKER_FILE: Dockerfile_meltano
# BUILD_ARGS: |
# MELTANO_VERSION=${{ vars.MELTANO_VERSION }}
# IMAGES_WORKDIR=${{ vars.IMAGES_WORKDIR }}
# secrets: inherit
extract-load-dev:
# needs: build-and-push-custom-meltano-image
uses: ./.github/workflows/reusable_extract_load.yml
with:
ENV_FILE: ./.github/variables/elta_dev.env
# TODO - hardcoded in multiple places. They have to be passed as inputs because matrix cannot evaluate env vars
INPUT_SCHEMA_FAA: "faa_input_stage"
INPUT_SCHEMA_GITHUB: "github_input_stage"
INPUT_SCHEMA_EXCHANGERATEHOST: "exchangeratehost_input_stage"
INPUT_SCHEMA_ECOMMERCE_DEMO: "ecommerce_demo_input_stage"
INPUT_SCHEMA_DATA_SCIENCE: "data_science_input_stage"
secrets: inherit
transform-dev:
needs: extract-load-dev
uses: ./.github/workflows/reusable_transform.yml
with:
ENV_FILE: ./.github/variables/elta_dev.env
# TODO - hardcoded in multiple places. They have to be passed as inputs because matrix cannot evaluate env vars
INPUT_SCHEMA_FAA: "faa_input_stage"
INPUT_SCHEMA_GITHUB: "github_input_stage"
INPUT_SCHEMA_EXCHANGERATEHOST: "exchangeratehost_input_stage"
INPUT_SCHEMA_ECOMMERCE_DEMO: "ecommerce_demo_input_stage"
INPUT_SCHEMA_DATA_SCIENCE: "data_science_input_stage"
INPUT_SCHEMA_FAA: "${{ vars.INPUT_SCHEMA_FAA }}"
INPUT_SCHEMA_GITHUB: "${{ vars.INPUT_SCHEMA_GITHUB }}"
INPUT_SCHEMA_EXCHANGERATEHOST: "${{ vars.INPUT_SCHEMA_EXCHANGERATEHOST }}"
INPUT_SCHEMA_ECOMMERCE_DEMO: "${{ vars.INPUT_SCHEMA_ECOMMERCE_DEMO }}"
INPUT_SCHEMA_DATA_SCIENCE: "${{ vars.INPUT_SCHEMA_DATA_SCIENCE }}"
ENVIRONMENT: "dev"
FULL_REFRESH: "false" # TODO: define workflow_dispatch with this parameter and set it here
MELTANO_CUSTOM_IMAGE: "ghcr.io/${{ github.repository }}/${{ vars.MELTANO_CUSTOM_IMAGE_BASE }}:${{ vars.MELTANO_VERSION }}"
secrets: inherit
# transform-dev:
# needs: extract-load-dev
# uses: ./.github/workflows/reusable_transform.yml
# with:
# INPUT_SCHEMA_FAA: "${{ vars.INPUT_SCHEMA_FAA }}"
# INPUT_SCHEMA_GITHUB: "${{ vars.INPUT_SCHEMA_GITHUB }}"
# INPUT_SCHEMA_EXCHANGERATEHOST: "${{ vars.INPUT_SCHEMA_EXCHANGERATEHOST }}"
# INPUT_SCHEMA_ECOMMERCE_DEMO: "${{ vars.INPUT_SCHEMA_ECOMMERCE_DEMO }}"
# INPUT_SCHEMA_DATA_SCIENCE: "${{ vars.INPUT_SCHEMA_DATA_SCIENCE }}"
# ENVIRONMENT: "dev"
# secrets: inherit
37 changes: 37 additions & 0 deletions .github/workflows/reusable_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Reusable workflow trigger: other workflows invoke this file through
# `uses: ./.github/workflows/reusable_build.yml` and supply the inputs below.
on:
  workflow_call:
    inputs:
      # Forwarded unchanged to the `tags` option of docker/build-push-action.
      TAGS:
        description: "Image tag(s) to build and push"
        required: true
        type: string
      # Forwarded unchanged to the `file` option of docker/build-push-action.
      DOCKER_FILE:
        description: "Path of the Dockerfile to build"
        required: true
        type: string
      # Forwarded unchanged to the `build-args` option of
      # docker/build-push-action. Optional: callers may omit it, in which case
      # an empty string is passed and no build arguments are set.
      BUILD_ARGS:
        description: "Build-time variables, one NAME=value per line"
        required: false
        default: ""
        type: string

jobs:
  # Builds the image described by inputs.DOCKER_FILE and pushes it to the
  # GitHub Container Registry under inputs.TAGS.
  reusable_build:
    env:
      # NOTE(review): no step in this job reads GIT_DEPTH; it looks like a
      # leftover from the GitLab pipeline - confirm before removing.
      GIT_DEPTH: "0"
    runs-on: ubuntu-latest
    # Deployment environment is derived from the branch:
    # prod branch -> 'prod', master branch -> 'staging', anything else -> 'dev'.
    environment: ${{ github.ref == 'refs/heads/prod' && 'prod' || github.ref == 'refs/heads/master' && 'staging' || 'dev' }}
    # Pushing to ghcr.io with GITHUB_TOKEN needs write access to packages;
    # build-push-action fetches the repository itself (Git context), which
    # needs read access to contents. A called workflow cannot elevate the
    # token, so the calling workflow must grant at least these permissions.
    permissions:
      contents: read
      packages: write
    steps:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # No checkout step: build-push-action builds from the Git context by
      # default, so `file` is resolved relative to the repository root.
      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          push: true
          tags: ${{ inputs.TAGS }}
          file: ${{ inputs.DOCKER_FILE }}
          build-args: ${{ inputs.BUILD_ARGS }}
31 changes: 18 additions & 13 deletions .github/workflows/reusable_extract_load.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
on:
workflow_call:
inputs:
ENV_FILE:
ENVIRONMENT:
required: true
type: string
INPUT_SCHEMA_FAA:
Expand All @@ -19,9 +19,16 @@ on:
INPUT_SCHEMA_DATA_SCIENCE:
required: true
type: string
FULL_REFRESH:
required: true
type: string
MELTANO_CUSTOM_IMAGE:
required: true
type: string

jobs:
reusable_extract_load:
environment: ${{ inputs.ENVIRONMENT }}
strategy:
matrix:
MELTANO_SOURCE: [tap-github-repo, tap-github-org, tap-s3-csv-faa, tap-s3-csv-ecommerce-demo, tap-s3-csv-data-science]
Expand All @@ -42,32 +49,30 @@ jobs:
env:
GIT_DEPTH: "0"
runs-on: ubuntu-latest
environment: ${{ github.ref == 'refs/heads/prod' && 'prod' || github.ref == 'refs/heads/master' && 'staging' || 'dev' }}
container: ${{ inputs.MELTANO_CUSTOM_IMAGE }}
steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Set Environment Variables
uses: ./.github/actions/setvars
with:
varFilePath: "./.github/variables/elta_shared.env ${{ inputs.ENV_FILE }}"
#- name: Set Environment Variables
# uses: ./.github/actions/setvars
# with:
# varFilePath: "./.github/variables/elta_shared.env ${{ inputs.ENV_FILE }}"

- name: Setup Environment
run: |
ln -s ${{ env.IMAGES_WORKDIR }}/.meltano .meltano
cd ${{ vars.SRC_DATA_PIPELINE }}
ln -s ${{ vars.IMAGES_WORKDIR }}/.meltano .meltano
- name: Run Extract and Load
timeout-minutes: 15
env:
FR_ARG: ${{ env.FULL_REFRESH == 'true' && '--full-refresh' || '' }}
FR_ARG: ${{ inputs.FULL_REFRESH == 'true' && '--full-refresh' || '' }}
TARGET_SCHEMA: "${{ matrix.TARGET_SCHEMA }}"
DB_PASS: "${{ secrets.DB_PASS }}"
MELTANO_STATE_AWS_ACCESS_KEY_ID: "${{ secrets.MELTANO_STATE_AWS_ACCESS_KEY_ID }}"
MELTANO_STATE_AWS_SECRET_ACCESS_KEY: "${{ secrets.MELTANO_STATE_AWS_SECRET_ACCESS_KEY }}"
TAP_GITHUB_AUTH_TOKEN: "${{ secrets.TAP_GITHUB_AUTH_TOKEN }}"
# TODO - move it to deploy_analytics workflow
# GOODDATA_PROFILES_FILE: "${{ secrets.GOODDATA_PROFILES_FILE }}"
run: |
# TODO - move it to deploy_analytics workflow
# echo $GOODDATA_PROFILES_FILE | base64 --decode > ~/.gooddata/profiles.yaml
meltano --environment ${{ env.ELT_ENVIRONMENT }} run ${{ matrix.MELTANO_SOURCE }} ${{ env.MELTANO_TARGET }} ${{ env.FR_ARG }}
cd ${{ vars.SRC_DATA_PIPELINE }}
meltano --environment ${{ vars.ELT_ENVIRONMENT }} run ${{ matrix.MELTANO_SOURCE }} ${{ vars.MELTANO_TARGET }} ${{ env.FR_ARG }}

0 comments on commit 0fbc649

Please sign in to comment.