Skip to content

Commit

Permalink
Bump Meltano
Browse files Browse the repository at this point in the history
Fixing the tap-jira errors ultimately required a complete version bump of everything related to Meltano.
As a side effect, the Meltano and dbt venvs can finally be merged into a single venv.
  • Loading branch information
jaceksan committed Jan 15, 2025
1 parent 37abf5d commit 8761be3
Show file tree
Hide file tree
Showing 19 changed files with 442 additions and 407 deletions.
2 changes: 1 addition & 1 deletion .env.base
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export GOODDATA_PROFILES="demo_cicd closed_beta closed_beta_se labs_se"
export DBT_CUSTOM_IMAGE_BASE="gooddata-data-pipeline-dbt"
export MELTANO_CUSTOM_IMAGE_BASE="gooddata-data-pipeline-meltano"
export GOODDATA_SDK_CUSTOM_IMAGE_BASE="gooddata-data-pipeline-gooddata"
export MELTANO_VERSION="v3.1.0-python3.11"
export MELTANO_VERSION="v3.6.0-python3.12"
export DBT_VERSION="1.9.1"
export GOODDATA_SDK_VERSION="1.33.0"
export PYTHON_IMAGE="python:3.12.8-bookworm"
4 changes: 2 additions & 2 deletions .github/workflows/reusable_extract_load.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ jobs:
MELTANO_STATE_AWS_ACCESS_KEY_ID: "${{ secrets.MELTANO_STATE_AWS_ACCESS_KEY_ID }}"
MELTANO_STATE_AWS_SECRET_ACCESS_KEY: "${{ secrets.MELTANO_STATE_AWS_SECRET_ACCESS_KEY }}"
TAP_GITHUB_AUTH_TOKEN: "${{ secrets.TAP_GITHUB_AUTH_TOKEN }}"
TAP_JIRA_AUTH_USERNAME: "${{ secrets.TAP_JIRA_AUTH_USERNAME }}"
TAP_JIRA_AUTH_PASSWORD: "${{ secrets.TAP_JIRA_AUTH_PASSWORD }}"
TAP_JIRA_EMAIL: "${{ secrets.TAP_JIRA_EMAIL }}"
TAP_JIRA_API_TOKEN: "${{ secrets.TAP_JIRA_API_TOKEN }}"
MOTHERDUCK_TOKEN: "${{ secrets.MOTHERDUCK_TOKEN }}"
run: |
source .env.${{ inputs.ENVIRONMENT }} ${{ inputs.DB_OVERRIDE }}
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile_meltano
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG MELTANO_VERSION=v3.1.0-python3.11
ARG MELTANO_VERSION=v3.6.0-python3.12
ARG MELTANO_IMAGE=meltano/meltano:$MELTANO_VERSION
FROM $MELTANO_IMAGE

Expand Down
15 changes: 6 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,15 @@ dbt_compile:

dev:
# Create virtualenv
# Freeze to 3.10 because higher versions are not yet supported by underlying tools
python3.10 -m venv .venv_el --upgrade-deps
python3.12 -m venv .venv --upgrade-deps
# Install Meltano and required plugins
.venv_el/bin/pip3 install -r $(SRC_DATA_PIPELINE)/requirements-meltano.txt
.venv_el/bin/meltano --cwd $(SRC_DATA_PIPELINE) install
# dbt must be installed to separate venv, there are conflicts with what Meltano needs
python3.12 -m venv .venv_t --upgrade-deps
.venv/bin/pip3 install -r $(SRC_DATA_PIPELINE)/requirements-meltano.txt
.venv/bin/meltano --cwd $(SRC_DATA_PIPELINE) install
# Install dbt and required plugins
.venv_t/bin/pip3 install -r $(SRC_DATA_PIPELINE)/requirements-dbt.txt
.venv_t/bin/dbt deps --project-dir $(SRC_DATA_PIPELINE) --profiles-dir $(SRC_DATA_PIPELINE)/profile
.venv/bin/pip3 install -r $(SRC_DATA_PIPELINE)/requirements-dbt.txt
.venv/bin/dbt deps --project-dir $(SRC_DATA_PIPELINE) --profiles-dir $(SRC_DATA_PIPELINE)/profile
# Install dbt-gooddata plugin and related dependencies
.venv_t/bin/pip3 install -r $(SRC_DATA_PIPELINE)/requirements-gooddata.txt
.venv/bin/pip3 install -r $(SRC_DATA_PIPELINE)/requirements-gooddata.txt

extract_load:
cd $(SRC_DATA_PIPELINE) && export TARGET_SCHEMA=$$INPUT_SCHEMA_GITHUB && meltano --environment $$ELT_ENVIRONMENT run tap-github-repo $$MELTANO_TARGET tap-github-org $$MELTANO_TARGET $$FR
Expand Down
10 changes: 3 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,9 @@ Bootstrap developer environment:
# Creates virtualenv and installs all dependencies
make dev

# Activate virtualenv for extract_load part or for transform/analytics parts
source .venv_el/bin/activate
# Activate virtualenv for transform and analytics parts
source .venv_t/bin/activate
# You should see e.g. a `(.venv_el)` appear at the beginning of your terminal prompt indicating that you are working inside the `virtualenv`.

# TODO: once my PR https://github.com/meltano/meltano/pull/8302 is merged, we can merge two venvs into one
# Activate virtualenv
source .venv/bin/activate
# You should see a `(.venv)` appear at the beginning of your terminal prompt indicating that you are working inside the `virtualenv`.

# Deactivate virtual env once you are done
deactivate
Expand Down
3 changes: 1 addition & 2 deletions data_pipeline/meltano-plugins.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ plugins:
- name: target-postgres
variant: meltanolabs
- name: target-snowflake
variant: transferwise
pip_url: pipelinewise-target-snowflake numpy<2.0
variant: meltanolabs
- name: target-vertica
variant: full360
pip_url: git+https://github.com/full360/pipelinewise-target-vertica.git
Expand Down
12 changes: 8 additions & 4 deletions data_pipeline/meltano.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ environments:
config:
start_date: '2010-01-01'
- name: tap-jira
start_date: '2023-01-01'
config:
start_date: '2024-01-01'

- name: cicd_prod
config:
Expand All @@ -22,7 +23,8 @@ environments:
config:
start_date: '2010-01-01'
- name: tap-jira
start_date: '2023-01-01'
config:
start_date: '2024-01-01'
- name: cicd_cloud_staging
config:
plugins:
Expand All @@ -31,7 +33,8 @@ environments:
config:
start_date: '2010-01-01'
- name: tap-jira
start_date: '2023-01-01'
config:
start_date: '2024-01-01'
# Dedicated for dbt Cloud. We need to separate their state files (they run against different databases)
- name: cicd_cloud_dev
- name: cicd_cloud_prod
Expand All @@ -42,7 +45,8 @@ environments:
config:
start_date: '2010-01-01'
- name: tap-jira
start_date: '2023-01-01'
config:
start_date: '2024-01-01'

include_paths:
- "./meltano_conf/extractors/github.yml"
Expand Down
4 changes: 1 addition & 3 deletions data_pipeline/meltano_conf/extractors/jira.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@ plugins:
pip_url: git+https://github.com/MeltanoLabs/tap-jira.git
config:
domain: gooddata.atlassian.net
auth:
flow: password
start_date: '2024-01-01'
start_date: '2024-12-01'
flattening_enabled: 'True'
flattening_max_depth: 1
page_size:
Expand Down
6 changes: 2 additions & 4 deletions data_pipeline/meltano_conf/loaders/loaders.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,11 @@ plugins:
database: ${DB_NAME}
default_target_schema: ${TARGET_SCHEMA}
add_metadata_columns: true
dialect+driver: postgresql+psycopg2
- name: target-snowflake
variant: transferwise
pip_url: pipelinewise-target-snowflake numpy<2.0
variant: meltanolabs
config:
account: ${DB_ACCOUNT}
dbname: ${DB_NAME}
database: ${DB_NAME}
user: ${DB_USER}
password: ${DB_PASS}
warehouse: ${DB_WAREHOUSE}
Expand Down
73 changes: 61 additions & 12 deletions data_pipeline/plugins/extractors/tap-github--meltanolabs.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"label": "GitHub",
"docs": "https://hub.meltano.com/extractors/tap-github--meltanolabs",
"repo": "https://github.com/MeltanoLabs/tap-github",
"pip_url": "git+https://github.com/MeltanoLabs/tap-github.git",
"pip_url": "meltanolabs-tap-github",
"description": "Code hosting platform",
"logo_url": "https://hub.meltano.com/assets/logos/extractors/github.png",
"capabilities": [
Expand Down Expand Up @@ -42,20 +42,27 @@
"label": "Additional Auth Tokens",
"description": "List of GitHub tokens to authenticate with. Streams will loop through them when hitting rate limits."
},
{
"name": "auth_app_keys",
"kind": "array",
"label": "Auth App Keys",
"description": "List of GitHub App credentials to authenticate with. Each credential can be constructed by combining an App ID and App private key into the format `:app_id:;;-----BEGIN RSA PRIVATE KEY----- _YOUR_P_KEY_ -----END RSA PRIVATE KEY-----`."
},
{
"name": "auth_token",
"kind": "password",
"kind": "string",
"label": "Auth Token",
"description": "GitHub token to authenticate with."
"description": "GitHub token to authenticate with.",
"sensitive": true
},
{
"name": "batch_config.encoding.compression",
"kind": "options",
"label": "Batch Config Encoding Compression",
"label": "Batch Compression Format",
"description": "Compression format to use for batch files.",
"options": [
{
"label": "Gzip",
"label": "GZIP",
"value": "gzip"
},
{
Expand All @@ -67,37 +74,58 @@
{
"name": "batch_config.encoding.format",
"kind": "options",
"label": "Batch Config Encoding Format",
"label": "Batch Encoding Format",
"description": "Format to use for batch files.",
"options": [
{
"label": "Jsonl",
"label": "JSONL",
"value": "jsonl"
},
{
"label": "Parquet",
"value": "parquet"
}
]
},
{
"name": "batch_config.storage.prefix",
"kind": "string",
"label": "Batch Config Storage Prefix",
"label": "Batch Storage Prefix",
"description": "Prefix to use when writing batch files."
},
{
"name": "batch_config.storage.root",
"kind": "string",
"label": "Batch Config Storage Root",
"label": "Batch Storage Root",
"description": "Root path to use when writing batch files."
},
{
"name": "expiry_time_buffer",
"kind": "integer",
"label": "Expiry Time Buffer"
},
{
"name": "faker_config.locale",
"kind": "array",
"label": "Faker Locale",
"description": "One or more LCID locale strings to produce localized output for: https://faker.readthedocs.io/en/master/#localization"
},
{
"name": "faker_config.seed",
"kind": "string",
"label": "Faker Seed",
"description": "Value to seed the Faker generator for deterministic output: https://faker.readthedocs.io/en/master/#seeding-the-generator"
},
{
"name": "flattening_enabled",
"kind": "boolean",
"label": "Flattening Enabled",
"label": "Enable Schema Flattening",
"description": "'True' to enable schema flattening and automatically expand nested properties."
},
{
"name": "flattening_max_depth",
"kind": "integer",
"label": "Flattening Max Depth",
"label": "Max Flattening Depth",
"description": "The max depth to flatten schemas."
},
{
Expand Down Expand Up @@ -151,6 +179,27 @@
"kind": "object",
"label": "Stream Maps"
},
{
"name": "stream_options.milestones.state",
"kind": "options",
"value": "open",
"label": "Stream Options Milestones State",
"description": "Configures which states are of interest. Must be one of [open, closed, all], defaults to open.",
"options": [
{
"label": "Open",
"value": "open"
},
{
"label": "Closed",
"value": "closed"
},
{
"label": "All",
"value": "all"
}
]
},
{
"name": "user_agent",
"kind": "string",
Expand All @@ -173,4 +222,4 @@
"*.*",
"!traffic_*.*"
]
}
}
60 changes: 25 additions & 35 deletions data_pipeline/plugins/extractors/tap-jira--meltanolabs.lock
Original file line number Diff line number Diff line change
Expand Up @@ -34,38 +34,11 @@
],
"settings": [
{
"name": "auth.flow",
"name": "api_token",
"kind": "string",
"label": "Auth Flow",
"description": "The authentication type to use.",
"options": [
{
"label": "OAuth",
"value": "oauth"
},
{
"label": "Password",
"value": "password"
}
]
},
{
"name": "auth.access_token",
"kind": "password",
"label": "Auth Access Token",
"description": "The Jira API access token if using OAuth auth flow."
},
{
"name": "auth.username",
"kind": "string",
"label": "Auth Username",
"description": "The Jira username if using the user/pass auth flow."
},
{
"name": "auth.password",
"kind": "password",
"label": "Auth Password",
"description": "The Jira password if using the user/pass auth flow."
"label": "API Token",
"description": "Jira API Token.",
"sensitive": true
},
{
"name": "batch_config.encoding.compression",
Expand All @@ -74,7 +47,7 @@
"description": "Compression format to use for batch files.",
"options": [
{
"label": "Gzip",
"label": "GZIP",
"value": "gzip"
},
{
Expand All @@ -90,8 +63,12 @@
"description": "Format to use for batch files.",
"options": [
{
"label": "Jsonl",
"label": "JSONL",
"value": "jsonl"
},
{
"label": "Parquet",
"value": "parquet"
}
]
},
Expand All @@ -111,7 +88,13 @@
"name": "domain",
"kind": "string",
"label": "Domain",
"description": "Site URL"
"description": "The Domain for your Jira account, e.g. meltano.atlassian.net"
},
{
"name": "email",
"kind": "string",
"label": "Email",
"description": "The user email for your Jira account."
},
{
"name": "end_date",
Expand All @@ -131,6 +114,13 @@
"label": "Flattening Max Depth",
"description": "The max depth to flatten schemas."
},
{
"name": "page_size.issues",
"kind": "integer",
"value": 100,
"label": "Page Size Issues",
"description": "Page size for issues stream"
},
{
"name": "start_date",
"kind": "date_iso8601",
Expand All @@ -150,4 +140,4 @@
"description": "Config object for stream maps capability. For more information check out [Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html)."
}
]
}
}
Loading

0 comments on commit 8761be3

Please sign in to comment.