diff --git a/.github/workflows/docker-ingestion-base.yml b/.github/workflows/docker-ingestion-base.yml
new file mode 100644
index 00000000000000..9d0c4c5d259c06
--- /dev/null
+++ b/.github/workflows/docker-ingestion-base.yml
@@ -0,0 +1,50 @@
+# Builds the multi-arch datahub-ingestion base image and pushes it to Docker Hub.
+# Triggered by pushes to master that touch docker/datahub-ingestion/**, or manually.
+name: ingestion base
+on:
+  push:
+    branches:
+      - master
+    paths:
+      - "docker/datahub-ingestion/**"
+  workflow_dispatch:
+
+# One run per ref at a time; a newer run cancels the one still in progress.
+# No pull_request trigger is configured here, so the key falls back to github.ref.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+# Least privilege for GITHUB_TOKEN: the job only checks out the repository;
+# the registry push authenticates with the Docker Hub secrets instead.
+permissions:
+  contents: read
+
+jobs:
+
+  build-base:
+    name: Build and Push Docker Image to Docker Hub
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v1
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+      - name: Build and Push image
+        uses: docker/build-push-action@v2
+        with:
+          context: ./docker/datahub-ingestion
+          file: ./docker/datahub-ingestion/base.Dockerfile
+          # Both architectures are built and published under the single tag below.
+          platforms: linux/amd64,linux/arm64
+          tags: acryldata/datahub-ingestion-base:latest
+          push: true
diff --git a/docker/datahub-ingestion/base-requirements.txt b/docker/datahub-ingestion/base-requirements.txt
new file mode 100644
index 00000000000000..ea0fc24498e58c
--- /dev/null
+++ b/docker/datahub-ingestion/base-requirements.txt
@@ -0,0 +1,313 @@
+absl-py==1.1.0
+acryl-iceberg-legacy==0.0.4
+acryl-PyHive==0.6.13
+aenum==3.1.11
+aiohttp==3.8.1
+aiosignal==1.2.0
+alembic==1.8.0
+altair==4.2.0
+anyio==3.6.1
+apache-airflow==2.2.5
+apache-airflow-providers-ftp==3.0.0
+apache-airflow-providers-http==3.0.0
+apache-airflow-providers-imap==3.0.0
+apache-airflow-providers-sqlite==3.0.0
+apispec==3.3.2
+argcomplete==2.0.0
+argon2-cffi==21.3.0
+argon2-cffi-bindings==21.2.0
+asgiref==3.5.2
+asn1crypto==1.5.1
+asttokens==2.0.5
+async-timeout==4.0.2
+attrs==20.3.0
+avro==1.10.2 +avro-gen3==0.7.4 +azure-core==1.24.1 +azure-identity==1.10.0 +azure-storage-blob==12.12.0 +azure-storage-file-datalake==12.7.0 +Babel==2.10.3 +backcall==0.2.0 +backports.zoneinfo==0.2.1 +beautifulsoup4==4.11.1 +bleach==5.0.0 +blinker==1.4 +boto3==1.24.14 +botocore==1.27.14 +bracex==2.3.post1 +cached-property==1.5.2 +cachelib==0.8.0 +cachetools==5.2.0 +cattrs==1.10.0 +certifi==2022.6.15 +cffi==1.15.0 +chardet==4.0.0 +charset-normalizer==2.0.12 +click==8.1.3 +click-default-group==1.2.2 +clickclick==20.10.2 +clickhouse-driver==0.2.4 +clickhouse-sqlalchemy==0.1.8 +colorama==0.4.5 +colorlog==6.6.0 +commonmark==0.9.1 +confluent-kafka==1.8.2 +connexion==2.13.1 +croniter==1.3.5 +cryptography==36.0.2 +cx-Oracle==8.3.0 +debugpy==1.6.0 +decorator==5.1.1 +defusedxml==0.7.1 +Deprecated==1.2.13 +dill==0.3.5.1 +dnspython==2.2.1 +docker==5.0.3 +docutils==0.16 +ecdsa==0.17.0 +elasticsearch==7.13.4 +email-validator==1.2.1 +entrypoints==0.4 +et-xmlfile==1.1.0 +executing==0.8.3 +expandvars==0.9.0 +fastapi==0.78.0 +fastavro==1.5.1 +fastjsonschema==2.15.3 +feast==0.18.0 +Flask==1.1.2 +Flask-AppBuilder==3.4.5 +Flask-Babel==2.0.0 +Flask-Caching==1.11.1 +Flask-Cors==3.0.10 +Flask-JWT-Extended==3.25.1 +Flask-Login==0.4.1 +Flask-OpenID==1.3.0 +Flask-Session==0.4.0 +Flask-SQLAlchemy==2.5.1 +Flask-WTF==0.14.3 +flatdict==4.0.1 +frozenlist==1.3.0 +future==0.18.2 +GeoAlchemy2==0.11.1 +google-api-core==2.8.0 +google-auth==2.8.0 +google-cloud-appengine-logging==1.1.2 +google-cloud-audit-log==0.2.0 +google-cloud-bigquery==3.2.0 +google-cloud-bigquery-storage==2.13.2 +google-cloud-core==2.3.1 +google-cloud-logging==3.1.1 +google-crc32c==1.3.0 +google-resumable-media==2.3.3 +googleapis-common-protos==1.52.0 +graphviz==0.20 +great-expectations==0.15.2 +greenlet==1.1.2 +grpc-google-iam-v1==0.12.3 +grpcio==1.44.0 +grpcio-reflection==1.44.0 +grpcio-status==1.44.0 +grpcio-tools==1.44.0 +gunicorn==20.1.0 +h11==0.12.0 +hmsclient==0.1.1 +httpcore==0.15.0 +httptools==0.4.0 +httpx==0.23.0 
+humanfriendly==10.0 +idna==3.3 +ijson==3.1.4 +importlib-metadata==4.11.4 +importlib-resources==5.8.0 +inflection==0.5.1 +ipykernel==6.15.0 +ipython==8.4.0 +ipython-genutils==0.2.0 +iso8601==1.0.2 +isodate==0.6.1 +itsdangerous==1.1.0 +jedi==0.18.1 +Jinja2==3.0.3 +jmespath==1.0.1 +JPype1==1.4.0 +jsonlines==3.0.0 +jsonpatch==1.32 +jsonpointer==2.3 +jsonschema==3.2.0 +jupyter-client==7.3.4 +jupyter-core==4.10.0 +jupyterlab-pygments==0.2.2 +lazy-object-proxy==1.7.1 +linear-tsv==1.1.0 +lkml==1.2.0 +lockfile==0.12.2 +looker-sdk==22.2.1 +Mako==1.2.0 +Markdown==3.3.7 +MarkupSafe==2.0.1 +marshmallow==3.16.0 +marshmallow-enum==1.5.1 +marshmallow-oneofschema==3.0.1 +marshmallow-sqlalchemy==0.26.1 +matplotlib-inline==0.1.3 +mistune==0.8.4 +mixpanel==4.9.0 +mmh3==3.0.0 +more-itertools==8.13.0 +moto==3.1.14 +msal==1.16.0 +msal-extensions==1.0.0 +msrest==0.7.1 +multidict==6.0.2 +mypy-extensions==0.4.3 +nbclient==0.6.3 +nbconvert==6.5.0 +nbformat==5.4.0 +nest-asyncio==1.5.5 +networkx==2.8.4 +notebook==6.4.12 +numpy==1.22.4 +oauthlib==3.2.0 +okta==1.7.0 +openpyxl==3.0.10 +orderedset==2.0.3 +oscrypto==1.3.0 +packaging==21.3 +pandas==1.4.2 +pandavro==1.5.2 +pandocfilters==1.5.0 +parse==1.19.0 +parso==0.8.3 +pendulum==2.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +portalocker==2.4.0 +prison==0.2.1 +progressbar2==4.0.0 +prometheus-client==0.14.1 +prompt-toolkit==3.0.29 +proto-plus==1.19.6 +protobuf==3.20.1 +psutil==5.9.1 +psycopg2-binary==2.9.3 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pure-sasl==0.6.2 +py4j==0.10.9 +pyarrow==6.0.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pyathena==2.4.1 +pycparser==2.21 +pycryptodome==3.14.1 +pycryptodomex==3.14.1 +pydantic==1.9.1 +pydash==5.1.0 +pydeequ==1.0.1 +pydruid==0.6.3 +Pygments==2.12.0 +PyJWT==1.7.1 +pymongo==4.1.1 +PyMySQL==1.0.2 +pyOpenSSL==22.0.0 +pyparsing==2.4.7 +pyrsistent==0.18.1 +pyspark==3.0.3 +python-daemon==2.3.0 +python-dateutil==2.8.2 +python-dotenv==0.20.0 +python-jose==3.3.0 +python-ldap==3.4.0 +python-nvd3==0.15.0 +python-slugify==4.0.1 
+python-tds==1.11.0 +python-utils==3.3.3 +python3-openid==3.2.0 +pytz==2022.1 +pytz-deprecation-shim==0.1.0.post0 +pytzdata==2020.1 +PyYAML==6.0 +pyzmq==23.2.0 +ratelimiter==1.2.0.post0 +redash-toolbelt==0.1.9 +requests==2.28.0 +requests-oauthlib==1.3.1 +responses==0.21.0 +retrying==1.3.3 +rfc3986==1.5.0 +rich==12.4.4 +rsa==4.8 +ruamel.yaml==0.17.17 +ruamel.yaml.clib==0.2.6 +s3transfer==0.6.0 +sasl3==0.2.11 +scipy==1.8.1 +Send2Trash==1.8.0 +setproctitle==1.2.3 +six==1.16.0 +smart-open==6.0.0 +sniffio==1.2.0 +snowflake-connector-python==2.7.8 +snowflake-sqlalchemy==1.2.4 +soupsieve==2.3.2.post1 +sql-metadata==2.2.2 +SQLAlchemy==1.3.24 +sqlalchemy-bigquery==1.4.4 +SQLAlchemy-JSONField==1.0.0 +sqlalchemy-pytds==0.3.4 +sqlalchemy-redshift==0.8.9 +SQLAlchemy-Utils==0.38.2 +sqlalchemy-vertica==0.0.5 +sqllineage==1.3.5 +sqlparse==0.4.2 +stack-data==0.3.0 +stackprinter==0.2.6 +starlette==0.19.1 +swagger-ui-bundle==0.0.9 +tableauserverclient==0.19.0 +tableschema==1.20.2 +tabulate==0.8.10 +tabulator==1.53.5 +tenacity==8.0.1 +tensorflow-metadata==1.9.0 +termcolor==1.1.0 +terminado==0.15.0 +text-unidecode==1.3 +thrift==0.16.0 +thrift-sasl==0.4.3 +tinycss2==1.1.1 +toml==0.10.2 +toolz==0.11.2 +tornado==6.1 +tqdm==4.64.0 +traitlets==5.2.1.post0 +trino==0.313.0 +types-Deprecated==1.2.8 +types-protobuf==3.19.22 +types-termcolor==1.1.4 +types-ujson==5.3.0 +typing-inspect==0.7.1 +typing_extensions==4.2.0 +tzdata==2022.1 +tzlocal==4.2 +ujson==5.3.0 +unicodecsv==0.14.1 +urllib3==1.26.9 +uvicorn==0.17.6 +uvloop==0.16.0 +vertica-python==1.1.0 +watchgod==0.8.2 +wcmatch==8.4 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.3.3 +websockets==10.3 +Werkzeug==1.0.1 +wrapt==1.14.1 +WTForms==2.3.3 +xlrd==2.0.1 +xmltodict==0.13.0 +yarl==1.7.2 +zipp==3.8.0 \ No newline at end of file diff --git a/docker/datahub-ingestion/base.Dockerfile b/docker/datahub-ingestion/base.Dockerfile new file mode 100644 index 00000000000000..7c8ab43d8c1bfb --- /dev/null +++ 
b/docker/datahub-ingestion/base.Dockerfile
@@ -0,0 +1,8 @@
+# Base image for datahub-ingestion: Python 3.9.9 with the pinned dependencies pre-installed.
+FROM python:3.9.9 AS base
+
+# The pinned dependency list is copied in under the name pip is pointed at below.
+COPY ./base-requirements.txt requirements.txt
+
+# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
+RUN pip install --no-cache-dir -r requirements.txt