diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 000000000..1fd946621 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,16 @@ +--- +# using the pre-commit action from: https://github.com/pre-commit/action +name: pre-commit + +on: # yamllint disable-line rule:truthy + pull_request: + push: + branches: [main] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v3 + - uses: pre-commit/action@v3.0.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..95010aaf5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,36 @@ +--- +repos: + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.3.1 + hooks: + - id: remove-tabs + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-added-large-files + - id: check-ast + - id: check-byte-order-marker + - id: check-case-conflict + - id: check-docstring-first + - id: check-json + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml + - id: check-yaml + args: [--allow-multiple-documents] + - id: debug-statements + - id: detect-private-key + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/psf/black + rev: 23.10.0 + hooks: + - id: black + + - repo: https://github.com/PyCQA/flake8 + rev: '6.1.0' + hooks: + - id: flake8 + args: ['--max-line-length=120'] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cdb41d0c1..c046fd5b2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,6 +15,7 @@ Pull requests are the best way to propose changes to the notebooks repository: - Configure name and email in git - Fork the repo and create your branch from main. +- Install [pre-commit](https://pre-commit.com/) into your [git hooks](https://githooks.com/) by running `pre-commit install`. See [linting](#linting) for more. - Sign off your commit using the -s, --signoff option. Write a good commit message (see [How to Write a Git Commit Message](https://chris.beams.io/posts/git-commit/)) - If you've added code that should be tested, [add tests](https://github.com/openshift/release/blob/master/ci-operator/config/opendatahub-io/notebooks/opendatahub-io-notebooks-main.yaml). - Ensure the test suite passes. @@ -35,7 +36,7 @@ Pull requests are the best way to propose changes to the notebooks repository: # Your comment here .PHONY: jupyter-${NOTEBOOK_NAME}-ubi8-python-3.8 jupyter-${NOTEBOOK_NAME}-ubi8-python-3.8: jupyter-minimal-ubi8-python-3.8 - $(call image,$@,jupyter/${NOTEBOOK_NAME}/ubi8-python-3.8,$<) + $(call image,$@,jupyter/${NOTEBOOK_NAME}/ubi8-python-3.8,$<) ``` - Add the paths of the new pipfiles under `refresh-pipfilelock-files` - Test the changes locally, by manually running the `$ make jupyter-${NOTEBOOK_NAME}-ubi8-python-3.8` from the terminal. @@ -56,3 +57,17 @@ Pull requests are the best way to propose changes to the notebooks repository: ### Testing your PR locally - Test the changes locally, by manually running the `$make jupyter-${NOTEBOOK_NAME}-ubi8-python-3.8` from the terminal. This definitely helps in that initial phase. + +### Linting + +To run linting tests, we use [pre-commit](https://pre-commit.com/). + +We have setup a [pre-commit](https://pre-commit.com) config file in [.pre-commit-config.yaml](.pre-commit-config.yaml). 
+To [utilize pre-commit](https://pre-commit.com/#usage), install pre-commit with `pip3 install pre-commit` and then either:
+
+* Run `pre-commit install` after you clone the repo; `pre-commit` will then run automatically on `git commit`.
+  * If any one of the tests fails, add and commit the changes made by pre-commit. Once the pre-commit check passes, you can make your PR.
+  * `pre-commit` will from now on run all the checkers/linters/formatters on every commit.
+  * If you later want to commit without running it, just run `git commit` with `-n/--no-verify`.
+or
+* If you want to manually run all the checkers/linters/formatters, run `pre-commit run --all-files`.
diff --git a/Makefile b/Makefile
index c84b7e42a..e97f5608f 100644
--- a/Makefile
+++ b/Makefile
@@ -331,8 +331,8 @@ test-%: bin/kubectl
 	$(KUBECTL_BIN) wait --for=condition=ready pod -l app=$(NOTEBOOK_NAME) --timeout=600s
 	$(KUBECTL_BIN) port-forward svc/$(NOTEBOOK_NAME)-notebook 8888:8888 & curl --retry 5 --retry-delay 5 --retry-connrefused http://localhost:8888/notebook/opendatahub/jovyan/api ; EXIT_CODE=$$?; echo && pkill --full "^$(KUBECTL_BIN).*port-forward.*"; \
 	$(eval FULL_NOTEBOOK_NAME = $(shell ($(KUBECTL_BIN) get pods -l app=$(NOTEBOOK_NAME) -o custom-columns=":metadata.name" | tr -d '\n')))
-	
-	# Tests notebook's functionalities
+
+	# Tests notebook's functionalities
 	if echo "$(FULL_NOTEBOOK_NAME)" | grep -q "minimal-ubi9"; then \
 	$(call test_with_papermill,minimal,ubi9,python-3.9) \
 	elif echo "$(FULL_NOTEBOOK_NAME)" | grep -q "datascience-ubi9"; then \
@@ -467,4 +467,3 @@ refresh-pipfilelock-files:
 	cd runtimes/tensorflow/ubi8-python-3.8 && pipenv lock
 	cd runtimes/tensorflow/ubi9-python-3.9 && pipenv lock
 	cd base/c9s-python-3.9 && pipenv lock
-	
\ No newline at end of file
diff --git a/README.md b/README.md
index fded02854..0b6f6cde1 100644
--- a/README.md
+++ b/README.md
@@ -223,7 +223,7 @@ make undeployX-${NOTEBOOK_NAME}
 ## Validating Runtimes
 
 The runtimes image requires to have curl and python installed,
-so that on runtime additional packages can be installed. 
+so that on runtime additional packages can be installed.
 
 Deploy the runtime images in your Kubernetes environment using deploy8-${WORKBENCH_NAME}
 for ubi8 or deploy9-${WORKBENCH_NAME} for ubi9:
diff --git a/UPDATES.md b/UPDATES.md
index 7082d93c0..e559194cf 100644
--- a/UPDATES.md
+++ b/UPDATES.md
@@ -4,7 +4,7 @@ This document aims to provide an overview of the rebuilding plan for the notebook images.
 There are two types of updates that are implemented:
 
 1. *Release updates* - These updates will be carried out twice a year and will incorporate major updates to the notebook images.
-    
+
 2. *Patch updates* - These updates will be carried out weekly and will focus on incorporating security updates to the notebook images. 
## Scope and frequency of the updates diff --git a/base/anaconda-python-3.8/Dockerfile b/base/anaconda-python-3.8/Dockerfile index c743c3c35..06f271fcb 100644 --- a/base/anaconda-python-3.8/Dockerfile +++ b/base/anaconda-python-3.8/Dockerfile @@ -101,4 +101,4 @@ RUN curl -L https://mirror.openshift.com/pub/openshift-v4/x86_64/clients/ocp/sta # Fix permissions to support pip in Openshift environments RUN fix-permissions /opt/app-root -P -WORKDIR /opt/app-root/src \ No newline at end of file +WORKDIR /opt/app-root/src diff --git a/ci/check-json.sh b/ci/check-json.sh index d68e83edf..589a0afa4 100755 --- a/ci/check-json.sh +++ b/ci/check-json.sh @@ -24,13 +24,13 @@ function check_json() { if grep --quiet --extended-regexp "${string}" "${f}"; then #if $(grep -e "${string}" "${f}"); then jsons=$(yq -r ".spec.tags[].annotations.\"${string}\"" "${f}") - + while IFS= read -r json; do echo " ${json}" echo -n " > "; echo "${json}" | json_verify || ret_code="${?}" done <<< "${jsons}" else - echo " Ignoring as this file doesn't contain necessary key field '${string}' for check" + echo " Ignoring as this file doesn't contain necessary key field '${string}' for check" fi return "${ret_code}" diff --git a/codeserver/c9s-python-3.9/nginx/root/usr/share/container-scripts/nginx/common.sh b/codeserver/c9s-python-3.9/nginx/root/usr/share/container-scripts/nginx/common.sh index 319219cd2..286a1f1de 100755 --- a/codeserver/c9s-python-3.9/nginx/root/usr/share/container-scripts/nginx/common.sh +++ b/codeserver/c9s-python-3.9/nginx/root/usr/share/container-scripts/nginx/common.sh @@ -23,9 +23,9 @@ function process_extending_files() { # Custom file is prefered if [ -f $custom_dir/$filename ]; then source $custom_dir/$filename - elif [ -f $default_dir/$filename ]; then + elif [ -f $default_dir/$filename ]; then source $default_dir/$filename fi fi done <<<"$(get_matched_files "$custom_dir" "$default_dir" '*.sh' | sort -u)" -} \ No newline at end of file +} diff --git a/codeserver/c9s-python-3.9/run-code-server.sh b/codeserver/c9s-python-3.9/run-code-server.sh index 6f69f1d76..e156c2eb0 100755 --- a/codeserver/c9s-python-3.9/run-code-server.sh +++ b/codeserver/c9s-python-3.9/run-code-server.sh @@ -6,7 +6,7 @@ source ${SCRIPT_DIR}/utils/*.sh # Start nginx and fastcgiwrap run-nginx.sh & -spawn-fcgi -s /var/run/fcgiwrap.socket -M 766 /usr/sbin/fcgiwrap +spawn-fcgi -s /var/run/fcgiwrap.socket -M 766 /usr/sbin/fcgiwrap # Add .bashrc for custom promt if not present if [ ! 
-f "/opt/app-root/src/.bashrc" ]; then diff --git a/codeserver/c9s-python-3.9/run-nginx.sh b/codeserver/c9s-python-3.9/run-nginx.sh index 5d0135fa8..ca8207a0c 100755 --- a/codeserver/c9s-python-3.9/run-nginx.sh +++ b/codeserver/c9s-python-3.9/run-nginx.sh @@ -23,4 +23,4 @@ else envsubst '${BASE_URL}' < /etc/nginx/nginx.conf | tee /etc/nginx/nginx.conf fi -nginx \ No newline at end of file +nginx diff --git a/codeserver/c9s-python-3.9/utils/process.sh b/codeserver/c9s-python-3.9/utils/process.sh index 6b69254b7..4da716a9f 100755 --- a/codeserver/c9s-python-3.9/utils/process.sh +++ b/codeserver/c9s-python-3.9/utils/process.sh @@ -16,4 +16,4 @@ function start_process() { function stop_process() { kill -TERM $PID -} \ No newline at end of file +} diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 4eb5c5be4..5e65c03a7 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -2,7 +2,7 @@ The following sections are aimed to provide a comprehensive guide for developers ## Getting Started This project utilizes three branches for the development: the **main** branch, which hosts the latest development, and t**wo additional branches for each release**. -These release branches follow a specific naming format: YYYYx, where "YYYY" represents the year, and "x" is an increasing letter. Thus, they help to keep working on minor updates and bug fixes on the supported versions (N & N-1) of each workbench. +These release branches follow a specific naming format: YYYYx, where "YYYY" represents the year, and "x" is an increasing letter. Thus, they help to keep working on minor updates and bug fixes on the supported versions (N & N-1) of each workbench. ## Architecture The structure of the notebook's build chain is derived from the parent image. To better comprehend this concept, refer to the following graph. @@ -19,13 +19,13 @@ Detailed instructions on how developers can contribute to this project can be fo ## Workbench ImageStreams ODH supports multiple out-of-the-box pre-built workbench images ([provided in this repository](https://github.com/opendatahub-io/notebooks)). For each of those workbench images, there is a dedicated ImageStream object definition. This ImageStream object references the actual image tag(s) and contains additional metadata that describe the workbench image. - + ### **Annotations** Aside from the general ImageStream config values, there are additional annotations that can be provided in the workbench ImageStream definition. This additional data is leveraged further by the [odh-dashboard](https://github.com/opendatahub-io/odh-dashboard/). -### **ImageStream-specific annotations** -The following labels and annotations are specific to the particular workbench image. They are provided in their respective sections in the `metadata` section. +### **ImageStream-specific annotations** +The following labels and annotations are specific to the particular workbench image. They are provided in their respective sections in the `metadata` section. ```yaml metadata: labels: @@ -33,16 +33,16 @@ metadata: annotations: ... 
``` -### **Available labels** +### **Available labels** - **`opendatahub.io/notebook-image:`** - a flag that determines whether the ImageStream references a workbench image that is meant be shown in the UI ### **Available annotations** - **`opendatahub.io/notebook-image-url:`** - a URL reference to the source of the particular workbench image - **`opendatahub.io/notebook-image-name:`** - a desired display name string for the particular workbench image (used in the UI) -- **`opendatahub.io/notebook-image-desc:`** - a desired description string of the of the particular workbench image (used in the UI) +- **`opendatahub.io/notebook-image-desc:`** - a desired description string of the of the particular workbench image (used in the UI) - **`opendatahub.io/notebook-image-order:`** - an index value for the particular workbench ImageStream (used by the UI to list available workbench images in a specific order) - **`opendatahub.io/recommended-accelerators`** - a string that represents the list of recommended hardware accelerators for the particular workbench ImageStream (used in the UI) -### **Tag-specific annotations** +### **Tag-specific annotations** One ImageStream can reference multiple image tags. The following annotations are specific to a particular workbench image tag and are provided in its `annotations:` section. ```yaml spec: @@ -54,17 +54,17 @@ spec: name: image-repository/tag name: tag-name ``` -### **Available annotations** +### **Available annotations** - **`opendatahub.io/notebook-software:`** - a string that represents the technology stack included within the workbench image. Each technology in the list is described by its name and the version used (e.g. `'[{"name":"CUDA","version":"11.8"},{"name":"Python","version":"v3.9"}]`') - **`opendatahub.io/notebook-python-dependencies:`** - a string that represents the list of Python libraries included within the workbench image. Each library is described by its name and currently used version (e.g. `'[{"name":"Numpy","version":"1.24"},{"name":"Pandas","version":"1.5"}]'`) - **`openshift.io/imported-from:`** - a reference to the image repository where the workbench image was obtained (e.g. `quay.io/repository/opendatahub/workbench-images`) - **`opendatahub.io/workbench-image-recommended:`** - a flag that allows the ImageStream tag to be marked as Recommended (used by the UI to distinguish which tags are recommended for use, e.g., when the workbench image offers multiple tags to choose from) -### **ImageStream definitions for the supported out-of-the-box images in ODH** +### **ImageStream definitions for the supported out-of-the-box images in ODH** The ImageStream definitions of the out-of-the-box workbench images for ODH can be found [here](https://github.com/opendatahub-io/notebooks/tree/main/manifests). -### **Example ImageStream object definition** +### **Example ImageStream object definition** An exemplary, non-functioning ImageStream object definition that uses all the aforementioned annotations is provided below. @@ -114,11 +114,11 @@ The opendatahub-io-ci-image-mirror job will be used to mirror the images from th tests: - as: ${NOTEBOOK_IMAGE_NAME}-image-mirror steps: - dependencies: - SOURCE_IMAGE_REF: ${NOTEBOOK_IMAGE_NAME} - env: - IMAGE_REPO: notebooks - workflow: opendatahub-io-ci-image-mirror + dependencies: + SOURCE_IMAGE_REF: ${NOTEBOOK_IMAGE_NAME} + env: + IMAGE_REPO: notebooks + workflow: opendatahub-io-ci-image-mirror ``` The images mirrored under 2 different scenarios: 1. A new PR is opened. 
@@ -128,7 +128,7 @@ The Openshift CI is also configured to run the unit and integration tests: ``` tests: - - as: notebooks-e2e-tests + - as: notebooks-e2e-tests steps: test: - as: ${NOTEBOOK_IMAGE_NAME}-e2e-tests @@ -146,15 +146,15 @@ This GitHub action is configured to be triggered on a weekly basis, specifically ### **Sync the downstream release branch with the upstream** [[Link]](https://github.com/red-hat-data-services/notebooks/blob/main/.github/workflows/sync-release-branch-2023a.yml) -This GitHub action is configured to be triggered on a weekly basis, specifically every Tuesday at 08:00 AM UTC. Its main objective is to automatically update the downstream release branch with the upstream branch. +This GitHub action is configured to be triggered on a weekly basis, specifically every Tuesday at 08:00 AM UTC. Its main objective is to automatically update the downstream release branch with the upstream branch. ### **Digest Updater workflow on the manifests** [[Link]](https://github.com/opendatahub-io/odh-manifests/blob/master/.github/workflows/notebooks-digest-updater-upstream.yaml) - + This GitHub action is designed to be triggered on a weekly basis, specifically every Friday at 12:00 AM UTC. Its primary purpose is to automate the process of updating the SHA digest of the notebooks. It achieves this by fetching the new SHA values from the quay.io registry and updating the [param.env](https://github.com/opendatahub-io/odh-manifests/blob/master/notebook-images/base/params.env) file, which is hosted on the odh-manifest repository. By automatically updating the SHA digest, this action ensures that the notebooks remain synchronized with the latest changes. ### **Digest Updater workflow on the live-builder** [[Link]](https://gitlab.cee.redhat.com/data-hub/rhods-live-builder/-/blob/main/.gitlab/notebook-sha-digest-updater.yml) -This GitHub action works with the same logic as the above and is designed to be triggered on a weekly basis, specifically every Friday. It is also update the SHA digest of the images into the [CSV](https://gitlab.cee.redhat.com/data-hub/rhods-live-builder/-/blob/main/rhods-operator-live/bundle/template/manifests/clusterserviceversion.yml.j2#L725) file on the live-builder repo. - - +This GitHub action works with the same logic as the above and is designed to be triggered on a weekly basis, specifically every Friday. It is also update the SHA digest of the images into the [CSV](https://gitlab.cee.redhat.com/data-hub/rhods-live-builder/-/blob/main/rhods-operator-live/bundle/template/manifests/clusterserviceversion.yml.j2#L725) file on the live-builder repo. + + [Previous Page](https://github.com/opendatahub-io/notebooks/wiki/Workbenches) | [Next Page](https://github.com/opendatahub-io/notebooks/wiki/User-Guide) diff --git a/docs/user-guide.md b/docs/user-guide.md index b09a087b8..7b8523a05 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -1,7 +1,7 @@ The following sections are aimed to provide a comprehensive guide on effectively utilizing an out-of-the-box notebook by a user. There are two options for launching a workbench image: either through the Enabled applications or the Data Science Project. -## Notebook Spawner +## Notebook Spawner In the ODH dashboard, you can navigate to Applications -> Enabled -> Launch Application from the Jupyter tile. The notebook server spawner page displays a list of available container images you can run as a single user." 
@@ -36,11 +36,10 @@ During the release lifecycle, which is the period during which the update is sup Our goal is to ensure that notebook images are supported for a minimum of one year, meaning that typically two supported images will be available at any given time. This provides sufficient time for users to update their code to use components from the latest notebook images. We will continue to make older images available in the registry for users to add as custom notebook images, even if they are no longer supported. This way, users can still access the older images if needed. Example lifecycle (not actual dates): -2023-01-01 - only one version of the notebook images is available - version 1 for all images. -2023-06-01 - release updated images - version 2 (v2023a). Versions 1 & 2 are supported and available for selection in the UI. -2023-12-01 - release updated images - version 3 (v2023b). Versions 2 & 3 are supported and available for selection in the UI. -2024-06-01 - release updated images - version 4 (v2024a). Versions 3 & 4 are supported and available for selection in the UI. +2023-01-01 - only one version of the notebook images is available - version 1 for all images. +2023-06-01 - release updated images - version 2 (v2023a). Versions 1 & 2 are supported and available for selection in the UI. +2023-12-01 - release updated images - version 3 (v2023b). Versions 2 & 3 are supported and available for selection in the UI. +2024-06-01 - release updated images - version 4 (v2024a). Versions 3 & 4 are supported and available for selection in the UI. [Previous Page](https://github.com/opendatahub-io/notebooks/wiki/Developer-Guide) - diff --git a/docs/workbench-imagestreams.md b/docs/workbench-imagestreams.md index b576d37f2..3e2fa8de4 100644 --- a/docs/workbench-imagestreams.md +++ b/docs/workbench-imagestreams.md @@ -1,13 +1,13 @@ ## Workbench ImageStreams ODH supports multiple out-of-the-box pre-built workbench images ([provided in this repository](https://github.com/opendatahub-io/notebooks)). For each of those workbench images, there is a dedicated ImageStream object definition. This ImageStream object references the actual image tag(s) and contains additional metadata that describe the workbench image. - + ### **Annotations** Aside from the general ImageStream config values, there are additional annotations that can be provided in the workbench ImageStream definition. This additional data is leveraged further by the [odh-dashboard](https://github.com/opendatahub-io/odh-dashboard/). -### **ImageStream-specific annotations** -The following labels and annotations are specific to the particular workbench image. They are provided in their respective sections in the `metadata` section. +### **ImageStream-specific annotations** +The following labels and annotations are specific to the particular workbench image. They are provided in their respective sections in the `metadata` section. ```yaml metadata: labels: @@ -15,16 +15,16 @@ metadata: annotations: ... 
``` -### **Available labels** +### **Available labels** - **`opendatahub.io/notebook-image:`** - a flag that determines whether the ImageStream references a workbench image that is meant be shown in the UI ### **Available annotations** - **`opendatahub.io/notebook-image-url:`** - a URL reference to the source of the particular workbench image - **`opendatahub.io/notebook-image-name:`** - a desired display name string for the particular workbench image (used in the UI) -- **`opendatahub.io/notebook-image-desc:`** - a desired description string of the of the particular workbench image (used in the UI) +- **`opendatahub.io/notebook-image-desc:`** - a desired description string of the of the particular workbench image (used in the UI) - **`opendatahub.io/notebook-image-order:`** - an index value for the particular workbench ImageStream (used by the UI to list available workbench images in a specific order) - **`opendatahub.io/recommended-accelerators`** - a string that represents the list of recommended hardware accelerators for the particular workbench ImageStream (used in the UI) -### **Tag-specific annotations** +### **Tag-specific annotations** One ImageStream can reference multiple image tags. The following annotations are specific to a particular workbench image tag and are provided in its `annotations:` section. ```yaml spec: @@ -36,17 +36,17 @@ spec: name: image-repository/tag name: tag-name ``` -### **Available annotations** +### **Available annotations** - **`opendatahub.io/notebook-software:`** - a string that represents the technology stack included within the workbench image. Each technology in the list is described by its name and the version used (e.g. `'[{"name":"CUDA","version":"11.8"},{"name":"Python","version":"v3.9"}]`') - **`opendatahub.io/notebook-python-dependencies:`** - a string that represents the list of Python libraries included within the workbench image. Each library is described by its name and currently used version (e.g. `'[{"name":"Numpy","version":"1.24"},{"name":"Pandas","version":"1.5"}]'`) - **`openshift.io/imported-from:`** - a reference to the image repository where the workbench image was obtained (e.g. `quay.io/repository/opendatahub/workbench-images`) - **`opendatahub.io/workbench-image-recommended:`** - a flag that allows the ImageStream tag to be marked as Recommended (used by the UI to distinguish which tags are recommended for use, e.g., when the workbench image offers multiple tags to choose from) -### **ImageStream definitions for the supported out-of-the-box images in ODH** +### **ImageStream definitions for the supported out-of-the-box images in ODH** The ImageStream definitions of the out-of-the-box workbench images for ODH can be found [here](https://github.com/opendatahub-io/notebooks/tree/main/manifests). -### **Example ImageStream object definition** +### **Example ImageStream object definition** An exemplary, non-functioning ImageStream object definition that uses all the aforementioned annotations is provided below. diff --git a/docs/workbenches.md b/docs/workbenches.md index eb51cc2fe..22d79a84b 100644 --- a/docs/workbenches.md +++ b/docs/workbenches.md @@ -15,12 +15,12 @@ Open Data Hub contains the following workbench images with different variations: | Code Server | ☑ | ☒| C9S | ☒| ☒| | R Studio | ☑ | ☒| C9S | ☑ | ☒| -These notebooks are incorporated to be used in conjunction with Open Data Hub, specifically utilizing the ODH Notebook Controller as the launching platform. 
The table above provides insights into the characteristics of each notebook, including their availability in both ODH and RHODS environments, GPU support, and whether they are offered as runtimes ie without the JupyterLab UI. +These notebooks are incorporated to be used in conjunction with Open Data Hub, specifically utilizing the ODH Notebook Controller as the launching platform. The table above provides insights into the characteristics of each notebook, including their availability in both ODH and RHODS environments, GPU support, and whether they are offered as runtimes ie without the JupyterLab UI. All the notebooks are available on the[ Quay.io registry](https://quay.io/repository/opendatahub/workbench-images?tab=tags&tag=latest); please filter the results by using the tag "2023b" for the latest release and "2023a" for the n-1. ## Jupyter Minimal -Jupyter Minimal provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. +Jupyter Minimal provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. If you do not require advanced machine learning features or additional resources for compute-intensive data science work, you can use the Minimal Python image to develop your models. [2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/jupyter/minimal/ubi9-python-3.9/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/jupyter/minimal/ubi9-python-3.9/Pipfile) @@ -30,30 +30,30 @@ If you do not require advanced machine learning features or additional resources CUDA provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. If you are working with compute-intensive data science models that require GPU support, use the Compute Unified Device Architecture (CUDA) notebook image to gain access to the NVIDIA CUDA Toolkit. You can optimize your work using GPU-accelerated libraries and optimization tools using this toolkit. -[2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/jupyter/minimal/ubi9-python-3.9/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/jupyter/minimal/ubi9-python-3.9/Pipfile) +[2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/jupyter/minimal/ubi9-python-3.9/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/jupyter/minimal/ubi9-python-3.9/Pipfile) -## HabanaAI +## HabanaAI HabanaAI provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. The HabanaAI notebook image optimizes high-performance deep learning (DL) with Habana Gaudi devices. Habana Gaudi devices accelerate DL training workloads and maximize training throughput and efficiency. 
-[2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/habana/1.11.0/ubi8-python-3.8/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/habana/1.10.0/ubi8-python-3.8/Pipfile) +[2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/habana/1.11.0/ubi8-python-3.8/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/habana/1.10.0/ubi8-python-3.8/Pipfile) ## Jupyter Data Science -Standard Data Science provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. Use the Standard Data Science notebook image for models that do not require TensorFlow or PyTorch. +Standard Data Science provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. Use the Standard Data Science notebook image for models that do not require TensorFlow or PyTorch. This image contains commonly used libraries to assist you in developing your machine-learning models. Furthermore, we have integrated several useful libraries and applications. Notably, we've included **Mesa-libgl**, an additional library designed for OpenCV tasks. We've also introduced **Git-lfs**, which provides an efficient solution for handling large files, such as audio samples, videos, datasets, and graphics. The integration of **unixODBC** offers a standardized API for accessing data sources, including SQL Servers and other data sources with ODBC drivers. Lastly, the addition of **Libsndfile** makes it easier to read and write files containing sampled audio data. Additionally, this notebook comes equipped with standard **database clients** for MySQL, PostgreSQL, MSSQL, and MongoDB. **NOTE:** All notebooks derived from the Jupyter Data Science Notebook inherit these libraries and applications, with the exception of the minimal and CUDA variants. [2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/jupyter/datascience/ubi9-python-3.9/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/jupyter/datascience/ubi9-python-3.9/Pipfile) -## Jupyter Tensorflow +## Jupyter Tensorflow TensorFlow is an open-source platform for machine learning. It provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. With TensorFlow, you can build, train and deploy your machine learning models. TensorFlow contains advanced data visualization features, such as computational graph visualizations. It also allows you to easily monitor and track the progress of your models. -[2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/jupyter/tensorflow/ubi9-python-3.9/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/jupyter/tensorflow/ubi9-python-3.9/Pipfile) +[2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/jupyter/tensorflow/ubi9-python-3.9/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/jupyter/tensorflow/ubi9-python-3.9/Pipfile) -## Jupyter PyTorch +## Jupyter PyTorch PyTorch is an open-source machine learning library optimized for deep learning. If you are working with computer vision or natural language processing models, use the Pytorch notebook image. 
It provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. @@ -63,7 +63,7 @@ PyTorch is an open-source machine learning library optimized for deep learning. Use the TrustyAI notebook image to leverage your data science work with model explainability, tracing and accountability, and runtime monitoring. It provides a browser-based integrated development environment where you can write, edit, and debug code using the familiar interface and features of JupyterLab. -[2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/jupyter/trustyai/ubi9-python-3.9/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/jupyter/trustyai/ubi9-python-3.9/Pipfile) +[2023b Packages](https://github.com/opendatahub-io/notebooks/blob/2023b/jupyter/trustyai/ubi9-python-3.9/Pipfile) || [2023a Packages](https://github.com/opendatahub-io/notebooks/blob/2023a/jupyter/trustyai/ubi9-python-3.9/Pipfile) ## Code Server @@ -73,13 +73,10 @@ Code Server (VS Code) provides a browser-based integrated development environmen ## R Studio -It provides a powerful integrated development environment specifically designed for R programming. By integrating R Studio IDE into ODH, you equip data analysts with a dedicated environment for exploring and manipulating data, building models, and generating insightful visualizations. Moreover, If you are working with compute-intensive data science models that require GPU support, use the CUDA R Studio notebook image to gain access to the NVIDIA CUDA Toolkit. +It provides a powerful integrated development environment specifically designed for R programming. By integrating R Studio IDE into ODH, you equip data analysts with a dedicated environment for exploring and manipulating data, building models, and generating insightful visualizations. Moreover, If you are working with compute-intensive data science models that require GPU support, use the CUDA R Studio notebook image to gain access to the NVIDIA CUDA Toolkit. 
- - -[Previous Page](https://github.com/opendatahub-io/notebooks/wiki) | [Next Page](https://github.com/opendatahub-io/notebooks/wiki/Developer-Guide) - +[Previous Page](https://github.com/opendatahub-io/notebooks/wiki) | [Next Page](https://github.com/opendatahub-io/notebooks/wiki/Developer-Guide) diff --git a/habana/1.10.0/ubi8-python-3.8/Dockerfile b/habana/1.10.0/ubi8-python-3.8/Dockerfile index 411573ddf..924d4c3e6 100644 --- a/habana/1.10.0/ubi8-python-3.8/Dockerfile +++ b/habana/1.10.0/ubi8-python-3.8/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # # HabanaLabs Dockerfile base installer layer for RedHat 8.6 -# Reference: https://github.com/HabanaAI/Setup_and_Install/blob/1.10.0/dockerfiles/base/Dockerfile.rhel8.6 +# Reference: https://github.com/HabanaAI/Setup_and_Install/blob/1.10.0/dockerfiles/base/Dockerfile.rhel8.6 ARG BASE_IMAGE FROM ${BASE_IMAGE} @@ -80,7 +80,7 @@ RUN dnf install -y habanalabs-rdma-core-"$VERSION"-"$REVISION".el8 \ ENV PIP_NO_CACHE_DIR=on ENV PIP_DISABLE_PIP_VERSION_CHECK=1 -# Install python packages +# Install python packages # RUN python3.8 -m pip install hpu_media_loader=="${VERSION}"."${REVISION}" # Install Python packages and Jupyterlab extensions from Pipfile.lock COPY Pipfile.lock ./ @@ -95,7 +95,7 @@ ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins ## Install habana tensorflow -## Reference: https://github.com/HabanaAI/Setup_and_Install/blob/1.10.0/dockerfiles/tensorflow/Dockerfile.rhel8.6 +## Reference: https://github.com/HabanaAI/Setup_and_Install/blob/1.10.0/dockerfiles/tensorflow/Dockerfile.rhel8.6 # For AML/CentOS/RHEL OS'es TFIO_DATAPATH have to be specified to import tensorflow_io lib correctly ENV TFIO_DATAPATH=/opt/app-root/src/python3.8/site-packages/ @@ -135,7 +135,7 @@ RUN wget --no-verbose https://"${ARTIFACTORY_URL}"/artifactory/gaudi-pt-modules/ rm -rf /root/habanalabs/pytorch_temp/ && \ rm -rf pytorch_modules-v"${PT_VERSION}"_"${VERSION}"_"${REVISION}".tgz && \ echo "source /etc/profile.d/habanalabs.sh" >> ~/.bashrc - + ENV LD_PRELOAD=/lib64/libtcmalloc.so.4 ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768 @@ -160,4 +160,4 @@ RUN sed -i -e "s/Python.*/$(python --version| cut -d '.' -f-2)\",/" /opt/app-roo USER 1001 -WORKDIR /opt/app-root/src \ No newline at end of file +WORKDIR /opt/app-root/src diff --git a/habana/1.10.0/ubi8-python-3.8/README.md b/habana/1.10.0/ubi8-python-3.8/README.md index 124f8eb4c..61b8f5a8b 100644 --- a/habana/1.10.0/ubi8-python-3.8/README.md +++ b/habana/1.10.0/ubi8-python-3.8/README.md @@ -1,8 +1,8 @@ # Habana Notebook Image -This directory contains the Dockerfile to build a Notebook image compatible with 1.10.0 Habana Version. +This directory contains the Dockerfile to build a Notebook image compatible with 1.10.0 Habana Version. 
-Currently supporting the support matrix: -https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.10.0.html +Currently supporting the support matrix: +https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.10.0.html | Firmware | version | @@ -19,9 +19,9 @@ https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.10.0.html | numpy | 1.23.5 | | scipy | 1.10.1 | -Reference: -https://github.com/HabanaAI/Setup_and_Install/tree/1.10.0 +Reference: +https://github.com/HabanaAI/Setup_and_Install/tree/1.10.0 -For further documentation related to HabanaAI, please refer: -https://docs.habana.ai/en/v1.10.0/Gaudi_Overview/index.html \ No newline at end of file +For further documentation related to HabanaAI, please refer: +https://docs.habana.ai/en/v1.10.0/Gaudi_Overview/index.html diff --git a/habana/1.11.0/ubi8-python-3.8/Dockerfile b/habana/1.11.0/ubi8-python-3.8/Dockerfile index 45873d215..9b4fdcf42 100644 --- a/habana/1.11.0/ubi8-python-3.8/Dockerfile +++ b/habana/1.11.0/ubi8-python-3.8/Dockerfile @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # # HabanaLabs Dockerfile base installer layer for RedHat 8.6 -# Reference: https://github.com/HabanaAI/Setup_and_Install/blob/1.11.0/dockerfiles/base/Dockerfile.rhel8.6 +# Reference: https://github.com/HabanaAI/Setup_and_Install/blob/1.11.0/dockerfiles/base/Dockerfile.rhel8.6 ARG BASE_IMAGE FROM ${BASE_IMAGE} @@ -85,7 +85,7 @@ ENV PIP_DISABLE_PIP_VERSION_CHECK=1 ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib -# Install python packages +# Install python packages # RUN python3.8 -m pip install hpu_media_loader=="${VERSION}"."${REVISION}" # Install Python packages and Jupyterlab extensions from Pipfile.lock COPY Pipfile.lock ./ @@ -164,4 +164,4 @@ RUN sed -i -e "s/Python.*/$(python --version| cut -d '.' -f-2)\",/" /opt/app-roo USER 1001 -WORKDIR /opt/app-root/src \ No newline at end of file +WORKDIR /opt/app-root/src diff --git a/habana/1.11.0/ubi8-python-3.8/README.md b/habana/1.11.0/ubi8-python-3.8/README.md index 728e56234..50d35ac4f 100644 --- a/habana/1.11.0/ubi8-python-3.8/README.md +++ b/habana/1.11.0/ubi8-python-3.8/README.md @@ -1,7 +1,7 @@ # Habana Notebook Image -This directory contains the Dockerfile to build a Notebook image compatible with 1.11.0 Habana Version. +This directory contains the Dockerfile to build a Notebook image compatible with 1.11.0 Habana Version. 
-Currently supporting the support matrix: +Currently supporting the support matrix: https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html | Firmware | version | @@ -18,9 +18,9 @@ https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html | numpy | 1.23.5 | | scipy | 1.10.1 | -Reference: +Reference: https://github.com/HabanaAI/Setup_and_Install/tree/1.11.0 -For further documentation related to HabanaAI, please refer: -https://docs.habana.ai/en/v1.11.0/Gaudi_Overview/index.html \ No newline at end of file +For further documentation related to HabanaAI, please refer: +https://docs.habana.ai/en/v1.11.0/Gaudi_Overview/index.html diff --git a/habana/1.9.0/ubi8-python-3.8/Dockerfile b/habana/1.9.0/ubi8-python-3.8/Dockerfile index 90bb1b31f..1e772604a 100644 --- a/habana/1.9.0/ubi8-python-3.8/Dockerfile +++ b/habana/1.9.0/ubi8-python-3.8/Dockerfile @@ -79,7 +79,7 @@ RUN dnf install -y habanalabs-thunk-"$VERSION"-"$REVISION".el8 \ ENV PIP_NO_CACHE_DIR=on ENV PIP_DISABLE_PIP_VERSION_CHECK=1 -# Install python packages +# Install python packages # RUN python3.8 -m pip install hpu_media_loader=="${VERSION}"."${REVISION}" # Install Python packages and Jupyterlab extensions from Pipfile.lock COPY Pipfile.lock ./ @@ -94,7 +94,7 @@ ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins ## Install habana pytorch -# Reference: https://github.com/HabanaAI/Setup_and_Install/blob/1.9.0/dockerfiles/Dockerfile_rhel8.6_pytorch_installer +# Reference: https://github.com/HabanaAI/Setup_and_Install/blob/1.9.0/dockerfiles/Dockerfile_rhel8.6_pytorch_installer ENV LANG=en_US.UTF-8 ENV PYTHONPATH=/root:/usr/lib/habanalabs/ @@ -149,4 +149,4 @@ RUN sed -i -e "s/Python.*/$(python --version| cut -d '.' -f-2)\",/" /opt/app-roo USER 1001 -WORKDIR /opt/app-root/src \ No newline at end of file +WORKDIR /opt/app-root/src diff --git a/habana/1.9.0/ubi8-python-3.8/README.md b/habana/1.9.0/ubi8-python-3.8/README.md index dc36235a3..8214783ba 100644 --- a/habana/1.9.0/ubi8-python-3.8/README.md +++ b/habana/1.9.0/ubi8-python-3.8/README.md @@ -1,8 +1,8 @@ # Habana Notebook Image -This directory contains the Dockerfile to build a Notebook image compatible with 1.9.0 Habana Version. +This directory contains the Dockerfile to build a Notebook image compatible with 1.9.0 Habana Version. -Currently supporting the support matrix: -https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.9.0.html +Currently supporting the support matrix: +https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.9.0.html | Firmware | version | @@ -19,9 +19,9 @@ https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.9.0.html | numpy | 1.23.5 | | scipy | 1.10.1 | -Reference: -https://github.com/HabanaAI/Setup_and_Install/tree/1.9.0 +Reference: +https://github.com/HabanaAI/Setup_and_Install/tree/1.9.0 -For further documentation related to HabanaAI, please refer: -https://docs.habana.ai/en/v1.9.0/Gaudi_Overview/index.html \ No newline at end of file +For further documentation related to HabanaAI, please refer: +https://docs.habana.ai/en/v1.9.0/Gaudi_Overview/index.html diff --git a/habana/README.md b/habana/README.md index bd8ccf5e0..58ac537fb 100644 --- a/habana/README.md +++ b/habana/README.md @@ -1,32 +1,32 @@ # Habana Notebook Image -This directory contains the Dockerfiles to build Notebook images compatible with HabanaAI Gaudi Devices. +This directory contains the Dockerfiles to build Notebook images compatible with HabanaAI Gaudi Devices. 
-Currently supporting the support matrix: -https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.9.0.html -https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.10.0.html -https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html +Currently supporting the support matrix: +https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.9.0.html +https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix_v1.10.0.html +https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html -### Setup Habana AI on Openshift. +### Setup Habana AI on Openshift. -The device on AWS with machine `dl1.24xlarge` has habana fireware. -With documentation for [OpenShift Enviornment](https://docs.habana.ai/en/latest/Orchestration/HabanaAI_Operator/index.html?highlight=openshift). +The device on AWS with machine `dl1.24xlarge` has habana fireware. +With documentation for [OpenShift Enviornment](https://docs.habana.ai/en/latest/Orchestration/HabanaAI_Operator/index.html?highlight=openshift). ### Utilize with OpenDatahub -User can use the Habana base notebook image with OpenDatahub, -With the [notebook manifests](../manifests/base/jupyter-habana-notebook-imagestream.yaml), +User can use the Habana base notebook image with OpenDatahub, +With the [notebook manifests](../manifests/base/jupyter-habana-notebook-imagestream.yaml), user can include the habanaAI compatible image directly to Opendatahub. -### Reference +### Reference -https://github.com/HabanaAI/Setup_and_Install/tree/1.9.0 -https://github.com/HabanaAI/Setup_and_Install/tree/1.10.0 -https://github.com/HabanaAI/Setup_and_Install/tree/1.11.0 +https://github.com/HabanaAI/Setup_and_Install/tree/1.9.0 +https://github.com/HabanaAI/Setup_and_Install/tree/1.10.0 +https://github.com/HabanaAI/Setup_and_Install/tree/1.11.0 -For further documentation related to HabanaAI, please refer: -https://docs.habana.ai/en/v1.9.0/Gaudi_Overview/index.html -https://docs.habana.ai/en/v1.10.0/Gaudi_Overview/index.html -https://docs.habana.ai/en/v1.11.0/Gaudi_Overview/index.html +For further documentation related to HabanaAI, please refer: +https://docs.habana.ai/en/v1.9.0/Gaudi_Overview/index.html +https://docs.habana.ai/en/v1.10.0/Gaudi_Overview/index.html +https://docs.habana.ai/en/v1.11.0/Gaudi_Overview/index.html diff --git a/jupyter/datascience/anaconda-python-3.8/environment.in b/jupyter/datascience/anaconda-python-3.8/environment.in index c4185f4b0..a47023359 100644 --- a/jupyter/datascience/anaconda-python-3.8/environment.in +++ b/jupyter/datascience/anaconda-python-3.8/environment.in @@ -25,4 +25,4 @@ cryptography nodejs>12.0.0 urllib3<1.26.16 elyra -kfp-tekton<1.6.0 \ No newline at end of file +kfp-tekton<1.6.0 diff --git a/jupyter/datascience/anaconda-python-3.8/jupyter_elyra_config.py b/jupyter/datascience/anaconda-python-3.8/jupyter_elyra_config.py index caa1981d0..d3343774d 100644 --- a/jupyter/datascience/anaconda-python-3.8/jupyter_elyra_config.py +++ b/jupyter/datascience/anaconda-python-3.8/jupyter_elyra_config.py @@ -2,10 +2,10 @@ # Pre-generated via `jupyter elyra --generate-config` # Editted out the rest of the content, use the above command to get additional config sections. 
-c = get_config() #noqa +c = get_config() # noqa -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # PipelineProcessorRegistry(SingletonConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -c.PipelineProcessorRegistry.runtimes = ['kfp'] \ No newline at end of file +c.PipelineProcessorRegistry.runtimes = ["kfp"] diff --git a/jupyter/datascience/anaconda-python-3.8/pipeline-flow.svg b/jupyter/datascience/anaconda-python-3.8/pipeline-flow.svg index d265fe8bd..5617ba65a 100644 --- a/jupyter/datascience/anaconda-python-3.8/pipeline-flow.svg +++ b/jupyter/datascience/anaconda-python-3.8/pipeline-flow.svg @@ -4,4 +4,4 @@ - \ No newline at end of file + diff --git a/jupyter/datascience/anaconda-python-3.8/runtime-images/datascience-ubi8-py38.json b/jupyter/datascience/anaconda-python-3.8/runtime-images/datascience-ubi8-py38.json index af8a25f9c..bb1884db6 100644 --- a/jupyter/datascience/anaconda-python-3.8/runtime-images/datascience-ubi8-py38.json +++ b/jupyter/datascience/anaconda-python-3.8/runtime-images/datascience-ubi8-py38.json @@ -6,4 +6,4 @@ "image_name": "quay.io/opendatahub/workbench-images:runtime-datascience-ubi8-python-3.8-6a6098d" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/anaconda-python-3.8/runtime-images/pytorch-ubi8-py38.json b/jupyter/datascience/anaconda-python-3.8/runtime-images/pytorch-ubi8-py38.json index 7d0fd0dfd..36ce59efd 100644 --- a/jupyter/datascience/anaconda-python-3.8/runtime-images/pytorch-ubi8-py38.json +++ b/jupyter/datascience/anaconda-python-3.8/runtime-images/pytorch-ubi8-py38.json @@ -6,4 +6,4 @@ "image_name": "quay.io/opendatahub/workbench-images:runtime-pytorch-ubi8-python-3.8-6a6098d" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/anaconda-python-3.8/runtime-images/tensorflow-ubi8-py38.json b/jupyter/datascience/anaconda-python-3.8/runtime-images/tensorflow-ubi8-py38.json index 6aae37402..52553d02b 100644 --- a/jupyter/datascience/anaconda-python-3.8/runtime-images/tensorflow-ubi8-py38.json +++ b/jupyter/datascience/anaconda-python-3.8/runtime-images/tensorflow-ubi8-py38.json @@ -6,4 +6,4 @@ "image_name": "quay.io/opendatahub/workbench-images:runtime-cuda-tensorflow-ubi8-python-3.8-6a6098d" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/anaconda-python-3.8/runtime-images/ubi8-py38.json b/jupyter/datascience/anaconda-python-3.8/runtime-images/ubi8-py38.json index 9dcff9b8f..e6e0c5e3f 100644 --- a/jupyter/datascience/anaconda-python-3.8/runtime-images/ubi8-py38.json +++ b/jupyter/datascience/anaconda-python-3.8/runtime-images/ubi8-py38.json @@ -6,4 +6,4 @@ "image_name": "quay.io/opendatahub/workbench-images:runtime-minimal-ubi8-python-3.8-6a6098d" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/anaconda-python-3.8/setup-elyra.sh b/jupyter/datascience/anaconda-python-3.8/setup-elyra.sh index e800f3dd9..b9e62ac3b 100644 --- a/jupyter/datascience/anaconda-python-3.8/setup-elyra.sh +++ b/jupyter/datascience/anaconda-python-3.8/setup-elyra.sh @@ -46,4 +46,4 @@ fi # Environment vars set for accessing ssl_sa_certs and sa_token export 
KF_PIPELINES_SSL_SA_CERTS="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" export KF_PIPELINES_SA_TOKEN_ENV="/var/run/secrets/kubernetes.io/serviceaccount/token" -export KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount/token" \ No newline at end of file +export KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount/token" diff --git a/jupyter/datascience/ubi8-python-3.8/runtime-images/datascience-ubi8-py38.json b/jupyter/datascience/ubi8-python-3.8/runtime-images/datascience-ubi8-py38.json index 6483705f8..5e28f6bf7 100644 --- a/jupyter/datascience/ubi8-python-3.8/runtime-images/datascience-ubi8-py38.json +++ b/jupyter/datascience/ubi8-python-3.8/runtime-images/datascience-ubi8-py38.json @@ -7,4 +7,4 @@ "pull_policy": "IfNotPresent" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi8-python-3.8/runtime-images/pytorch-ubi8-py38.json b/jupyter/datascience/ubi8-python-3.8/runtime-images/pytorch-ubi8-py38.json index 4c5e4e402..3a5e6ee51 100644 --- a/jupyter/datascience/ubi8-python-3.8/runtime-images/pytorch-ubi8-py38.json +++ b/jupyter/datascience/ubi8-python-3.8/runtime-images/pytorch-ubi8-py38.json @@ -7,4 +7,4 @@ "pull_policy": "IfNotPresent" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi8-python-3.8/runtime-images/tensorflow-ubi8-py38.json b/jupyter/datascience/ubi8-python-3.8/runtime-images/tensorflow-ubi8-py38.json index a5acb72a7..b5430f14a 100644 --- a/jupyter/datascience/ubi8-python-3.8/runtime-images/tensorflow-ubi8-py38.json +++ b/jupyter/datascience/ubi8-python-3.8/runtime-images/tensorflow-ubi8-py38.json @@ -7,4 +7,4 @@ "pull_policy": "IfNotPresent" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi8-python-3.8/runtime-images/ubi8-py38.json b/jupyter/datascience/ubi8-python-3.8/runtime-images/ubi8-py38.json index 412d2b908..2fbc44d72 100644 --- a/jupyter/datascience/ubi8-python-3.8/runtime-images/ubi8-py38.json +++ b/jupyter/datascience/ubi8-python-3.8/runtime-images/ubi8-py38.json @@ -7,4 +7,4 @@ "pull_policy": "IfNotPresent" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi8-python-3.8/setup-elyra.sh b/jupyter/datascience/ubi8-python-3.8/setup-elyra.sh index d9b08b397..7917c641d 100644 --- a/jupyter/datascience/ubi8-python-3.8/setup-elyra.sh +++ b/jupyter/datascience/ubi8-python-3.8/setup-elyra.sh @@ -21,4 +21,4 @@ export KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount # Environment vars set for accessing following dependencies for air-gapped enviroment export ELYRA_BOOTSTRAP_SCRIPT_URL="file:///opt/app-root/bin/utils/bootstrapper.py" export ELYRA_PIP_CONFIG_URL="file:///opt/app-root/bin/utils/pip.conf" -export ELYRA_REQUIREMENTS_URL="file:///opt/app-root/bin/utils/requirements-elyra.txt" \ No newline at end of file +export ELYRA_REQUIREMENTS_URL="file:///opt/app-root/bin/utils/requirements-elyra.txt" diff --git a/jupyter/datascience/ubi8-python-3.8/utils/component_runtime.json b/jupyter/datascience/ubi8-python-3.8/utils/component_runtime.json index a1e84196e..b79b32e67 100644 --- a/jupyter/datascience/ubi8-python-3.8/utils/component_runtime.json +++ b/jupyter/datascience/ubi8-python-3.8/utils/component_runtime.json @@ -7,4 +7,4 @@ "uihints": { "hidden": true } -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi8-python-3.8/utils/jupyter_elyra_config.py 
b/jupyter/datascience/ubi8-python-3.8/utils/jupyter_elyra_config.py index caa1981d0..d3343774d 100644 --- a/jupyter/datascience/ubi8-python-3.8/utils/jupyter_elyra_config.py +++ b/jupyter/datascience/ubi8-python-3.8/utils/jupyter_elyra_config.py @@ -2,10 +2,10 @@ # Pre-generated via `jupyter elyra --generate-config` # Editted out the rest of the content, use the above command to get additional config sections. -c = get_config() #noqa +c = get_config() # noqa -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # PipelineProcessorRegistry(SingletonConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -c.PipelineProcessorRegistry.runtimes = ['kfp'] \ No newline at end of file +c.PipelineProcessorRegistry.runtimes = ["kfp"] diff --git a/jupyter/datascience/ubi8-python-3.8/utils/kfp_authentication.patch b/jupyter/datascience/ubi8-python-3.8/utils/kfp_authentication.patch index 3b4b776d1..b89f7b468 100644 --- a/jupyter/datascience/ubi8-python-3.8/utils/kfp_authentication.patch +++ b/jupyter/datascience/ubi8-python-3.8/utils/kfp_authentication.patch @@ -1,11 +1,11 @@ ---- a/kfp_authentication.py 2023-06-09 10:13:11.412910808 -0400 -+++ b/kfp_authentication.py 2023-06-09 10:14:39.879565175 -0400 +--- a/kfp_authentication.py 2023-06-09 10:13:11.412910808 -0400 ++++ b/kfp_authentication.py 2023-06-09 10:14:39.879565175 -0400 @@ -230,6 +230,7 @@ """ - + kf_url = urlsplit(api_endpoint)._replace(path="").geturl() + kf_pipelines_ssl_sa_cert = os.getenv("PIPELINES_SSL_SA_CERTS", None) - + # return data structure for successful requests auth_info = { @@ -239,6 +240,7 @@ @@ -14,5 +14,5 @@ "existing_token": None, # passed to KFP SDK client as "existing_token" param value + "ssl_ca_cert": kf_pipelines_ssl_sa_cert, # passed to KFP SDK Client as "ssl_ca_cert" param value } - + try: diff --git a/jupyter/datascience/ubi8-python-3.8/utils/pipeline-flow.svg b/jupyter/datascience/ubi8-python-3.8/utils/pipeline-flow.svg index d265fe8bd..5617ba65a 100644 --- a/jupyter/datascience/ubi8-python-3.8/utils/pipeline-flow.svg +++ b/jupyter/datascience/ubi8-python-3.8/utils/pipeline-flow.svg @@ -4,4 +4,4 @@ - \ No newline at end of file + diff --git a/jupyter/datascience/ubi8-python-3.8/utils/processor_kfp.patch b/jupyter/datascience/ubi8-python-3.8/utils/processor_kfp.patch index feb0d705c..2ed8f1c10 100644 --- a/jupyter/datascience/ubi8-python-3.8/utils/processor_kfp.patch +++ b/jupyter/datascience/ubi8-python-3.8/utils/processor_kfp.patch @@ -1,5 +1,5 @@ ---- a/processor_kfp.py 2023-06-09 10:17:15.659461927 -0400 -+++ b/processor_kfp.py 2023-06-09 10:16:20.062429914 -0400 +--- a/processor_kfp.py 2023-06-09 10:17:15.659461927 -0400 ++++ b/processor_kfp.py 2023-06-09 10:16:20.062429914 -0400 @@ -213,6 +213,7 @@ credentials=auth_info.get("credentials", None), existing_token=auth_info.get("existing_token", None), diff --git a/jupyter/datascience/ubi9-python-3.9/mongodb-org-6.0.repo-x86_64 b/jupyter/datascience/ubi9-python-3.9/mongodb-org-6.0.repo-x86_64 index 691865d46..9a8132ab0 100644 --- a/jupyter/datascience/ubi9-python-3.9/mongodb-org-6.0.repo-x86_64 +++ b/jupyter/datascience/ubi9-python-3.9/mongodb-org-6.0.repo-x86_64 @@ -3,4 +3,4 @@ name=MongoDB Repository baseurl=https://repo.mongodb.org/yum/redhat/9/mongodb-org/6.0/x86_64/ gpgcheck=1 enabled=1 
-gpgkey=https://www.mongodb.org/static/pgp/server-6.0.asc \ No newline at end of file +gpgkey=https://www.mongodb.org/static/pgp/server-6.0.asc diff --git a/jupyter/datascience/ubi9-python-3.9/mssql-2022.repo-x86_64 b/jupyter/datascience/ubi9-python-3.9/mssql-2022.repo-x86_64 index 0f70854e5..16e708e9e 100644 --- a/jupyter/datascience/ubi9-python-3.9/mssql-2022.repo-x86_64 +++ b/jupyter/datascience/ubi9-python-3.9/mssql-2022.repo-x86_64 @@ -3,4 +3,4 @@ name=packages-microsoft-com-prod baseurl=https://packages.microsoft.com/rhel/9.0/prod/ enabled=1 gpgcheck=1 -gpgkey=https://packages.microsoft.com/keys/microsoft.asc \ No newline at end of file +gpgkey=https://packages.microsoft.com/keys/microsoft.asc diff --git a/jupyter/datascience/ubi9-python-3.9/runtime-images/pytorch-ubi9-py39.json b/jupyter/datascience/ubi9-python-3.9/runtime-images/pytorch-ubi9-py39.json index a33e23d6f..2e4d10bf8 100644 --- a/jupyter/datascience/ubi9-python-3.9/runtime-images/pytorch-ubi9-py39.json +++ b/jupyter/datascience/ubi9-python-3.9/runtime-images/pytorch-ubi9-py39.json @@ -7,4 +7,4 @@ "pull_policy": "IfNotPresent" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi9-python-3.9/runtime-images/tensorflow-ubi9-py39.json b/jupyter/datascience/ubi9-python-3.9/runtime-images/tensorflow-ubi9-py39.json index 6ab4cfe24..391454fc8 100644 --- a/jupyter/datascience/ubi9-python-3.9/runtime-images/tensorflow-ubi9-py39.json +++ b/jupyter/datascience/ubi9-python-3.9/runtime-images/tensorflow-ubi9-py39.json @@ -7,4 +7,4 @@ "pull_policy": "IfNotPresent" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi9-python-3.9/runtime-images/ubi9-py39.json b/jupyter/datascience/ubi9-python-3.9/runtime-images/ubi9-py39.json index 6f4357697..762e40019 100644 --- a/jupyter/datascience/ubi9-python-3.9/runtime-images/ubi9-py39.json +++ b/jupyter/datascience/ubi9-python-3.9/runtime-images/ubi9-py39.json @@ -7,4 +7,4 @@ "pull_policy": "IfNotPresent" }, "schema_name": "runtime-image" -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi9-python-3.9/setup-elyra.sh b/jupyter/datascience/ubi9-python-3.9/setup-elyra.sh index d9b08b397..7917c641d 100644 --- a/jupyter/datascience/ubi9-python-3.9/setup-elyra.sh +++ b/jupyter/datascience/ubi9-python-3.9/setup-elyra.sh @@ -21,4 +21,4 @@ export KF_PIPELINES_SA_TOKEN_PATH="/var/run/secrets/kubernetes.io/serviceaccount # Environment vars set for accessing following dependencies for air-gapped enviroment export ELYRA_BOOTSTRAP_SCRIPT_URL="file:///opt/app-root/bin/utils/bootstrapper.py" export ELYRA_PIP_CONFIG_URL="file:///opt/app-root/bin/utils/pip.conf" -export ELYRA_REQUIREMENTS_URL="file:///opt/app-root/bin/utils/requirements-elyra.txt" \ No newline at end of file +export ELYRA_REQUIREMENTS_URL="file:///opt/app-root/bin/utils/requirements-elyra.txt" diff --git a/jupyter/datascience/ubi9-python-3.9/utils/component_runtime.json b/jupyter/datascience/ubi9-python-3.9/utils/component_runtime.json index a1e84196e..b79b32e67 100644 --- a/jupyter/datascience/ubi9-python-3.9/utils/component_runtime.json +++ b/jupyter/datascience/ubi9-python-3.9/utils/component_runtime.json @@ -7,4 +7,4 @@ "uihints": { "hidden": true } -} \ No newline at end of file +} diff --git a/jupyter/datascience/ubi9-python-3.9/utils/jupyter_elyra_config.py b/jupyter/datascience/ubi9-python-3.9/utils/jupyter_elyra_config.py index caa1981d0..d3343774d 100644 --- 
a/jupyter/datascience/ubi9-python-3.9/utils/jupyter_elyra_config.py +++ b/jupyter/datascience/ubi9-python-3.9/utils/jupyter_elyra_config.py @@ -2,10 +2,10 @@ # Pre-generated via `jupyter elyra --generate-config` # Editted out the rest of the content, use the above command to get additional config sections. -c = get_config() #noqa +c = get_config() # noqa -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ # PipelineProcessorRegistry(SingletonConfigurable) configuration -#------------------------------------------------------------------------------ +# ------------------------------------------------------------------------------ -c.PipelineProcessorRegistry.runtimes = ['kfp'] \ No newline at end of file +c.PipelineProcessorRegistry.runtimes = ["kfp"] diff --git a/jupyter/datascience/ubi9-python-3.9/utils/kfp_authentication.patch b/jupyter/datascience/ubi9-python-3.9/utils/kfp_authentication.patch index 3b4b776d1..b89f7b468 100644 --- a/jupyter/datascience/ubi9-python-3.9/utils/kfp_authentication.patch +++ b/jupyter/datascience/ubi9-python-3.9/utils/kfp_authentication.patch @@ -1,11 +1,11 @@ ---- a/kfp_authentication.py 2023-06-09 10:13:11.412910808 -0400 -+++ b/kfp_authentication.py 2023-06-09 10:14:39.879565175 -0400 +--- a/kfp_authentication.py 2023-06-09 10:13:11.412910808 -0400 ++++ b/kfp_authentication.py 2023-06-09 10:14:39.879565175 -0400 @@ -230,6 +230,7 @@ """ - + kf_url = urlsplit(api_endpoint)._replace(path="").geturl() + kf_pipelines_ssl_sa_cert = os.getenv("PIPELINES_SSL_SA_CERTS", None) - + # return data structure for successful requests auth_info = { @@ -239,6 +240,7 @@ @@ -14,5 +14,5 @@ "existing_token": None, # passed to KFP SDK client as "existing_token" param value + "ssl_ca_cert": kf_pipelines_ssl_sa_cert, # passed to KFP SDK Client as "ssl_ca_cert" param value } - + try: diff --git a/jupyter/datascience/ubi9-python-3.9/utils/pipeline-flow.svg b/jupyter/datascience/ubi9-python-3.9/utils/pipeline-flow.svg index d265fe8bd..5617ba65a 100644 --- a/jupyter/datascience/ubi9-python-3.9/utils/pipeline-flow.svg +++ b/jupyter/datascience/ubi9-python-3.9/utils/pipeline-flow.svg @@ -4,4 +4,4 @@ - \ No newline at end of file + diff --git a/jupyter/datascience/ubi9-python-3.9/utils/processor_kfp.patch b/jupyter/datascience/ubi9-python-3.9/utils/processor_kfp.patch index 2b8353a35..bc85ffaa3 100644 --- a/jupyter/datascience/ubi9-python-3.9/utils/processor_kfp.patch +++ b/jupyter/datascience/ubi9-python-3.9/utils/processor_kfp.patch @@ -1,5 +1,5 @@ ---- a/processor_kfp.py 2023-06-09 10:19:08.882563609 -0400 -+++ b/processor_kfp.py 2023-07-13 19:31:43.572407879 -0400 +--- a/processor_kfp.py 2023-06-09 10:19:08.882563609 -0400 ++++ b/processor_kfp.py 2023-07-13 19:31:43.572407879 -0400 @@ -213,6 +213,7 @@ credentials=auth_info.get("credentials", None), existing_token=auth_info.get("existing_token", None), @@ -9,25 +9,25 @@ else: client = ArgoClient( @@ -416,7 +417,7 @@ - + # create pipeline run (or specified pipeline version) run = client.run_pipeline( - experiment_id=experiment.id, job_name=job_name, pipeline_id=pipeline_id, version_id=version_id + experiment_id=experiment.id, job_name=job_name, pipeline_id=pipeline_id, version_id=pipeline_id ) - + except Exception as ex: @@ -435,7 +436,7 @@ - + self.log_pipeline_info( pipeline_name, - f"pipeline submitted: {public_api_endpoint}/#/runs/details/{run.id}", + f"pipeline submitted: {public_api_endpoint}/{run.id}", 
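The `processor_kfp.patch` hunk just above also changes how a run is submitted (the pipeline id is reused as the version id) and how the run URL is reported back. A rough sketch of the patched call sequence, assuming a KFP v1 client and placeholder ids:

```python
import os
from kfp import Client

client = Client(
    host="https://ds-pipeline-example.apps.cluster.local",        # placeholder host
    ssl_ca_cert=os.getenv("PIPELINES_SSL_SA_CERTS", None),
)

experiment = client.create_experiment(name="elyra-demo")          # hypothetical experiment
pipeline_id = "00000000-0000-0000-0000-000000000000"              # placeholder pipeline id

# After the patch, the pipeline id doubles as the version id.
run = client.run_pipeline(
    experiment_id=experiment.id,
    job_name="elyra-demo-run",
    pipeline_id=pipeline_id,
    version_id=pipeline_id,
)

# After the patch, the reported link drops the "/#/runs/details/" fragment.
public_api_endpoint = "https://ds-pipeline-example.apps.cluster.local"
print(f"pipeline submitted: {public_api_endpoint}/{run.id}")
```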
duration=time.time() - t0, ) - + @@ -451,7 +452,7 @@ - + return KfpPipelineProcessorResponse( run_id=run.id, - run_url=f"{public_api_endpoint}/#/runs/details/{run.id}", @@ -35,4 +35,4 @@ object_storage_url=object_storage_url, object_storage_path=object_storage_path, - ) \ No newline at end of file + ) diff --git a/jupyter/minimal/ubi8-python-3.8/Dockerfile b/jupyter/minimal/ubi8-python-3.8/Dockerfile index e8c3d4c9b..c75bfa383 100644 --- a/jupyter/minimal/ubi8-python-3.8/Dockerfile +++ b/jupyter/minimal/ubi8-python-3.8/Dockerfile @@ -30,4 +30,3 @@ WORKDIR /opt/app-root/src RUN sed -i -e "s/Python.*/$(python --version | cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json ENTRYPOINT ["start-notebook.sh"] - diff --git a/jupyter/minimal/ubi9-python-3.9/Dockerfile b/jupyter/minimal/ubi9-python-3.9/Dockerfile index 94a88473d..d5bafcc81 100644 --- a/jupyter/minimal/ubi9-python-3.9/Dockerfile +++ b/jupyter/minimal/ubi9-python-3.9/Dockerfile @@ -30,4 +30,3 @@ WORKDIR /opt/app-root/src RUN sed -i -e "s/Python.*/$(python --version | cut -d '.' -f-2)\",/" /opt/app-root/share/jupyter/kernels/python3/kernel.json ENTRYPOINT ["start-notebook.sh"] - diff --git a/jupyter/trustyai/ubi9-python-3.9/Pipfile b/jupyter/trustyai/ubi9-python-3.9/Pipfile index e85e1b655..90f1b5c32 100644 --- a/jupyter/trustyai/ubi9-python-3.9/Pipfile +++ b/jupyter/trustyai/ubi9-python-3.9/Pipfile @@ -49,4 +49,3 @@ setuptools = "~=68.1.2" [requires] python_version = "3.9" - diff --git a/manifests/overlays/additional/README.md b/manifests/overlays/additional/README.md index 38bba857c..941a8e983 100644 --- a/manifests/overlays/additional/README.md +++ b/manifests/overlays/additional/README.md @@ -6,4 +6,4 @@ The source build files for any notebook in this repo must exist under the [opend Notebooks: === -- None \ No newline at end of file +- None diff --git a/rstudio/c9s-python-3.9/Dockerfile b/rstudio/c9s-python-3.9/Dockerfile index 3667ba8e1..c647bdee1 100644 --- a/rstudio/c9s-python-3.9/Dockerfile +++ b/rstudio/c9s-python-3.9/Dockerfile @@ -45,7 +45,7 @@ RUN chmod 1777 /var/run/rstudio-server && \ mkdir -p /usr/share/doc/R COPY rsession.conf /etc/rstudio/rsession.conf -# Install NGINX to proxy RStudio and pass probes check +# Install NGINX to proxy RStudio and pass probes check ENV NGINX_VERSION=1.22 \ NGINX_SHORT_VER=122 \ NGINX_CONFIGURATION_PATH=${APP_ROOT}/etc/nginx.d \ diff --git a/rstudio/c9s-python-3.9/nginx/api/kernels/access.cgi b/rstudio/c9s-python-3.9/nginx/api/kernels/access.cgi index 35a70e03e..e1d5d58aa 100755 --- a/rstudio/c9s-python-3.9/nginx/api/kernels/access.cgi +++ b/rstudio/c9s-python-3.9/nginx/api/kernels/access.cgi @@ -12,4 +12,4 @@ if [[ $(date -d $LAST_ACTIVITY"+10 minutes" +%s) -lt $(date +%s) ]]; then sed s/busy/idle/ <<<"$LOG_TAIL" else echo $LOG_TAIL -fi \ No newline at end of file +fi diff --git a/rstudio/c9s-python-3.9/nginx/api/probe.cgi b/rstudio/c9s-python-3.9/nginx/api/probe.cgi index 2d0df7396..2597513e6 100755 --- a/rstudio/c9s-python-3.9/nginx/api/probe.cgi +++ b/rstudio/c9s-python-3.9/nginx/api/probe.cgi @@ -9,4 +9,4 @@ else echo "Content-type: text/html" echo "" echo "RServer is not running!" 
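For context on the RStudio nginx helper CGIs touched above (only end-of-file newlines change): `access.cgi` reports the session as idle once the last recorded activity is more than ten minutes old. A rough Python rendering of that check, purely illustrative since the real helper is a bash CGI:

```python
from datetime import datetime, timedelta

def activity_status(last_activity: datetime, log_tail: str, idle_after_minutes: int = 10) -> str:
    """Mirror of the CGI logic: report 'idle' once LAST_ACTIVITY is older than the cutoff."""
    if last_activity + timedelta(minutes=idle_after_minutes) < datetime.now():
        return log_tail.replace("busy", "idle")
    return log_tail

# Hypothetical values, standing in for the CGI's LAST_ACTIVITY and LOG_TAIL variables.
print(activity_status(datetime(2023, 10, 1, 12, 0), '{"status": "busy"}'))
```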
-fi \ No newline at end of file +fi diff --git a/rstudio/c9s-python-3.9/nginx/root/usr/share/container-scripts/nginx/common.sh b/rstudio/c9s-python-3.9/nginx/root/usr/share/container-scripts/nginx/common.sh index 319219cd2..286a1f1de 100755 --- a/rstudio/c9s-python-3.9/nginx/root/usr/share/container-scripts/nginx/common.sh +++ b/rstudio/c9s-python-3.9/nginx/root/usr/share/container-scripts/nginx/common.sh @@ -23,9 +23,9 @@ function process_extending_files() { # Custom file is prefered if [ -f $custom_dir/$filename ]; then source $custom_dir/$filename - elif [ -f $default_dir/$filename ]; then + elif [ -f $default_dir/$filename ]; then source $default_dir/$filename fi fi done <<<"$(get_matched_files "$custom_dir" "$default_dir" '*.sh' | sort -u)" -} \ No newline at end of file +} diff --git a/rstudio/c9s-python-3.9/rsession.sh b/rstudio/c9s-python-3.9/rsession.sh index 55b15ce8a..8db6d4456 100755 --- a/rstudio/c9s-python-3.9/rsession.sh +++ b/rstudio/c9s-python-3.9/rsession.sh @@ -1,3 +1,3 @@ #!/bin/bash -/usr/lib/rstudio-server/bin/rsession "$@" \ No newline at end of file +/usr/lib/rstudio-server/bin/rsession "$@" diff --git a/rstudio/c9s-python-3.9/run-nginx.sh b/rstudio/c9s-python-3.9/run-nginx.sh index 5d0135fa8..ca8207a0c 100755 --- a/rstudio/c9s-python-3.9/run-nginx.sh +++ b/rstudio/c9s-python-3.9/run-nginx.sh @@ -23,4 +23,4 @@ else envsubst '${BASE_URL}' < /etc/nginx/nginx.conf | tee /etc/nginx/nginx.conf fi -nginx \ No newline at end of file +nginx diff --git a/rstudio/c9s-python-3.9/run-rstudio.sh b/rstudio/c9s-python-3.9/run-rstudio.sh index d30ec9b30..a0c0edd13 100755 --- a/rstudio/c9s-python-3.9/run-rstudio.sh +++ b/rstudio/c9s-python-3.9/run-rstudio.sh @@ -6,7 +6,7 @@ source ${SCRIPT_DIR}/utils/*.sh # Start nginx and fastcgiwrap run-nginx.sh & -spawn-fcgi -s /var/run/fcgiwrap.socket -M 766 /usr/sbin/fcgiwrap +spawn-fcgi -s /var/run/fcgiwrap.socket -M 766 /usr/sbin/fcgiwrap # Add .bashrc for custom promt if not present diff --git a/rstudio/c9s-python-3.9/setup_rstudio.py b/rstudio/c9s-python-3.9/setup_rstudio.py index c6d3c0d96..169be05fe 100644 --- a/rstudio/c9s-python-3.9/setup_rstudio.py +++ b/rstudio/c9s-python-3.9/setup_rstudio.py @@ -4,15 +4,15 @@ import subprocess import tempfile from textwrap import dedent -from urllib.parse import urlparse, urlunparse + def get_rstudio_executable(prog): # Find prog in known locations other_paths = [ # When rstudio-server deb is installed - os.path.join('/usr/lib/rstudio-server/bin', prog), + os.path.join("/usr/lib/rstudio-server/bin", prog), # When just rstudio deb is installed - os.path.join('/usr/lib/rstudio/bin', prog), + os.path.join("/usr/lib/rstudio/bin", prog), ] if shutil.which(prog): return shutil.which(prog) @@ -21,31 +21,36 @@ def get_rstudio_executable(prog): if os.path.exists(op): return op - raise FileNotFoundError(f'Could not find {prog} in PATH') + raise FileNotFoundError(f"Could not find {prog} in PATH") + def db_config(db_dir): - ''' + """ Create a temporary directory to hold rserver's database, and create the configuration file rserver uses to find the database. 
https://docs.rstudio.com/ide/server-pro/latest/database.html https://github.com/rstudio/rstudio/tree/v1.4.1103/src/cpp/server/db - ''' + """ # create the rserver database config - db_conf = dedent(""" + db_conf = dedent( + """ provider=sqlite directory={directory} - """).format(directory=db_dir) - f = tempfile.NamedTemporaryFile(mode='w', delete=False, dir=db_dir) + """ + ).format(directory=db_dir) + f = tempfile.NamedTemporaryFile(mode="w", delete=False, dir=db_dir) db_config_name = f.name f.write(db_conf) f.close() return db_config_name + def _support_arg(arg): - ret = subprocess.check_output([get_rstudio_executable('rserver'), '--help']) + ret = subprocess.check_output([get_rstudio_executable("rserver"), "--help"]) return ret.decode().find(arg) != -1 + def _get_cmd(port): ntf = tempfile.NamedTemporaryFile() @@ -55,29 +60,30 @@ def _get_cmd(port): database_config_file = db_config(server_data_dir) cmd = [ - get_rstudio_executable('rserver'), - '--server-daemonize=0', - '--server-working-dir=' + os.getenv('HOME'), - '--auth-none=1', - '--www-frame-origin=same', - #'--www-address=0.0.0.0', - '--www-port=' + str(port), - '--www-verify-user-agent=0', - '--rsession-which-r=' + get_rstudio_executable('R'), - '--secure-cookie-key-file=' + ntf.name, - '--server-user=' + getpass.getuser(), - '--rsession-path=/opt/app-root/bin/rsession.sh', + get_rstudio_executable("rserver"), + "--server-daemonize=0", + "--server-working-dir=" + os.getenv("HOME"), + "--auth-none=1", + "--www-frame-origin=same", + # '--www-address=0.0.0.0', + "--www-port=" + str(port), + "--www-verify-user-agent=0", + "--rsession-which-r=" + get_rstudio_executable("R"), + "--secure-cookie-key-file=" + ntf.name, + "--server-user=" + getpass.getuser(), + "--rsession-path=/opt/app-root/bin/rsession.sh", ] # Support at least v1.2.1335 and up - #if _support_arg('www-root-path'): + # if _support_arg('www-root-path'): # cmd.append('--www-root-path=/rstudio/') - if _support_arg('server-data-dir'): - cmd.append(f'--server-data-dir={server_data_dir}') - if _support_arg('database-config-file'): - cmd.append(f'--database-config-file={database_config_file}') - - return(' '.join(cmd)) + if _support_arg("server-data-dir"): + cmd.append(f"--server-data-dir={server_data_dir}") + if _support_arg("database-config-file"): + cmd.append(f"--database-config-file={database_config_file}") + + return " ".join(cmd) + if __name__ == "__main__": print(_get_cmd(8787)) diff --git a/runtimes/datascience/ubi8-python-3.8/utils/bootstrapper.py b/runtimes/datascience/ubi8-python-3.8/utils/bootstrapper.py index 03680b065..e210a4312 100644 --- a/runtimes/datascience/ubi8-python-3.8/utils/bootstrapper.py +++ b/runtimes/datascience/ubi8-python-3.8/utils/bootstrapper.py @@ -1,3 +1,4 @@ +"""Boot Strap script to install and result notebook commands as scripts.""" # Copied from: https://github.com/elyra-ai/elyra/blob/main/elyra/kfp/bootstrapper.py # # Copyright 2018-2023 Elyra Authors @@ -52,7 +53,7 @@ class FileOpBase(ABC): - """Abstract base class for file-based operations""" + """Abstract base class for file-based operations.""" filepath = None cos_client = None @@ -60,7 +61,7 @@ class FileOpBase(ABC): @classmethod def get_instance(cls: Type[F], **kwargs: Any) -> F: - """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument""" + """Creates an appropriate subclass instance based on the extension of the filepath (-f) argument.""" filepath = kwargs["filepath"] if ".ipynb" in filepath: return NotebookFileOp(**kwargs) @@ -72,7 +73,7 @@ 
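The `setup_rstudio.py` diff above is a pure Black-style reformat of the helper that assembles the `rserver` command line (www port, secure-cookie key file, sqlite database config, and the `rsession.sh` wrapper). A minimal sketch of how that command string might be consumed, assuming the module is importable inside the workbench image:

```python
import shlex
import subprocess

# Assumption: setup_rstudio is on PYTHONPATH inside the image, as the Dockerfile arranges.
from setup_rstudio import _get_cmd

cmd = _get_cmd(8787)   # same default port the module prints when run as a script
print(cmd)             # e.g. '/usr/lib/rstudio-server/bin/rserver --server-daemonize=0 ...'
subprocess.run(shlex.split(cmd), check=True)  # would start rserver in the foreground
```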
def get_instance(cls: Type[F], **kwargs: Any) -> F: raise ValueError(f"Unsupported file type: {filepath}") def __init__(self, **kwargs: Any) -> None: - """Initializes the FileOpBase instance""" + """Initializes the FileOpBase instance.""" import minio from minio.credentials import providers @@ -82,7 +83,9 @@ def __init__(self, **kwargs: Any) -> None: self.cos_bucket = self.input_params.get("cos-bucket") self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) + self.pipeline_param_dict = self.convert_param_str_to_dict( + self.input_params.get("pipeline_parameters") + ) # Infer secure from the endpoint's scheme. self.secure = self.cos_endpoint.scheme == "https" @@ -93,9 +96,13 @@ def __init__(self, **kwargs: Any) -> None: access_key=self.input_params.get("cos-user"), secret_key=self.input_params.get("cos-password"), ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: + elif ( + "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ + ): cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: + elif ( + "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ + ): cred_provider = providers.IamAwsProvider() else: raise RuntimeError( @@ -104,12 +111,16 @@ def __init__(self, **kwargs: Any) -> None: ) # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) + self.cos_client = minio.Minio( + self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider + ) @abstractmethod def execute(self) -> None: - """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") + """Execute the operation relative to derived class.""" + raise NotImplementedError( + "Method 'execute()' must be implemented by subclasses!" + ) def process_dependencies(self) -> None: """Process dependencies @@ -180,7 +191,9 @@ def process_metrics_and_metadata(self) -> None: # output_path doesn't meet the requirements # treat this as a non-fatal error and log a warning logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) + OpUtil.log_operation_info( + "Aborted metrics and metadata processing", time.time() - t0 + ) return # Name of the proprietary KFP UI metadata file. @@ -225,11 +238,15 @@ def process_metrics_and_metadata(self) -> None: except ValueError as ve: # The file content could not be parsed. Log a warning # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") + logger.warning( + f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}" + ) except Exception as ex: # Something is wrong with the user-generated metadata file. # Log a warning and treat this as a non-fatal error. 
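The reformatted `__init__` shown above keeps the same credential fallback order: explicit `cos-user`/`cos-password`, then `AWS_ACCESS_KEY_ID`/`AWS_SECRET_ACCESS_KEY` from the environment, then IAM web-identity. A condensed sketch of that selection, with placeholder endpoint and credential sources:

```python
import os
from urllib.parse import urlparse

import minio
from minio.credentials import providers

cos_endpoint = urlparse("https://minio.example.svc:9000")  # placeholder endpoint

cos_user = os.getenv("COS_USER")          # stand-ins for the parsed --cos-* arguments
cos_password = os.getenv("COS_PASSWORD")

if cos_user and cos_password:
    cred_provider = providers.StaticProvider(access_key=cos_user, secret_key=cos_password)
elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ:
    cred_provider = providers.EnvAWSProvider()
elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ:
    cred_provider = providers.IamAwsProvider()
else:
    raise RuntimeError("No object-storage credentials available")

cos_client = minio.Minio(
    cos_endpoint.netloc,
    secure=cos_endpoint.scheme == "https",
    credentials=cred_provider,
)
```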
- logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") + logger.warning( + f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}" + ) # # Augment kfp_ui_metadata_filename with Elyra-specific information: @@ -245,13 +262,16 @@ def process_metrics_and_metadata(self) -> None: metadata = {} # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): + if metadata.get("outputs", None) is None or not isinstance( + metadata["outputs"], list + ): metadata["outputs"] = [] # Define HREF for COS bucket: # // bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" + urlunparse(self.cos_endpoint), + f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/", ) # add Elyra metadata to 'outputs' @@ -292,13 +312,20 @@ def get_file_from_object_storage(self, file_to_get: str) -> None: object_to_get = self.get_object_storage_filename(file_to_get) t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) + self.cos_client.fget_object( + bucket_name=self.cos_bucket, + object_name=object_to_get, + file_path=file_to_get, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration + f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", + duration, ) - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: + def put_file_to_object_storage( + self, file_to_upload: str, object_name: Optional[str] = None + ) -> None: """Utility function to put files into an object storage :param file_to_upload: filename @@ -311,10 +338,15 @@ def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[ object_to_upload = self.get_object_storage_filename(object_to_upload) t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) + self.cos_client.fput_object( + bucket_name=self.cos_bucket, + object_name=object_to_upload, + file_path=file_to_upload, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration + f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", + duration, ) def has_wildcard(self, filename): @@ -335,14 +367,18 @@ def process_output_file(self, output_file): else: self.put_file_to_object_storage(matched_file) - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: + def convert_param_str_to_dict( + self, pipeline_parameters: Optional[str] = None + ) -> Dict[str, Any]: """Convert INOUT-separated string of pipeline parameters into a dictionary.""" parameter_dict = {} if pipeline_parameters: parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) for parameter in parameter_list: param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): + if self.parameter_pass_method == "env" and ( + not value or not isinstance(value, str) + ): continue # env vars must be non-empty strings parameter_dict[param_name] = value return parameter_dict @@ -366,7 +402,9 @@ def execute(self) -> None: notebook_html = f"{notebook_name}.html" try: - 
OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") + OpUtil.log_operation_info( + f"executing notebook using 'papermill {notebook} {notebook_output}'" + ) t0 = time.time() # Include kernel selection in execution time kernel_name = NotebookFileOp.find_best_kernel(notebook) @@ -377,7 +415,9 @@ def execute(self) -> None: import papermill - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) + papermill.execute_notebook( + notebook, notebook_output, kernel_name=kernel_name, **kwargs + ) duration = time.time() - t0 OpUtil.log_operation_info("notebook execution completed", duration) @@ -485,7 +525,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(python_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("python script execution completed", duration) @@ -511,7 +553,9 @@ def execute(self) -> None: r_script_output = f"{r_script_name}.log" try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}' to '{r_script_output}'") + OpUtil.log_operation_info( + f"executing R script using 'Rscript {r_script}' to '{r_script_output}'" + ) t0 = time.time() run_args = ["Rscript", r_script] @@ -519,7 +563,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(r_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("R script execution completed", duration) @@ -558,7 +604,9 @@ def package_install(cls, user_volume_path) -> None: continue try: version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant + except ( + version.InvalidVersion + ): # current version is not PEP-440 compliant logger.warning( f"WARNING: Source package '{package}' found already installed from " f"{current_packages[package]}. This may conflict with the required " @@ -566,7 +614,9 @@ def package_install(cls, user_volume_path) -> None: ) continue if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") + logger.info( + f"Updating {package} package from version {current_packages[package]} to {ver}..." + ) to_install_list.append(f"{package}=={ver}") elif version.Version(ver) < version.Version(current_packages[package]): logger.info( @@ -574,7 +624,9 @@ def package_install(cls, user_volume_path) -> None: f"already installed. Skipping..." ) else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") + logger.info( + f"Package not found. Installing {package} package with version {ver}..." 
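The `package_install` hunks above only re-wrap long lines; the underlying logic compares each required pin against what is already installed using `packaging.version` and only pip-installs what is missing or older. A small self-contained illustration of that comparison (package names and versions are made up):

```python
from packaging import version

required = {"papermill": "2.4.0", "minio": "7.1.16"}   # hypothetical pins
installed = {"papermill": "2.3.4", "minio": "7.1.16"}  # hypothetical current state

to_install = []
for package, ver in required.items():
    current = installed.get(package)
    if current is None or version.Version(ver) > version.Version(current):
        to_install.append(f"{package}=={ver}")
    # equal or newer installed versions are skipped, as in the bootstrapper

print(to_install)  # ['papermill==2.4.0']
```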
+ ) to_install_list.append(f"{package}=={ver}") if to_install_list: @@ -582,7 +634,9 @@ def package_install(cls, user_volume_path) -> None: to_install_list.insert(0, f"--target={user_volume_path}") to_install_list.append("--no-cache-dir") - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) + subprocess.run( + [sys.executable, "-m", "pip", "install"] + to_install_list, check=True + ) if user_volume_path: os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" @@ -609,16 +663,28 @@ def package_list_to_dict(cls, filename: str) -> dict: for line in fh: if line[0] != "#": if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") + package_name, package_version = line.strip("\n").split( + sep=" @ " + ) elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") + package_name, package_version = line.strip("\n").split( + sep="===" + ) elif "==" in line: package_name, package_version = line.strip("\n").split(sep="==") elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system + package_name = ( + line.strip("\n") + .replace("-e ", "") + .replace("--editable ", "") + ) + if ( + "#egg=" in package_name + ): # editable package from version control system package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory + elif ( + "/" in package_name + ): # editable package from local directory package_name = os.path.basename(package_name) package_version = None else: @@ -638,10 +704,18 @@ def parse_arguments(cls, args) -> dict: logger.debug("Parsing Arguments.....") parser = argparse.ArgumentParser() parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True + "-e", + "--cos-endpoint", + dest="cos-endpoint", + help="Cloud object storage endpoint", + required=True, ) parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True + "-b", + "--cos-bucket", + dest="cos-bucket", + help="Cloud object storage bucket to use", + required=True, ) parser.add_argument( "-d", @@ -657,9 +731,23 @@ def parse_arguments(cls, args) -> dict: help="Archive containing notebook and dependency artifacts", required=True, ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) + parser.add_argument( + "-f", "--file", dest="filepath", help="File to execute", required=True + ) + parser.add_argument( + "-o", + "--outputs", + dest="outputs", + help="Files to output to object store", + required=False, + ) + parser.add_argument( + "-i", + "--inputs", + dest="inputs", + help="Files to pull in from parent node", + required=False, + ) parser.add_argument( "-p", "--user-volume-path", @@ -694,12 +782,16 @@ def parse_arguments(cls, args) -> dict: # set pipeline name as global pipeline_name = parsed_args.get("pipeline-name") # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) + operation_name = os.path.basename( + 
os.path.splitext(parsed_args.get("filepath"))[0] + ) return parsed_args @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: + def log_operation_info( + cls, action_clause: str, duration_secs: Optional[float] = None + ) -> None: """Produces a formatted log INFO message used entirely for support purposes. This method is intended to be called for any entries that should be captured across aggregated @@ -715,13 +807,17 @@ def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = global pipeline_name, operation_name if enable_pipeline_info: duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") + logger.info( + f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}" + ) def main(): # Configure logger format, level logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG + format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", + datefmt="%H:%M:%S", + level=logging.DEBUG, ) # Setup packages and gather arguments input_params = OpUtil.parse_arguments(sys.argv[1:]) @@ -744,4 +840,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/runtimes/datascience/ubi9-python-3.9/utils/bootstrapper.py b/runtimes/datascience/ubi9-python-3.9/utils/bootstrapper.py index 03680b065..494f7b7dd 100644 --- a/runtimes/datascience/ubi9-python-3.9/utils/bootstrapper.py +++ b/runtimes/datascience/ubi9-python-3.9/utils/bootstrapper.py @@ -82,7 +82,9 @@ def __init__(self, **kwargs: Any) -> None: self.cos_bucket = self.input_params.get("cos-bucket") self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) + self.pipeline_param_dict = self.convert_param_str_to_dict( + self.input_params.get("pipeline_parameters") + ) # Infer secure from the endpoint's scheme. self.secure = self.cos_endpoint.scheme == "https" @@ -93,9 +95,13 @@ def __init__(self, **kwargs: Any) -> None: access_key=self.input_params.get("cos-user"), secret_key=self.input_params.get("cos-password"), ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: + elif ( + "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ + ): cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: + elif ( + "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ + ): cred_provider = providers.IamAwsProvider() else: raise RuntimeError( @@ -104,12 +110,16 @@ def __init__(self, **kwargs: Any) -> None: ) # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) + self.cos_client = minio.Minio( + self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider + ) @abstractmethod def execute(self) -> None: """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") + raise NotImplementedError( + "Method 'execute()' must be implemented by subclasses!" 
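The `package_list_to_dict` hunk reformatted a little earlier (and repeated for each runtime image below) parses a frozen requirements file that can mix `==`, `===`, direct-URL (`name @ url`) and editable (`-e`) lines. A rough standalone version of that parsing, shown here for clarity; it omits the editable-install branches:

```python
def package_list_to_dict(lines):
    """Map package name -> pinned version for the common requirements-file formats."""
    packages = {}
    for line in lines:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        if " @ " in line:                # e.g. "name @ https://..."
            name, ver = line.split(" @ ", 1)
        elif "===" in line:
            name, ver = line.split("===", 1)
        elif "==" in line:
            name, ver = line.split("==", 1)
        else:                            # unpinned or editable lines are skipped in this sketch
            continue
        packages[name] = ver
    return packages

print(package_list_to_dict(["papermill==2.4.0", "minio===7.1.16", "# comment"]))
# {'papermill': '2.4.0', 'minio': '7.1.16'}
```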
+ ) def process_dependencies(self) -> None: """Process dependencies @@ -180,7 +190,9 @@ def process_metrics_and_metadata(self) -> None: # output_path doesn't meet the requirements # treat this as a non-fatal error and log a warning logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) + OpUtil.log_operation_info( + "Aborted metrics and metadata processing", time.time() - t0 + ) return # Name of the proprietary KFP UI metadata file. @@ -225,11 +237,15 @@ def process_metrics_and_metadata(self) -> None: except ValueError as ve: # The file content could not be parsed. Log a warning # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") + logger.warning( + f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}" + ) except Exception as ex: # Something is wrong with the user-generated metadata file. # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") + logger.warning( + f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}" + ) # # Augment kfp_ui_metadata_filename with Elyra-specific information: @@ -245,13 +261,16 @@ def process_metrics_and_metadata(self) -> None: metadata = {} # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): + if metadata.get("outputs", None) is None or not isinstance( + metadata["outputs"], list + ): metadata["outputs"] = [] # Define HREF for COS bucket: # // bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" + urlunparse(self.cos_endpoint), + f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/", ) # add Elyra metadata to 'outputs' @@ -292,13 +311,20 @@ def get_file_from_object_storage(self, file_to_get: str) -> None: object_to_get = self.get_object_storage_filename(file_to_get) t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) + self.cos_client.fget_object( + bucket_name=self.cos_bucket, + object_name=object_to_get, + file_path=file_to_get, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration + f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", + duration, ) - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: + def put_file_to_object_storage( + self, file_to_upload: str, object_name: Optional[str] = None + ) -> None: """Utility function to put files into an object storage :param file_to_upload: filename @@ -311,10 +337,15 @@ def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[ object_to_upload = self.get_object_storage_filename(object_to_upload) t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) + self.cos_client.fput_object( + bucket_name=self.cos_bucket, + object_name=object_to_upload, + file_path=file_to_upload, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration + f"uploaded {file_to_upload} to bucket: 
{self.cos_bucket} object: {object_to_upload}", + duration, ) def has_wildcard(self, filename): @@ -335,14 +366,18 @@ def process_output_file(self, output_file): else: self.put_file_to_object_storage(matched_file) - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: + def convert_param_str_to_dict( + self, pipeline_parameters: Optional[str] = None + ) -> Dict[str, Any]: """Convert INOUT-separated string of pipeline parameters into a dictionary.""" parameter_dict = {} if pipeline_parameters: parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) for parameter in parameter_list: param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): + if self.parameter_pass_method == "env" and ( + not value or not isinstance(value, str) + ): continue # env vars must be non-empty strings parameter_dict[param_name] = value return parameter_dict @@ -366,7 +401,9 @@ def execute(self) -> None: notebook_html = f"{notebook_name}.html" try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") + OpUtil.log_operation_info( + f"executing notebook using 'papermill {notebook} {notebook_output}'" + ) t0 = time.time() # Include kernel selection in execution time kernel_name = NotebookFileOp.find_best_kernel(notebook) @@ -377,7 +414,9 @@ def execute(self) -> None: import papermill - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) + papermill.execute_notebook( + notebook, notebook_output, kernel_name=kernel_name, **kwargs + ) duration = time.time() - t0 OpUtil.log_operation_info("notebook execution completed", duration) @@ -485,7 +524,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(python_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("python script execution completed", duration) @@ -511,7 +552,9 @@ def execute(self) -> None: r_script_output = f"{r_script_name}.log" try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}' to '{r_script_output}'") + OpUtil.log_operation_info( + f"executing R script using 'Rscript {r_script}' to '{r_script_output}'" + ) t0 = time.time() run_args = ["Rscript", r_script] @@ -519,7 +562,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(r_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("R script execution completed", duration) @@ -558,7 +603,9 @@ def package_install(cls, user_volume_path) -> None: continue try: version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant + except ( + version.InvalidVersion + ): # current version is not PEP-440 compliant logger.warning( f"WARNING: Source package '{package}' found already installed from " f"{current_packages[package]}. 
This may conflict with the required " @@ -566,7 +613,9 @@ def package_install(cls, user_volume_path) -> None: ) continue if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") + logger.info( + f"Updating {package} package from version {current_packages[package]} to {ver}..." + ) to_install_list.append(f"{package}=={ver}") elif version.Version(ver) < version.Version(current_packages[package]): logger.info( @@ -574,7 +623,9 @@ def package_install(cls, user_volume_path) -> None: f"already installed. Skipping..." ) else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") + logger.info( + f"Package not found. Installing {package} package with version {ver}..." + ) to_install_list.append(f"{package}=={ver}") if to_install_list: @@ -582,7 +633,9 @@ def package_install(cls, user_volume_path) -> None: to_install_list.insert(0, f"--target={user_volume_path}") to_install_list.append("--no-cache-dir") - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) + subprocess.run( + [sys.executable, "-m", "pip", "install"] + to_install_list, check=True + ) if user_volume_path: os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" @@ -609,16 +662,28 @@ def package_list_to_dict(cls, filename: str) -> dict: for line in fh: if line[0] != "#": if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") + package_name, package_version = line.strip("\n").split( + sep=" @ " + ) elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") + package_name, package_version = line.strip("\n").split( + sep="===" + ) elif "==" in line: package_name, package_version = line.strip("\n").split(sep="==") elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system + package_name = ( + line.strip("\n") + .replace("-e ", "") + .replace("--editable ", "") + ) + if ( + "#egg=" in package_name + ): # editable package from version control system package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory + elif ( + "/" in package_name + ): # editable package from local directory package_name = os.path.basename(package_name) package_version = None else: @@ -638,10 +703,18 @@ def parse_arguments(cls, args) -> dict: logger.debug("Parsing Arguments.....") parser = argparse.ArgumentParser() parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True + "-e", + "--cos-endpoint", + dest="cos-endpoint", + help="Cloud object storage endpoint", + required=True, ) parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True + "-b", + "--cos-bucket", + dest="cos-bucket", + help="Cloud object storage bucket to use", + required=True, ) parser.add_argument( "-d", @@ -657,9 +730,23 @@ def parse_arguments(cls, args) -> dict: help="Archive containing notebook and dependency artifacts", required=True, ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from 
parent node", required=False) + parser.add_argument( + "-f", "--file", dest="filepath", help="File to execute", required=True + ) + parser.add_argument( + "-o", + "--outputs", + dest="outputs", + help="Files to output to object store", + required=False, + ) + parser.add_argument( + "-i", + "--inputs", + dest="inputs", + help="Files to pull in from parent node", + required=False, + ) parser.add_argument( "-p", "--user-volume-path", @@ -694,12 +781,16 @@ def parse_arguments(cls, args) -> dict: # set pipeline name as global pipeline_name = parsed_args.get("pipeline-name") # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) + operation_name = os.path.basename( + os.path.splitext(parsed_args.get("filepath"))[0] + ) return parsed_args @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: + def log_operation_info( + cls, action_clause: str, duration_secs: Optional[float] = None + ) -> None: """Produces a formatted log INFO message used entirely for support purposes. This method is intended to be called for any entries that should be captured across aggregated @@ -715,13 +806,17 @@ def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = global pipeline_name, operation_name if enable_pipeline_info: duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") + logger.info( + f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}" + ) def main(): # Configure logger format, level logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG + format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", + datefmt="%H:%M:%S", + level=logging.DEBUG, ) # Setup packages and gather arguments input_params = OpUtil.parse_arguments(sys.argv[1:]) @@ -744,4 +839,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/runtimes/minimal/ubi8-python-3.8/utils/bootstrapper.py b/runtimes/minimal/ubi8-python-3.8/utils/bootstrapper.py index 03680b065..494f7b7dd 100644 --- a/runtimes/minimal/ubi8-python-3.8/utils/bootstrapper.py +++ b/runtimes/minimal/ubi8-python-3.8/utils/bootstrapper.py @@ -82,7 +82,9 @@ def __init__(self, **kwargs: Any) -> None: self.cos_bucket = self.input_params.get("cos-bucket") self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) + self.pipeline_param_dict = self.convert_param_str_to_dict( + self.input_params.get("pipeline_parameters") + ) # Infer secure from the endpoint's scheme. 
self.secure = self.cos_endpoint.scheme == "https" @@ -93,9 +95,13 @@ def __init__(self, **kwargs: Any) -> None: access_key=self.input_params.get("cos-user"), secret_key=self.input_params.get("cos-password"), ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: + elif ( + "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ + ): cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: + elif ( + "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ + ): cred_provider = providers.IamAwsProvider() else: raise RuntimeError( @@ -104,12 +110,16 @@ def __init__(self, **kwargs: Any) -> None: ) # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) + self.cos_client = minio.Minio( + self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider + ) @abstractmethod def execute(self) -> None: """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") + raise NotImplementedError( + "Method 'execute()' must be implemented by subclasses!" + ) def process_dependencies(self) -> None: """Process dependencies @@ -180,7 +190,9 @@ def process_metrics_and_metadata(self) -> None: # output_path doesn't meet the requirements # treat this as a non-fatal error and log a warning logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) + OpUtil.log_operation_info( + "Aborted metrics and metadata processing", time.time() - t0 + ) return # Name of the proprietary KFP UI metadata file. @@ -225,11 +237,15 @@ def process_metrics_and_metadata(self) -> None: except ValueError as ve: # The file content could not be parsed. Log a warning # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") + logger.warning( + f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}" + ) except Exception as ex: # Something is wrong with the user-generated metadata file. # Log a warning and treat this as a non-fatal error. 
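The `convert_param_str_to_dict` change that recurs in each bootstrapper diff splits a separator-delimited string of `name=value` pairs and, when parameters are passed through environment variables, drops empty values. A compact sketch of that behaviour; the separator value used here is an assumption for illustration, the real constant lives in `bootstrapper.py`:

```python
INOUT_SEPARATOR = ";"  # assumed separator for this example only

def convert_param_str_to_dict(pipeline_parameters=None, parameter_pass_method="env"):
    parameter_dict = {}
    if pipeline_parameters:
        for parameter in pipeline_parameters.split(INOUT_SEPARATOR):
            param_name, value = parameter.split("=", 1)
            if parameter_pass_method == "env" and (not value or not isinstance(value, str)):
                continue  # env vars must be non-empty strings
            parameter_dict[param_name] = value
    return parameter_dict

print(convert_param_str_to_dict("alpha=0.1;output_dir=/tmp/results;empty="))
# {'alpha': '0.1', 'output_dir': '/tmp/results'}
```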
- logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") + logger.warning( + f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}" + ) # # Augment kfp_ui_metadata_filename with Elyra-specific information: @@ -245,13 +261,16 @@ def process_metrics_and_metadata(self) -> None: metadata = {} # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): + if metadata.get("outputs", None) is None or not isinstance( + metadata["outputs"], list + ): metadata["outputs"] = [] # Define HREF for COS bucket: # // bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" + urlunparse(self.cos_endpoint), + f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/", ) # add Elyra metadata to 'outputs' @@ -292,13 +311,20 @@ def get_file_from_object_storage(self, file_to_get: str) -> None: object_to_get = self.get_object_storage_filename(file_to_get) t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) + self.cos_client.fget_object( + bucket_name=self.cos_bucket, + object_name=object_to_get, + file_path=file_to_get, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration + f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", + duration, ) - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: + def put_file_to_object_storage( + self, file_to_upload: str, object_name: Optional[str] = None + ) -> None: """Utility function to put files into an object storage :param file_to_upload: filename @@ -311,10 +337,15 @@ def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[ object_to_upload = self.get_object_storage_filename(object_to_upload) t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) + self.cos_client.fput_object( + bucket_name=self.cos_bucket, + object_name=object_to_upload, + file_path=file_to_upload, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration + f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", + duration, ) def has_wildcard(self, filename): @@ -335,14 +366,18 @@ def process_output_file(self, output_file): else: self.put_file_to_object_storage(matched_file) - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: + def convert_param_str_to_dict( + self, pipeline_parameters: Optional[str] = None + ) -> Dict[str, Any]: """Convert INOUT-separated string of pipeline parameters into a dictionary.""" parameter_dict = {} if pipeline_parameters: parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) for parameter in parameter_list: param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): + if self.parameter_pass_method == "env" and ( + not value or not isinstance(value, str) + ): continue # env vars must be non-empty strings parameter_dict[param_name] = value return parameter_dict @@ -366,7 +401,9 @@ def execute(self) -> None: notebook_html = f"{notebook_name}.html" try: - 
OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") + OpUtil.log_operation_info( + f"executing notebook using 'papermill {notebook} {notebook_output}'" + ) t0 = time.time() # Include kernel selection in execution time kernel_name = NotebookFileOp.find_best_kernel(notebook) @@ -377,7 +414,9 @@ def execute(self) -> None: import papermill - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) + papermill.execute_notebook( + notebook, notebook_output, kernel_name=kernel_name, **kwargs + ) duration = time.time() - t0 OpUtil.log_operation_info("notebook execution completed", duration) @@ -485,7 +524,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(python_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("python script execution completed", duration) @@ -511,7 +552,9 @@ def execute(self) -> None: r_script_output = f"{r_script_name}.log" try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}' to '{r_script_output}'") + OpUtil.log_operation_info( + f"executing R script using 'Rscript {r_script}' to '{r_script_output}'" + ) t0 = time.time() run_args = ["Rscript", r_script] @@ -519,7 +562,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(r_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("R script execution completed", duration) @@ -558,7 +603,9 @@ def package_install(cls, user_volume_path) -> None: continue try: version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant + except ( + version.InvalidVersion + ): # current version is not PEP-440 compliant logger.warning( f"WARNING: Source package '{package}' found already installed from " f"{current_packages[package]}. This may conflict with the required " @@ -566,7 +613,9 @@ def package_install(cls, user_volume_path) -> None: ) continue if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") + logger.info( + f"Updating {package} package from version {current_packages[package]} to {ver}..." + ) to_install_list.append(f"{package}=={ver}") elif version.Version(ver) < version.Version(current_packages[package]): logger.info( @@ -574,7 +623,9 @@ def package_install(cls, user_volume_path) -> None: f"already installed. Skipping..." ) else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") + logger.info( + f"Package not found. Installing {package} package with version {ver}..." 
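The notebook-execution hunks above re-wrap the same `papermill.execute_notebook` call in each runtime image. A stripped-down version of that call, with placeholder notebook names and an assumed kernel:

```python
import time

import papermill

notebook = "step.ipynb"                # placeholder input notebook
notebook_output = "step-output.ipynb"  # placeholder output notebook

t0 = time.time()
# kernel_name would normally come from NotebookFileOp.find_best_kernel(notebook);
# "python3" is assumed here.
papermill.execute_notebook(notebook, notebook_output, kernel_name="python3")
print(f"notebook execution completed ({time.time() - t0:.3f} secs)")
```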
+ ) to_install_list.append(f"{package}=={ver}") if to_install_list: @@ -582,7 +633,9 @@ def package_install(cls, user_volume_path) -> None: to_install_list.insert(0, f"--target={user_volume_path}") to_install_list.append("--no-cache-dir") - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) + subprocess.run( + [sys.executable, "-m", "pip", "install"] + to_install_list, check=True + ) if user_volume_path: os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" @@ -609,16 +662,28 @@ def package_list_to_dict(cls, filename: str) -> dict: for line in fh: if line[0] != "#": if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") + package_name, package_version = line.strip("\n").split( + sep=" @ " + ) elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") + package_name, package_version = line.strip("\n").split( + sep="===" + ) elif "==" in line: package_name, package_version = line.strip("\n").split(sep="==") elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system + package_name = ( + line.strip("\n") + .replace("-e ", "") + .replace("--editable ", "") + ) + if ( + "#egg=" in package_name + ): # editable package from version control system package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory + elif ( + "/" in package_name + ): # editable package from local directory package_name = os.path.basename(package_name) package_version = None else: @@ -638,10 +703,18 @@ def parse_arguments(cls, args) -> dict: logger.debug("Parsing Arguments.....") parser = argparse.ArgumentParser() parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True + "-e", + "--cos-endpoint", + dest="cos-endpoint", + help="Cloud object storage endpoint", + required=True, ) parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True + "-b", + "--cos-bucket", + dest="cos-bucket", + help="Cloud object storage bucket to use", + required=True, ) parser.add_argument( "-d", @@ -657,9 +730,23 @@ def parse_arguments(cls, args) -> dict: help="Archive containing notebook and dependency artifacts", required=True, ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) + parser.add_argument( + "-f", "--file", dest="filepath", help="File to execute", required=True + ) + parser.add_argument( + "-o", + "--outputs", + dest="outputs", + help="Files to output to object store", + required=False, + ) + parser.add_argument( + "-i", + "--inputs", + dest="inputs", + help="Files to pull in from parent node", + required=False, + ) parser.add_argument( "-p", "--user-volume-path", @@ -694,12 +781,16 @@ def parse_arguments(cls, args) -> dict: # set pipeline name as global pipeline_name = parsed_args.get("pipeline-name") # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) + operation_name = os.path.basename( + 
os.path.splitext(parsed_args.get("filepath"))[0] + ) return parsed_args @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: + def log_operation_info( + cls, action_clause: str, duration_secs: Optional[float] = None + ) -> None: """Produces a formatted log INFO message used entirely for support purposes. This method is intended to be called for any entries that should be captured across aggregated @@ -715,13 +806,17 @@ def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = global pipeline_name, operation_name if enable_pipeline_info: duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") + logger.info( + f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}" + ) def main(): # Configure logger format, level logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG + format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", + datefmt="%H:%M:%S", + level=logging.DEBUG, ) # Setup packages and gather arguments input_params = OpUtil.parse_arguments(sys.argv[1:]) @@ -744,4 +839,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/runtimes/minimal/ubi9-python-3.9/utils/bootstrapper.py b/runtimes/minimal/ubi9-python-3.9/utils/bootstrapper.py index 03680b065..494f7b7dd 100644 --- a/runtimes/minimal/ubi9-python-3.9/utils/bootstrapper.py +++ b/runtimes/minimal/ubi9-python-3.9/utils/bootstrapper.py @@ -82,7 +82,9 @@ def __init__(self, **kwargs: Any) -> None: self.cos_bucket = self.input_params.get("cos-bucket") self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) + self.pipeline_param_dict = self.convert_param_str_to_dict( + self.input_params.get("pipeline_parameters") + ) # Infer secure from the endpoint's scheme. self.secure = self.cos_endpoint.scheme == "https" @@ -93,9 +95,13 @@ def __init__(self, **kwargs: Any) -> None: access_key=self.input_params.get("cos-user"), secret_key=self.input_params.get("cos-password"), ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: + elif ( + "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ + ): cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: + elif ( + "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ + ): cred_provider = providers.IamAwsProvider() else: raise RuntimeError( @@ -104,12 +110,16 @@ def __init__(self, **kwargs: Any) -> None: ) # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) + self.cos_client = minio.Minio( + self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider + ) @abstractmethod def execute(self) -> None: """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") + raise NotImplementedError( + "Method 'execute()' must be implemented by subclasses!" 
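The Python- and R-script execution hunks follow the same pattern: run the script with `subprocess.run`, redirect both stdout and stderr into a log file, and let `check=True` surface a non-zero exit code. A minimal sketch, with placeholder file names:

```python
import subprocess

script_output = "my_script.log"          # placeholder log file
run_args = ["python3", "my_script.py"]   # the R branch uses ["Rscript", "my_script.R"]

with open(script_output, "w") as log_file:
    # check=True propagates failures, matching the bootstrapper's behaviour
    subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True)
```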
+ ) def process_dependencies(self) -> None: """Process dependencies @@ -180,7 +190,9 @@ def process_metrics_and_metadata(self) -> None: # output_path doesn't meet the requirements # treat this as a non-fatal error and log a warning logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) + OpUtil.log_operation_info( + "Aborted metrics and metadata processing", time.time() - t0 + ) return # Name of the proprietary KFP UI metadata file. @@ -225,11 +237,15 @@ def process_metrics_and_metadata(self) -> None: except ValueError as ve: # The file content could not be parsed. Log a warning # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") + logger.warning( + f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}" + ) except Exception as ex: # Something is wrong with the user-generated metadata file. # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") + logger.warning( + f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}" + ) # # Augment kfp_ui_metadata_filename with Elyra-specific information: @@ -245,13 +261,16 @@ def process_metrics_and_metadata(self) -> None: metadata = {} # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): + if metadata.get("outputs", None) is None or not isinstance( + metadata["outputs"], list + ): metadata["outputs"] = [] # Define HREF for COS bucket: # // bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" + urlunparse(self.cos_endpoint), + f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/", ) # add Elyra metadata to 'outputs' @@ -292,13 +311,20 @@ def get_file_from_object_storage(self, file_to_get: str) -> None: object_to_get = self.get_object_storage_filename(file_to_get) t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) + self.cos_client.fget_object( + bucket_name=self.cos_bucket, + object_name=object_to_get, + file_path=file_to_get, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration + f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", + duration, ) - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: + def put_file_to_object_storage( + self, file_to_upload: str, object_name: Optional[str] = None + ) -> None: """Utility function to put files into an object storage :param file_to_upload: filename @@ -311,10 +337,15 @@ def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[ object_to_upload = self.get_object_storage_filename(object_to_upload) t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) + self.cos_client.fput_object( + bucket_name=self.cos_bucket, + object_name=object_to_upload, + file_path=file_to_upload, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration + f"uploaded {file_to_upload} to bucket: 
{self.cos_bucket} object: {object_to_upload}", duration + f"uploaded {file_to_upload} to bucket: 
{self.cos_bucket} object: {object_to_upload}", + duration, ) def has_wildcard(self, filename): @@ -335,14 +366,18 @@ def process_output_file(self, output_file): else: self.put_file_to_object_storage(matched_file) - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: + def convert_param_str_to_dict( + self, pipeline_parameters: Optional[str] = None + ) -> Dict[str, Any]: """Convert INOUT-separated string of pipeline parameters into a dictionary.""" parameter_dict = {} if pipeline_parameters: parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) for parameter in parameter_list: param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): + if self.parameter_pass_method == "env" and ( + not value or not isinstance(value, str) + ): continue # env vars must be non-empty strings parameter_dict[param_name] = value return parameter_dict @@ -366,7 +401,9 @@ def execute(self) -> None: notebook_html = f"{notebook_name}.html" try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") + OpUtil.log_operation_info( + f"executing notebook using 'papermill {notebook} {notebook_output}'" + ) t0 = time.time() # Include kernel selection in execution time kernel_name = NotebookFileOp.find_best_kernel(notebook) @@ -377,7 +414,9 @@ def execute(self) -> None: import papermill - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) + papermill.execute_notebook( + notebook, notebook_output, kernel_name=kernel_name, **kwargs + ) duration = time.time() - t0 OpUtil.log_operation_info("notebook execution completed", duration) @@ -485,7 +524,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(python_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("python script execution completed", duration) @@ -511,7 +552,9 @@ def execute(self) -> None: r_script_output = f"{r_script_name}.log" try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}' to '{r_script_output}'") + OpUtil.log_operation_info( + f"executing R script using 'Rscript {r_script}' to '{r_script_output}'" + ) t0 = time.time() run_args = ["Rscript", r_script] @@ -519,7 +562,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(r_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("R script execution completed", duration) @@ -558,7 +603,9 @@ def package_install(cls, user_volume_path) -> None: continue try: version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant + except ( + version.InvalidVersion + ): # current version is not PEP-440 compliant logger.warning( f"WARNING: Source package '{package}' found already installed from " f"{current_packages[package]}. 
This may conflict with the required " @@ -566,7 +613,9 @@ def package_install(cls, user_volume_path) -> None: ) continue if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") + logger.info( + f"Updating {package} package from version {current_packages[package]} to {ver}..." + ) to_install_list.append(f"{package}=={ver}") elif version.Version(ver) < version.Version(current_packages[package]): logger.info( @@ -574,7 +623,9 @@ def package_install(cls, user_volume_path) -> None: f"already installed. Skipping..." ) else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") + logger.info( + f"Package not found. Installing {package} package with version {ver}..." + ) to_install_list.append(f"{package}=={ver}") if to_install_list: @@ -582,7 +633,9 @@ def package_install(cls, user_volume_path) -> None: to_install_list.insert(0, f"--target={user_volume_path}") to_install_list.append("--no-cache-dir") - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) + subprocess.run( + [sys.executable, "-m", "pip", "install"] + to_install_list, check=True + ) if user_volume_path: os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" @@ -609,16 +662,28 @@ def package_list_to_dict(cls, filename: str) -> dict: for line in fh: if line[0] != "#": if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") + package_name, package_version = line.strip("\n").split( + sep=" @ " + ) elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") + package_name, package_version = line.strip("\n").split( + sep="===" + ) elif "==" in line: package_name, package_version = line.strip("\n").split(sep="==") elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system + package_name = ( + line.strip("\n") + .replace("-e ", "") + .replace("--editable ", "") + ) + if ( + "#egg=" in package_name + ): # editable package from version control system package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory + elif ( + "/" in package_name + ): # editable package from local directory package_name = os.path.basename(package_name) package_version = None else: @@ -638,10 +703,18 @@ def parse_arguments(cls, args) -> dict: logger.debug("Parsing Arguments.....") parser = argparse.ArgumentParser() parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True + "-e", + "--cos-endpoint", + dest="cos-endpoint", + help="Cloud object storage endpoint", + required=True, ) parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True + "-b", + "--cos-bucket", + dest="cos-bucket", + help="Cloud object storage bucket to use", + required=True, ) parser.add_argument( "-d", @@ -657,9 +730,23 @@ def parse_arguments(cls, args) -> dict: help="Archive containing notebook and dependency artifacts", required=True, ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from 
parent node", required=False) + parser.add_argument( + "-f", "--file", dest="filepath", help="File to execute", required=True + ) + parser.add_argument( + "-o", + "--outputs", + dest="outputs", + help="Files to output to object store", + required=False, + ) + parser.add_argument( + "-i", + "--inputs", + dest="inputs", + help="Files to pull in from parent node", + required=False, + ) parser.add_argument( "-p", "--user-volume-path", @@ -694,12 +781,16 @@ def parse_arguments(cls, args) -> dict: # set pipeline name as global pipeline_name = parsed_args.get("pipeline-name") # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) + operation_name = os.path.basename( + os.path.splitext(parsed_args.get("filepath"))[0] + ) return parsed_args @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: + def log_operation_info( + cls, action_clause: str, duration_secs: Optional[float] = None + ) -> None: """Produces a formatted log INFO message used entirely for support purposes. This method is intended to be called for any entries that should be captured across aggregated @@ -715,13 +806,17 @@ def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = global pipeline_name, operation_name if enable_pipeline_info: duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") + logger.info( + f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}" + ) def main(): # Configure logger format, level logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG + format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", + datefmt="%H:%M:%S", + level=logging.DEBUG, ) # Setup packages and gather arguments input_params = OpUtil.parse_arguments(sys.argv[1:]) @@ -744,4 +839,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/runtimes/pytorch/ubi8-python-3.8/utils/bootstrapper.py b/runtimes/pytorch/ubi8-python-3.8/utils/bootstrapper.py index 03680b065..494f7b7dd 100644 --- a/runtimes/pytorch/ubi8-python-3.8/utils/bootstrapper.py +++ b/runtimes/pytorch/ubi8-python-3.8/utils/bootstrapper.py @@ -82,7 +82,9 @@ def __init__(self, **kwargs: Any) -> None: self.cos_bucket = self.input_params.get("cos-bucket") self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) + self.pipeline_param_dict = self.convert_param_str_to_dict( + self.input_params.get("pipeline_parameters") + ) # Infer secure from the endpoint's scheme. 
self.secure = self.cos_endpoint.scheme == "https" @@ -93,9 +95,13 @@ def __init__(self, **kwargs: Any) -> None: access_key=self.input_params.get("cos-user"), secret_key=self.input_params.get("cos-password"), ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: + elif ( + "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ + ): cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: + elif ( + "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ + ): cred_provider = providers.IamAwsProvider() else: raise RuntimeError( @@ -104,12 +110,16 @@ def __init__(self, **kwargs: Any) -> None: ) # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) + self.cos_client = minio.Minio( + self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider + ) @abstractmethod def execute(self) -> None: """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") + raise NotImplementedError( + "Method 'execute()' must be implemented by subclasses!" + ) def process_dependencies(self) -> None: """Process dependencies @@ -180,7 +190,9 @@ def process_metrics_and_metadata(self) -> None: # output_path doesn't meet the requirements # treat this as a non-fatal error and log a warning logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) + OpUtil.log_operation_info( + "Aborted metrics and metadata processing", time.time() - t0 + ) return # Name of the proprietary KFP UI metadata file. @@ -225,11 +237,15 @@ def process_metrics_and_metadata(self) -> None: except ValueError as ve: # The file content could not be parsed. Log a warning # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") + logger.warning( + f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}" + ) except Exception as ex: # Something is wrong with the user-generated metadata file. # Log a warning and treat this as a non-fatal error. 
- logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") + logger.warning( + f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}" + ) # # Augment kfp_ui_metadata_filename with Elyra-specific information: @@ -245,13 +261,16 @@ def process_metrics_and_metadata(self) -> None: metadata = {} # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): + if metadata.get("outputs", None) is None or not isinstance( + metadata["outputs"], list + ): metadata["outputs"] = [] # Define HREF for COS bucket: # // bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" + urlunparse(self.cos_endpoint), + f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/", ) # add Elyra metadata to 'outputs' @@ -292,13 +311,20 @@ def get_file_from_object_storage(self, file_to_get: str) -> None: object_to_get = self.get_object_storage_filename(file_to_get) t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) + self.cos_client.fget_object( + bucket_name=self.cos_bucket, + object_name=object_to_get, + file_path=file_to_get, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration + f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", + duration, ) - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: + def put_file_to_object_storage( + self, file_to_upload: str, object_name: Optional[str] = None + ) -> None: """Utility function to put files into an object storage :param file_to_upload: filename @@ -311,10 +337,15 @@ def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[ object_to_upload = self.get_object_storage_filename(object_to_upload) t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) + self.cos_client.fput_object( + bucket_name=self.cos_bucket, + object_name=object_to_upload, + file_path=file_to_upload, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration + f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", + duration, ) def has_wildcard(self, filename): @@ -335,14 +366,18 @@ def process_output_file(self, output_file): else: self.put_file_to_object_storage(matched_file) - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: + def convert_param_str_to_dict( + self, pipeline_parameters: Optional[str] = None + ) -> Dict[str, Any]: """Convert INOUT-separated string of pipeline parameters into a dictionary.""" parameter_dict = {} if pipeline_parameters: parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) for parameter in parameter_list: param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): + if self.parameter_pass_method == "env" and ( + not value or not isinstance(value, str) + ): continue # env vars must be non-empty strings parameter_dict[param_name] = value return parameter_dict @@ -366,7 +401,9 @@ def execute(self) -> None: notebook_html = f"{notebook_name}.html" try: - 
OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") + OpUtil.log_operation_info( + f"executing notebook using 'papermill {notebook} {notebook_output}'" + ) t0 = time.time() # Include kernel selection in execution time kernel_name = NotebookFileOp.find_best_kernel(notebook) @@ -377,7 +414,9 @@ def execute(self) -> None: import papermill - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) + papermill.execute_notebook( + notebook, notebook_output, kernel_name=kernel_name, **kwargs + ) duration = time.time() - t0 OpUtil.log_operation_info("notebook execution completed", duration) @@ -485,7 +524,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(python_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("python script execution completed", duration) @@ -511,7 +552,9 @@ def execute(self) -> None: r_script_output = f"{r_script_name}.log" try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}' to '{r_script_output}'") + OpUtil.log_operation_info( + f"executing R script using 'Rscript {r_script}' to '{r_script_output}'" + ) t0 = time.time() run_args = ["Rscript", r_script] @@ -519,7 +562,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(r_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("R script execution completed", duration) @@ -558,7 +603,9 @@ def package_install(cls, user_volume_path) -> None: continue try: version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant + except ( + version.InvalidVersion + ): # current version is not PEP-440 compliant logger.warning( f"WARNING: Source package '{package}' found already installed from " f"{current_packages[package]}. This may conflict with the required " @@ -566,7 +613,9 @@ def package_install(cls, user_volume_path) -> None: ) continue if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") + logger.info( + f"Updating {package} package from version {current_packages[package]} to {ver}..." + ) to_install_list.append(f"{package}=={ver}") elif version.Version(ver) < version.Version(current_packages[package]): logger.info( @@ -574,7 +623,9 @@ def package_install(cls, user_volume_path) -> None: f"already installed. Skipping..." ) else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") + logger.info( + f"Package not found. Installing {package} package with version {ver}..." 
+ ) to_install_list.append(f"{package}=={ver}") if to_install_list: @@ -582,7 +633,9 @@ def package_install(cls, user_volume_path) -> None: to_install_list.insert(0, f"--target={user_volume_path}") to_install_list.append("--no-cache-dir") - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) + subprocess.run( + [sys.executable, "-m", "pip", "install"] + to_install_list, check=True + ) if user_volume_path: os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" @@ -609,16 +662,28 @@ def package_list_to_dict(cls, filename: str) -> dict: for line in fh: if line[0] != "#": if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") + package_name, package_version = line.strip("\n").split( + sep=" @ " + ) elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") + package_name, package_version = line.strip("\n").split( + sep="===" + ) elif "==" in line: package_name, package_version = line.strip("\n").split(sep="==") elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system + package_name = ( + line.strip("\n") + .replace("-e ", "") + .replace("--editable ", "") + ) + if ( + "#egg=" in package_name + ): # editable package from version control system package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory + elif ( + "/" in package_name + ): # editable package from local directory package_name = os.path.basename(package_name) package_version = None else: @@ -638,10 +703,18 @@ def parse_arguments(cls, args) -> dict: logger.debug("Parsing Arguments.....") parser = argparse.ArgumentParser() parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True + "-e", + "--cos-endpoint", + dest="cos-endpoint", + help="Cloud object storage endpoint", + required=True, ) parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True + "-b", + "--cos-bucket", + dest="cos-bucket", + help="Cloud object storage bucket to use", + required=True, ) parser.add_argument( "-d", @@ -657,9 +730,23 @@ def parse_arguments(cls, args) -> dict: help="Archive containing notebook and dependency artifacts", required=True, ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False) + parser.add_argument( + "-f", "--file", dest="filepath", help="File to execute", required=True + ) + parser.add_argument( + "-o", + "--outputs", + dest="outputs", + help="Files to output to object store", + required=False, + ) + parser.add_argument( + "-i", + "--inputs", + dest="inputs", + help="Files to pull in from parent node", + required=False, + ) parser.add_argument( "-p", "--user-volume-path", @@ -694,12 +781,16 @@ def parse_arguments(cls, args) -> dict: # set pipeline name as global pipeline_name = parsed_args.get("pipeline-name") # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) + operation_name = os.path.basename( + 
os.path.splitext(parsed_args.get("filepath"))[0] + ) return parsed_args @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: + def log_operation_info( + cls, action_clause: str, duration_secs: Optional[float] = None + ) -> None: """Produces a formatted log INFO message used entirely for support purposes. This method is intended to be called for any entries that should be captured across aggregated @@ -715,13 +806,17 @@ def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = global pipeline_name, operation_name if enable_pipeline_info: duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") + logger.info( + f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}" + ) def main(): # Configure logger format, level logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG + format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", + datefmt="%H:%M:%S", + level=logging.DEBUG, ) # Setup packages and gather arguments input_params = OpUtil.parse_arguments(sys.argv[1:]) @@ -744,4 +839,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/runtimes/pytorch/ubi9-python-3.9/utils/bootstrapper.py b/runtimes/pytorch/ubi9-python-3.9/utils/bootstrapper.py index 03680b065..494f7b7dd 100644 --- a/runtimes/pytorch/ubi9-python-3.9/utils/bootstrapper.py +++ b/runtimes/pytorch/ubi9-python-3.9/utils/bootstrapper.py @@ -82,7 +82,9 @@ def __init__(self, **kwargs: Any) -> None: self.cos_bucket = self.input_params.get("cos-bucket") self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) + self.pipeline_param_dict = self.convert_param_str_to_dict( + self.input_params.get("pipeline_parameters") + ) # Infer secure from the endpoint's scheme. self.secure = self.cos_endpoint.scheme == "https" @@ -93,9 +95,13 @@ def __init__(self, **kwargs: Any) -> None: access_key=self.input_params.get("cos-user"), secret_key=self.input_params.get("cos-password"), ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: + elif ( + "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ + ): cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: + elif ( + "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ + ): cred_provider = providers.IamAwsProvider() else: raise RuntimeError( @@ -104,12 +110,16 @@ def __init__(self, **kwargs: Any) -> None: ) # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) + self.cos_client = minio.Minio( + self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider + ) @abstractmethod def execute(self) -> None: """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") + raise NotImplementedError( + "Method 'execute()' must be implemented by subclasses!" 
+ ) def process_dependencies(self) -> None: """Process dependencies @@ -180,7 +190,9 @@ def process_metrics_and_metadata(self) -> None: # output_path doesn't meet the requirements # treat this as a non-fatal error and log a warning logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) + OpUtil.log_operation_info( + "Aborted metrics and metadata processing", time.time() - t0 + ) return # Name of the proprietary KFP UI metadata file. @@ -225,11 +237,15 @@ def process_metrics_and_metadata(self) -> None: except ValueError as ve: # The file content could not be parsed. Log a warning # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") + logger.warning( + f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}" + ) except Exception as ex: # Something is wrong with the user-generated metadata file. # Log a warning and treat this as a non-fatal error. - logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") + logger.warning( + f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}" + ) # # Augment kfp_ui_metadata_filename with Elyra-specific information: @@ -245,13 +261,16 @@ def process_metrics_and_metadata(self) -> None: metadata = {} # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): + if metadata.get("outputs", None) is None or not isinstance( + metadata["outputs"], list + ): metadata["outputs"] = [] # Define HREF for COS bucket: # // bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" + urlunparse(self.cos_endpoint), + f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/", ) # add Elyra metadata to 'outputs' @@ -292,13 +311,20 @@ def get_file_from_object_storage(self, file_to_get: str) -> None: object_to_get = self.get_object_storage_filename(file_to_get) t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) + self.cos_client.fget_object( + bucket_name=self.cos_bucket, + object_name=object_to_get, + file_path=file_to_get, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration + f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", + duration, ) - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: + def put_file_to_object_storage( + self, file_to_upload: str, object_name: Optional[str] = None + ) -> None: """Utility function to put files into an object storage :param file_to_upload: filename @@ -311,10 +337,15 @@ def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[ object_to_upload = self.get_object_storage_filename(object_to_upload) t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) + self.cos_client.fput_object( + bucket_name=self.cos_bucket, + object_name=object_to_upload, + file_path=file_to_upload, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration + f"uploaded {file_to_upload} to bucket: 
{self.cos_bucket} object: {object_to_upload}", + duration, ) def has_wildcard(self, filename): @@ -335,14 +366,18 @@ def process_output_file(self, output_file): else: self.put_file_to_object_storage(matched_file) - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: + def convert_param_str_to_dict( + self, pipeline_parameters: Optional[str] = None + ) -> Dict[str, Any]: """Convert INOUT-separated string of pipeline parameters into a dictionary.""" parameter_dict = {} if pipeline_parameters: parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) for parameter in parameter_list: param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): + if self.parameter_pass_method == "env" and ( + not value or not isinstance(value, str) + ): continue # env vars must be non-empty strings parameter_dict[param_name] = value return parameter_dict @@ -366,7 +401,9 @@ def execute(self) -> None: notebook_html = f"{notebook_name}.html" try: - OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") + OpUtil.log_operation_info( + f"executing notebook using 'papermill {notebook} {notebook_output}'" + ) t0 = time.time() # Include kernel selection in execution time kernel_name = NotebookFileOp.find_best_kernel(notebook) @@ -377,7 +414,9 @@ def execute(self) -> None: import papermill - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) + papermill.execute_notebook( + notebook, notebook_output, kernel_name=kernel_name, **kwargs + ) duration = time.time() - t0 OpUtil.log_operation_info("notebook execution completed", duration) @@ -485,7 +524,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(python_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("python script execution completed", duration) @@ -511,7 +552,9 @@ def execute(self) -> None: r_script_output = f"{r_script_name}.log" try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}' to '{r_script_output}'") + OpUtil.log_operation_info( + f"executing R script using 'Rscript {r_script}' to '{r_script_output}'" + ) t0 = time.time() run_args = ["Rscript", r_script] @@ -519,7 +562,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(r_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("R script execution completed", duration) @@ -558,7 +603,9 @@ def package_install(cls, user_volume_path) -> None: continue try: version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant + except ( + version.InvalidVersion + ): # current version is not PEP-440 compliant logger.warning( f"WARNING: Source package '{package}' found already installed from " f"{current_packages[package]}. 
This may conflict with the required " @@ -566,7 +613,9 @@ def package_install(cls, user_volume_path) -> None: ) continue if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") + logger.info( + f"Updating {package} package from version {current_packages[package]} to {ver}..." + ) to_install_list.append(f"{package}=={ver}") elif version.Version(ver) < version.Version(current_packages[package]): logger.info( @@ -574,7 +623,9 @@ def package_install(cls, user_volume_path) -> None: f"already installed. Skipping..." ) else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") + logger.info( + f"Package not found. Installing {package} package with version {ver}..." + ) to_install_list.append(f"{package}=={ver}") if to_install_list: @@ -582,7 +633,9 @@ def package_install(cls, user_volume_path) -> None: to_install_list.insert(0, f"--target={user_volume_path}") to_install_list.append("--no-cache-dir") - subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True) + subprocess.run( + [sys.executable, "-m", "pip", "install"] + to_install_list, check=True + ) if user_volume_path: os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf" @@ -609,16 +662,28 @@ def package_list_to_dict(cls, filename: str) -> dict: for line in fh: if line[0] != "#": if " @ " in line: - package_name, package_version = line.strip("\n").split(sep=" @ ") + package_name, package_version = line.strip("\n").split( + sep=" @ " + ) elif "===" in line: - package_name, package_version = line.strip("\n").split(sep="===") + package_name, package_version = line.strip("\n").split( + sep="===" + ) elif "==" in line: package_name, package_version = line.strip("\n").split(sep="==") elif line.startswith("-e ") or line.startswith("--editable "): - package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "") - if "#egg=" in package_name: # editable package from version control system + package_name = ( + line.strip("\n") + .replace("-e ", "") + .replace("--editable ", "") + ) + if ( + "#egg=" in package_name + ): # editable package from version control system package_name = package_name.split("=")[-1] - elif "/" in package_name: # editable package from local directory + elif ( + "/" in package_name + ): # editable package from local directory package_name = os.path.basename(package_name) package_version = None else: @@ -638,10 +703,18 @@ def parse_arguments(cls, args) -> dict: logger.debug("Parsing Arguments.....") parser = argparse.ArgumentParser() parser.add_argument( - "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True + "-e", + "--cos-endpoint", + dest="cos-endpoint", + help="Cloud object storage endpoint", + required=True, ) parser.add_argument( - "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True + "-b", + "--cos-bucket", + dest="cos-bucket", + help="Cloud object storage bucket to use", + required=True, ) parser.add_argument( "-d", @@ -657,9 +730,23 @@ def parse_arguments(cls, args) -> dict: help="Archive containing notebook and dependency artifacts", required=True, ) - parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True) - parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False) - parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from 
parent node", required=False) + parser.add_argument( + "-f", "--file", dest="filepath", help="File to execute", required=True + ) + parser.add_argument( + "-o", + "--outputs", + dest="outputs", + help="Files to output to object store", + required=False, + ) + parser.add_argument( + "-i", + "--inputs", + dest="inputs", + help="Files to pull in from parent node", + required=False, + ) parser.add_argument( "-p", "--user-volume-path", @@ -694,12 +781,16 @@ def parse_arguments(cls, args) -> dict: # set pipeline name as global pipeline_name = parsed_args.get("pipeline-name") # operation/node name is the basename of the non-suffixed filepath, set as global - operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0]) + operation_name = os.path.basename( + os.path.splitext(parsed_args.get("filepath"))[0] + ) return parsed_args @classmethod - def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None: + def log_operation_info( + cls, action_clause: str, duration_secs: Optional[float] = None + ) -> None: """Produces a formatted log INFO message used entirely for support purposes. This method is intended to be called for any entries that should be captured across aggregated @@ -715,13 +806,17 @@ def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = global pipeline_name, operation_name if enable_pipeline_info: duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else "" - logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}") + logger.info( + f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}" + ) def main(): # Configure logger format, level logging.basicConfig( - format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG + format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", + datefmt="%H:%M:%S", + level=logging.DEBUG, ) # Setup packages and gather arguments input_params = OpUtil.parse_arguments(sys.argv[1:]) @@ -744,4 +839,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/runtimes/tensorflow/ubi8-python-3.8/utils/bootstrapper.py b/runtimes/tensorflow/ubi8-python-3.8/utils/bootstrapper.py index 03680b065..494f7b7dd 100644 --- a/runtimes/tensorflow/ubi8-python-3.8/utils/bootstrapper.py +++ b/runtimes/tensorflow/ubi8-python-3.8/utils/bootstrapper.py @@ -82,7 +82,9 @@ def __init__(self, **kwargs: Any) -> None: self.cos_bucket = self.input_params.get("cos-bucket") self.parameter_pass_method = self.input_params.get("parameter_pass_method") - self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters")) + self.pipeline_param_dict = self.convert_param_str_to_dict( + self.input_params.get("pipeline_parameters") + ) # Infer secure from the endpoint's scheme. 
self.secure = self.cos_endpoint.scheme == "https" @@ -93,9 +95,13 @@ def __init__(self, **kwargs: Any) -> None: access_key=self.input_params.get("cos-user"), secret_key=self.input_params.get("cos-password"), ) - elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ: + elif ( + "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ + ): cred_provider = providers.EnvAWSProvider() - elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ: + elif ( + "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ + ): cred_provider = providers.IamAwsProvider() else: raise RuntimeError( @@ -104,12 +110,16 @@ def __init__(self, **kwargs: Any) -> None: ) # get minio client - self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider) + self.cos_client = minio.Minio( + self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider + ) @abstractmethod def execute(self) -> None: """Execute the operation relative to derived class""" - raise NotImplementedError("Method 'execute()' must be implemented by subclasses!") + raise NotImplementedError( + "Method 'execute()' must be implemented by subclasses!" + ) def process_dependencies(self) -> None: """Process dependencies @@ -180,7 +190,9 @@ def process_metrics_and_metadata(self) -> None: # output_path doesn't meet the requirements # treat this as a non-fatal error and log a warning logger.warning(f'Cannot create files in "{output_path}".') - OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0) + OpUtil.log_operation_info( + "Aborted metrics and metadata processing", time.time() - t0 + ) return # Name of the proprietary KFP UI metadata file. @@ -225,11 +237,15 @@ def process_metrics_and_metadata(self) -> None: except ValueError as ve: # The file content could not be parsed. Log a warning # and treat this as a non-fatal error. - logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}") + logger.warning( + f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}" + ) except Exception as ex: # Something is wrong with the user-generated metadata file. # Log a warning and treat this as a non-fatal error. 
- logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}") + logger.warning( + f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}" + ) # # Augment kfp_ui_metadata_filename with Elyra-specific information: @@ -245,13 +261,16 @@ def process_metrics_and_metadata(self) -> None: metadata = {} # Assure the 'output' property exists and is of the correct type - if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list): + if metadata.get("outputs", None) is None or not isinstance( + metadata["outputs"], list + ): metadata["outputs"] = [] # Define HREF for COS bucket: # // bucket_url = urljoin( - urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/" + urlunparse(self.cos_endpoint), + f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/", ) # add Elyra metadata to 'outputs' @@ -292,13 +311,20 @@ def get_file_from_object_storage(self, file_to_get: str) -> None: object_to_get = self.get_object_storage_filename(file_to_get) t0 = time.time() - self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get) + self.cos_client.fget_object( + bucket_name=self.cos_bucket, + object_name=object_to_get, + file_path=file_to_get, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration + f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", + duration, ) - def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None: + def put_file_to_object_storage( + self, file_to_upload: str, object_name: Optional[str] = None + ) -> None: """Utility function to put files into an object storage :param file_to_upload: filename @@ -311,10 +337,15 @@ def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[ object_to_upload = self.get_object_storage_filename(object_to_upload) t0 = time.time() - self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload) + self.cos_client.fput_object( + bucket_name=self.cos_bucket, + object_name=object_to_upload, + file_path=file_to_upload, + ) duration = time.time() - t0 OpUtil.log_operation_info( - f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration + f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", + duration, ) def has_wildcard(self, filename): @@ -335,14 +366,18 @@ def process_output_file(self, output_file): else: self.put_file_to_object_storage(matched_file) - def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]: + def convert_param_str_to_dict( + self, pipeline_parameters: Optional[str] = None + ) -> Dict[str, Any]: """Convert INOUT-separated string of pipeline parameters into a dictionary.""" parameter_dict = {} if pipeline_parameters: parameter_list = pipeline_parameters.split(INOUT_SEPARATOR) for parameter in parameter_list: param_name, value = parameter.split("=", 1) - if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)): + if self.parameter_pass_method == "env" and ( + not value or not isinstance(value, str) + ): continue # env vars must be non-empty strings parameter_dict[param_name] = value return parameter_dict @@ -366,7 +401,9 @@ def execute(self) -> None: notebook_html = f"{notebook_name}.html" try: - 
OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'") + OpUtil.log_operation_info( + f"executing notebook using 'papermill {notebook} {notebook_output}'" + ) t0 = time.time() # Include kernel selection in execution time kernel_name = NotebookFileOp.find_best_kernel(notebook) @@ -377,7 +414,9 @@ def execute(self) -> None: import papermill - papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs) + papermill.execute_notebook( + notebook, notebook_output, kernel_name=kernel_name, **kwargs + ) duration = time.time() - t0 OpUtil.log_operation_info("notebook execution completed", duration) @@ -485,7 +524,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(python_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("python script execution completed", duration) @@ -511,7 +552,9 @@ def execute(self) -> None: r_script_output = f"{r_script_name}.log" try: - OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}' to '{r_script_output}'") + OpUtil.log_operation_info( + f"executing R script using 'Rscript {r_script}' to '{r_script_output}'" + ) t0 = time.time() run_args = ["Rscript", r_script] @@ -519,7 +562,9 @@ def execute(self) -> None: self.set_parameters_in_env() with open(r_script_output, "w") as log_file: - subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True) + subprocess.run( + run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True + ) duration = time.time() - t0 OpUtil.log_operation_info("R script execution completed", duration) @@ -558,7 +603,9 @@ def package_install(cls, user_volume_path) -> None: continue try: version.Version(current_packages[package]) - except version.InvalidVersion: # current version is not PEP-440 compliant + except ( + version.InvalidVersion + ): # current version is not PEP-440 compliant logger.warning( f"WARNING: Source package '{package}' found already installed from " f"{current_packages[package]}. This may conflict with the required " @@ -566,7 +613,9 @@ def package_install(cls, user_volume_path) -> None: ) continue if version.Version(ver) > version.Version(current_packages[package]): - logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...") + logger.info( + f"Updating {package} package from version {current_packages[package]} to {ver}..." + ) to_install_list.append(f"{package}=={ver}") elif version.Version(ver) < version.Version(current_packages[package]): logger.info( @@ -574,7 +623,9 @@ def package_install(cls, user_volume_path) -> None: f"already installed. Skipping..." ) else: - logger.info(f"Package not found. Installing {package} package with version {ver}...") + logger.info( + f"Package not found. Installing {package} package with version {ver}..." 
+                )
                 to_install_list.append(f"{package}=={ver}")
 
         if to_install_list:
@@ -582,7 +633,9 @@ def package_install(cls, user_volume_path) -> None:
                 to_install_list.insert(0, f"--target={user_volume_path}")
                 to_install_list.append("--no-cache-dir")
 
-            subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True)
+            subprocess.run(
+                [sys.executable, "-m", "pip", "install"] + to_install_list, check=True
+            )
 
         if user_volume_path:
             os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf"
@@ -609,16 +662,28 @@ def package_list_to_dict(cls, filename: str) -> dict:
             for line in fh:
                 if line[0] != "#":
                     if " @ " in line:
-                        package_name, package_version = line.strip("\n").split(sep=" @ ")
+                        package_name, package_version = line.strip("\n").split(
+                            sep=" @ "
+                        )
                     elif "===" in line:
-                        package_name, package_version = line.strip("\n").split(sep="===")
+                        package_name, package_version = line.strip("\n").split(
+                            sep="==="
+                        )
                     elif "==" in line:
                         package_name, package_version = line.strip("\n").split(sep="==")
                     elif line.startswith("-e ") or line.startswith("--editable "):
-                        package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "")
-                        if "#egg=" in package_name:  # editable package from version control system
+                        package_name = (
+                            line.strip("\n")
+                            .replace("-e ", "")
+                            .replace("--editable ", "")
+                        )
+                        if (
+                            "#egg=" in package_name
+                        ):  # editable package from version control system
                             package_name = package_name.split("=")[-1]
-                        elif "/" in package_name:  # editable package from local directory
+                        elif (
+                            "/" in package_name
+                        ):  # editable package from local directory
                             package_name = os.path.basename(package_name)
                         package_version = None
                     else:
@@ -638,10 +703,18 @@ def parse_arguments(cls, args) -> dict:
         logger.debug("Parsing Arguments.....")
         parser = argparse.ArgumentParser()
         parser.add_argument(
-            "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True
+            "-e",
+            "--cos-endpoint",
+            dest="cos-endpoint",
+            help="Cloud object storage endpoint",
+            required=True,
         )
         parser.add_argument(
-            "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True
+            "-b",
+            "--cos-bucket",
+            dest="cos-bucket",
+            help="Cloud object storage bucket to use",
+            required=True,
         )
         parser.add_argument(
             "-d",
@@ -657,9 +730,23 @@ def parse_arguments(cls, args) -> dict:
             help="Archive containing notebook and dependency artifacts",
             required=True,
         )
-        parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True)
-        parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False)
-        parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False)
+        parser.add_argument(
+            "-f", "--file", dest="filepath", help="File to execute", required=True
+        )
+        parser.add_argument(
+            "-o",
+            "--outputs",
+            dest="outputs",
+            help="Files to output to object store",
+            required=False,
+        )
+        parser.add_argument(
+            "-i",
+            "--inputs",
+            dest="inputs",
+            help="Files to pull in from parent node",
+            required=False,
+        )
         parser.add_argument(
             "-p",
             "--user-volume-path",
@@ -694,12 +781,16 @@ def parse_arguments(cls, args) -> dict:
         # set pipeline name as global
         pipeline_name = parsed_args.get("pipeline-name")
         # operation/node name is the basename of the non-suffixed filepath, set as global
-        operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0])
+        operation_name = os.path.basename(
+            os.path.splitext(parsed_args.get("filepath"))[0]
+        )
 
         return parsed_args
 
     @classmethod
-    def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None:
+    def log_operation_info(
+        cls, action_clause: str, duration_secs: Optional[float] = None
+    ) -> None:
         """Produces a formatted log INFO message used entirely for support purposes.
 
         This method is intended to be called for any entries that should be captured across aggregated
@@ -715,13 +806,17 @@ def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] =
         global pipeline_name, operation_name
         if enable_pipeline_info:
             duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else ""
-            logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}")
+            logger.info(
+                f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}"
+            )
 
 
 def main():
     # Configure logger format, level
     logging.basicConfig(
-        format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG
+        format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s",
+        datefmt="%H:%M:%S",
+        level=logging.DEBUG,
     )
     # Setup packages and gather arguments
     input_params = OpUtil.parse_arguments(sys.argv[1:])
@@ -744,4 +839,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/runtimes/tensorflow/ubi9-python-3.9/utils/bootstrapper.py b/runtimes/tensorflow/ubi9-python-3.9/utils/bootstrapper.py
index 03680b065..494f7b7dd 100644
--- a/runtimes/tensorflow/ubi9-python-3.9/utils/bootstrapper.py
+++ b/runtimes/tensorflow/ubi9-python-3.9/utils/bootstrapper.py
@@ -82,7 +82,9 @@ def __init__(self, **kwargs: Any) -> None:
         self.cos_bucket = self.input_params.get("cos-bucket")
         self.parameter_pass_method = self.input_params.get("parameter_pass_method")
 
-        self.pipeline_param_dict = self.convert_param_str_to_dict(self.input_params.get("pipeline_parameters"))
+        self.pipeline_param_dict = self.convert_param_str_to_dict(
+            self.input_params.get("pipeline_parameters")
+        )
 
         # Infer secure from the endpoint's scheme.
         self.secure = self.cos_endpoint.scheme == "https"
@@ -93,9 +95,13 @@ def __init__(self, **kwargs: Any) -> None:
             access_key=self.input_params.get("cos-user"),
             secret_key=self.input_params.get("cos-password"),
         )
-        elif "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ:
+        elif (
+            "AWS_ACCESS_KEY_ID" in os.environ and "AWS_SECRET_ACCESS_KEY" in os.environ
+        ):
             cred_provider = providers.EnvAWSProvider()
-        elif "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ:
+        elif (
+            "AWS_ROLE_ARN" in os.environ and "AWS_WEB_IDENTITY_TOKEN_FILE" in os.environ
+        ):
             cred_provider = providers.IamAwsProvider()
         else:
             raise RuntimeError(
@@ -104,12 +110,16 @@ def __init__(self, **kwargs: Any) -> None:
             )
 
         # get minio client
-        self.cos_client = minio.Minio(self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider)
+        self.cos_client = minio.Minio(
+            self.cos_endpoint.netloc, secure=self.secure, credentials=cred_provider
+        )
 
     @abstractmethod
     def execute(self) -> None:
         """Execute the operation relative to derived class"""
-        raise NotImplementedError("Method 'execute()' must be implemented by subclasses!")
+        raise NotImplementedError(
+            "Method 'execute()' must be implemented by subclasses!"
+        )
 
     def process_dependencies(self) -> None:
         """Process dependencies
@@ -180,7 +190,9 @@ def process_metrics_and_metadata(self) -> None:
             # output_path doesn't meet the requirements
             # treat this as a non-fatal error and log a warning
             logger.warning(f'Cannot create files in "{output_path}".')
-            OpUtil.log_operation_info("Aborted metrics and metadata processing", time.time() - t0)
+            OpUtil.log_operation_info(
+                "Aborted metrics and metadata processing", time.time() - t0
+            )
             return
 
         # Name of the proprietary KFP UI metadata file.
@@ -225,11 +237,15 @@ def process_metrics_and_metadata(self) -> None:
             except ValueError as ve:
                 # The file content could not be parsed. Log a warning
                 # and treat this as a non-fatal error.
-                logger.warning(f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}")
+                logger.warning(
+                    f"Ignoring incompatible {str(src)} produced by {self.filepath}: {ve} {str(ve)}"
+                )
             except Exception as ex:
                 # Something is wrong with the user-generated metadata file.
                 # Log a warning and treat this as a non-fatal error.
-                logger.warning(f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}")
+                logger.warning(
+                    f"Error processing {str(src)} produced by {self.filepath}: {ex} {str(ex)}"
+                )
 
         #
         # Augment kfp_ui_metadata_filename with Elyra-specific information:
@@ -245,13 +261,16 @@ def process_metrics_and_metadata(self) -> None:
             metadata = {}
 
         # Assure the 'output' property exists and is of the correct type
-        if metadata.get("outputs", None) is None or not isinstance(metadata["outputs"], list):
+        if metadata.get("outputs", None) is None or not isinstance(
+            metadata["outputs"], list
+        ):
             metadata["outputs"] = []
 
         # Define HREF for COS bucket:
         # //
         bucket_url = urljoin(
-            urlunparse(self.cos_endpoint), f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/"
+            urlunparse(self.cos_endpoint),
+            f"{self.cos_bucket}/{self.input_params.get('cos-directory', '')}/",
         )
 
         # add Elyra metadata to 'outputs'
@@ -292,13 +311,20 @@ def get_file_from_object_storage(self, file_to_get: str) -> None:
         object_to_get = self.get_object_storage_filename(file_to_get)
 
         t0 = time.time()
-        self.cos_client.fget_object(bucket_name=self.cos_bucket, object_name=object_to_get, file_path=file_to_get)
+        self.cos_client.fget_object(
+            bucket_name=self.cos_bucket,
+            object_name=object_to_get,
+            file_path=file_to_get,
+        )
         duration = time.time() - t0
         OpUtil.log_operation_info(
-            f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}", duration
+            f"downloaded {file_to_get} from bucket: {self.cos_bucket}, object: {object_to_get}",
+            duration,
         )
 
-    def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[str] = None) -> None:
+    def put_file_to_object_storage(
+        self, file_to_upload: str, object_name: Optional[str] = None
+    ) -> None:
         """Utility function to put files into an object storage
 
         :param file_to_upload: filename
@@ -311,10 +337,15 @@ def put_file_to_object_storage(self, file_to_upload: str, object_name: Optional[
         object_to_upload = self.get_object_storage_filename(object_to_upload)
 
         t0 = time.time()
-        self.cos_client.fput_object(bucket_name=self.cos_bucket, object_name=object_to_upload, file_path=file_to_upload)
+        self.cos_client.fput_object(
+            bucket_name=self.cos_bucket,
+            object_name=object_to_upload,
+            file_path=file_to_upload,
+        )
         duration = time.time() - t0
         OpUtil.log_operation_info(
-            f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}", duration
+            f"uploaded {file_to_upload} to bucket: {self.cos_bucket} object: {object_to_upload}",
+            duration,
         )
 
     def has_wildcard(self, filename):
@@ -335,14 +366,18 @@ def process_output_file(self, output_file):
             else:
                 self.put_file_to_object_storage(matched_file)
 
-    def convert_param_str_to_dict(self, pipeline_parameters: Optional[str] = None) -> Dict[str, Any]:
+    def convert_param_str_to_dict(
+        self, pipeline_parameters: Optional[str] = None
+    ) -> Dict[str, Any]:
         """Convert INOUT-separated string of pipeline parameters into a dictionary."""
         parameter_dict = {}
         if pipeline_parameters:
             parameter_list = pipeline_parameters.split(INOUT_SEPARATOR)
             for parameter in parameter_list:
                 param_name, value = parameter.split("=", 1)
-                if self.parameter_pass_method == "env" and (not value or not isinstance(value, str)):
+                if self.parameter_pass_method == "env" and (
+                    not value or not isinstance(value, str)
+                ):
                     continue  # env vars must be non-empty strings
                 parameter_dict[param_name] = value
         return parameter_dict
@@ -366,7 +401,9 @@ def execute(self) -> None:
         notebook_html = f"{notebook_name}.html"
 
         try:
-            OpUtil.log_operation_info(f"executing notebook using 'papermill {notebook} {notebook_output}'")
+            OpUtil.log_operation_info(
+                f"executing notebook using 'papermill {notebook} {notebook_output}'"
+            )
             t0 = time.time()
             # Include kernel selection in execution time
             kernel_name = NotebookFileOp.find_best_kernel(notebook)
@@ -377,7 +414,9 @@ def execute(self) -> None:
 
             import papermill
 
-            papermill.execute_notebook(notebook, notebook_output, kernel_name=kernel_name, **kwargs)
+            papermill.execute_notebook(
+                notebook, notebook_output, kernel_name=kernel_name, **kwargs
+            )
             duration = time.time() - t0
             OpUtil.log_operation_info("notebook execution completed", duration)
 
@@ -485,7 +524,9 @@ def execute(self) -> None:
                 self.set_parameters_in_env()
 
             with open(python_script_output, "w") as log_file:
-                subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True)
+                subprocess.run(
+                    run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True
+                )
 
             duration = time.time() - t0
             OpUtil.log_operation_info("python script execution completed", duration)
@@ -511,7 +552,9 @@ def execute(self) -> None:
         r_script_output = f"{r_script_name}.log"
 
         try:
-            OpUtil.log_operation_info(f"executing R script using 'Rscript {r_script}' to '{r_script_output}'")
+            OpUtil.log_operation_info(
+                f"executing R script using 'Rscript {r_script}' to '{r_script_output}'"
+            )
             t0 = time.time()
 
             run_args = ["Rscript", r_script]
@@ -519,7 +562,9 @@ def execute(self) -> None:
                 self.set_parameters_in_env()
 
             with open(r_script_output, "w") as log_file:
-                subprocess.run(run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True)
+                subprocess.run(
+                    run_args, stdout=log_file, stderr=subprocess.STDOUT, check=True
+                )
 
             duration = time.time() - t0
             OpUtil.log_operation_info("R script execution completed", duration)
@@ -558,7 +603,9 @@ def package_install(cls, user_volume_path) -> None:
                     continue
                 try:
                     version.Version(current_packages[package])
-                except version.InvalidVersion:  # current version is not PEP-440 compliant
+                except (
+                    version.InvalidVersion
+                ):  # current version is not PEP-440 compliant
                     logger.warning(
                         f"WARNING: Source package '{package}' found already installed from "
                         f"{current_packages[package]}. This may conflict with the required "
@@ -566,7 +613,9 @@ def package_install(cls, user_volume_path) -> None:
                     )
                     continue
                 if version.Version(ver) > version.Version(current_packages[package]):
-                    logger.info(f"Updating {package} package from version {current_packages[package]} to {ver}...")
+                    logger.info(
+                        f"Updating {package} package from version {current_packages[package]} to {ver}..."
+                    )
                     to_install_list.append(f"{package}=={ver}")
                 elif version.Version(ver) < version.Version(current_packages[package]):
                     logger.info(
@@ -574,7 +623,9 @@ def package_install(cls, user_volume_path) -> None:
                         f"already installed. Skipping..."
                     )
             else:
-                logger.info(f"Package not found. Installing {package} package with version {ver}...")
+                logger.info(
+                    f"Package not found. Installing {package} package with version {ver}..."
+                )
                 to_install_list.append(f"{package}=={ver}")
 
         if to_install_list:
@@ -582,7 +633,9 @@ def package_install(cls, user_volume_path) -> None:
                 to_install_list.insert(0, f"--target={user_volume_path}")
                 to_install_list.append("--no-cache-dir")
 
-            subprocess.run([sys.executable, "-m", "pip", "install"] + to_install_list, check=True)
+            subprocess.run(
+                [sys.executable, "-m", "pip", "install"] + to_install_list, check=True
+            )
 
         if user_volume_path:
             os.environ["PIP_CONFIG_FILE"] = f"{user_volume_path}/pip.conf"
@@ -609,16 +662,28 @@ def package_list_to_dict(cls, filename: str) -> dict:
             for line in fh:
                 if line[0] != "#":
                     if " @ " in line:
-                        package_name, package_version = line.strip("\n").split(sep=" @ ")
+                        package_name, package_version = line.strip("\n").split(
+                            sep=" @ "
+                        )
                     elif "===" in line:
-                        package_name, package_version = line.strip("\n").split(sep="===")
+                        package_name, package_version = line.strip("\n").split(
+                            sep="==="
+                        )
                     elif "==" in line:
                         package_name, package_version = line.strip("\n").split(sep="==")
                     elif line.startswith("-e ") or line.startswith("--editable "):
-                        package_name = line.strip("\n").replace("-e ", "").replace("--editable ", "")
-                        if "#egg=" in package_name:  # editable package from version control system
+                        package_name = (
+                            line.strip("\n")
+                            .replace("-e ", "")
+                            .replace("--editable ", "")
+                        )
+                        if (
+                            "#egg=" in package_name
+                        ):  # editable package from version control system
                             package_name = package_name.split("=")[-1]
-                        elif "/" in package_name:  # editable package from local directory
+                        elif (
+                            "/" in package_name
+                        ):  # editable package from local directory
                             package_name = os.path.basename(package_name)
                         package_version = None
                     else:
@@ -638,10 +703,18 @@ def parse_arguments(cls, args) -> dict:
         logger.debug("Parsing Arguments.....")
         parser = argparse.ArgumentParser()
         parser.add_argument(
-            "-e", "--cos-endpoint", dest="cos-endpoint", help="Cloud object storage endpoint", required=True
+            "-e",
+            "--cos-endpoint",
+            dest="cos-endpoint",
+            help="Cloud object storage endpoint",
+            required=True,
        )
         parser.add_argument(
-            "-b", "--cos-bucket", dest="cos-bucket", help="Cloud object storage bucket to use", required=True
+            "-b",
+            "--cos-bucket",
+            dest="cos-bucket",
+            help="Cloud object storage bucket to use",
+            required=True,
         )
         parser.add_argument(
             "-d",
@@ -657,9 +730,23 @@ def parse_arguments(cls, args) -> dict:
             help="Archive containing notebook and dependency artifacts",
             required=True,
         )
-        parser.add_argument("-f", "--file", dest="filepath", help="File to execute", required=True)
-        parser.add_argument("-o", "--outputs", dest="outputs", help="Files to output to object store", required=False)
-        parser.add_argument("-i", "--inputs", dest="inputs", help="Files to pull in from parent node", required=False)
+        parser.add_argument(
+            "-f", "--file", dest="filepath", help="File to execute", required=True
+        )
+        parser.add_argument(
+            "-o",
+            "--outputs",
+            dest="outputs",
+            help="Files to output to object store",
+            required=False,
+        )
+        parser.add_argument(
+            "-i",
+            "--inputs",
+            dest="inputs",
+            help="Files to pull in from parent node",
+            required=False,
+        )
         parser.add_argument(
             "-p",
             "--user-volume-path",
@@ -694,12 +781,16 @@ def parse_arguments(cls, args) -> dict:
         # set pipeline name as global
         pipeline_name = parsed_args.get("pipeline-name")
         # operation/node name is the basename of the non-suffixed filepath, set as global
-        operation_name = os.path.basename(os.path.splitext(parsed_args.get("filepath"))[0])
+        operation_name = os.path.basename(
+            os.path.splitext(parsed_args.get("filepath"))[0]
+        )
 
         return parsed_args
 
     @classmethod
-    def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] = None) -> None:
+    def log_operation_info(
+        cls, action_clause: str, duration_secs: Optional[float] = None
+    ) -> None:
         """Produces a formatted log INFO message used entirely for support purposes.
 
         This method is intended to be called for any entries that should be captured across aggregated
@@ -715,13 +806,17 @@ def log_operation_info(cls, action_clause: str, duration_secs: Optional[float] =
         global pipeline_name, operation_name
         if enable_pipeline_info:
             duration_clause = f"({duration_secs:.3f} secs)" if duration_secs else ""
-            logger.info(f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}")
+            logger.info(
+                f"'{pipeline_name}':'{operation_name}' - {action_clause} {duration_clause}"
+            )
 
 
 def main():
     # Configure logger format, level
     logging.basicConfig(
-        format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s", datefmt="%H:%M:%S", level=logging.DEBUG
+        format="[%(levelname)1.1s %(asctime)s.%(msecs).03d] %(message)s",
+        datefmt="%H:%M:%S",
+        level=logging.DEBUG,
     )
     # Setup packages and gather arguments
     input_params = OpUtil.parse_arguments(sys.argv[1:])
@@ -744,4 +839,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()