-
Notifications
You must be signed in to change notification settings - Fork 64
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add experimental-jobs-as-code template (#2177)
## Changes: Add the experimental-jobs-as-code template, which allows defining jobs in Python instead of YAML through the `databricks-bundles` PyPI package. ## Tests: Tested manually and with acceptance tests.
- Loading branch information
Showing
36 changed files
with
1,182 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
5 changes: 5 additions & 0 deletions
5
acceptance/bundle/templates/experimental-jobs-as-code/input.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
{ | ||
"project_name": "my_jobs_as_code", | ||
"include_notebook": "yes", | ||
"include_python": "yes" | ||
} |
85 changes: 85 additions & 0 deletions
85
acceptance/bundle/templates/experimental-jobs-as-code/output.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
|
||
>>> $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output | ||
|
||
Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles! | ||
Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): $DATABRICKS_URL | ||
|
||
✨ Your new project has been created in the 'my_jobs_as_code' directory! | ||
|
||
Please refer to the README.md file for "getting started" instructions. | ||
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html. | ||
|
||
>>> $CLI bundle validate -t dev --output json | ||
{ | ||
"jobs": { | ||
"my_jobs_as_code_job": { | ||
"deployment": { | ||
"kind": "BUNDLE", | ||
"metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/state/metadata.json" | ||
}, | ||
"edit_mode": "UI_LOCKED", | ||
"email_notifications": { | ||
"on_failure": [ | ||
"$USERNAME" | ||
] | ||
}, | ||
"format": "MULTI_TASK", | ||
"job_clusters": [ | ||
{ | ||
"job_cluster_key": "job_cluster", | ||
"new_cluster": { | ||
"autoscale": { | ||
"max_workers": 4, | ||
"min_workers": 1 | ||
}, | ||
"node_type_id": "i3.xlarge", | ||
"spark_version": "15.4.x-scala2.12" | ||
} | ||
} | ||
], | ||
"max_concurrent_runs": 4, | ||
"name": "[dev $USERNAME] my_jobs_as_code_job", | ||
"permissions": [], | ||
"queue": { | ||
"enabled": true | ||
}, | ||
"tags": { | ||
"dev": "$USERNAME" | ||
}, | ||
"tasks": [ | ||
{ | ||
"job_cluster_key": "job_cluster", | ||
"notebook_task": { | ||
"notebook_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/files/src/notebook" | ||
}, | ||
"task_key": "notebook_task" | ||
}, | ||
{ | ||
"depends_on": [ | ||
{ | ||
"task_key": "notebook_task" | ||
} | ||
], | ||
"job_cluster_key": "job_cluster", | ||
"libraries": [ | ||
{ | ||
"whl": "dist/*.whl" | ||
} | ||
], | ||
"python_wheel_task": { | ||
"entry_point": "main", | ||
"package_name": "my_jobs_as_code" | ||
}, | ||
"task_key": "main_task" | ||
} | ||
], | ||
"trigger": { | ||
"pause_status": "PAUSED", | ||
"periodic": { | ||
"interval": 1, | ||
"unit": "DAYS" | ||
} | ||
} | ||
} | ||
} | ||
} |
8 changes: 8 additions & 0 deletions
8
acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
.databricks/ | ||
build/ | ||
dist/ | ||
__pycache__/ | ||
*.egg-info | ||
.venv/ | ||
scratch/** | ||
!scratch/README.md |
58 changes: 58 additions & 0 deletions
58
...nce/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# my_jobs_as_code | ||
|
||
The 'my_jobs_as_code' project was generated by using the "Jobs as code" template. | ||
|
||
## Prerequisites | ||
|
||
1. Install Databricks CLI 0.238 or later. | ||
See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html). | ||
|
||
2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/). | ||
We use uv to create a virtual environment and install the required dependencies. | ||
|
||
3. Authenticate to your Databricks workspace if you have not done so already: | ||
``` | ||
$ databricks configure | ||
``` | ||
4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from | ||
https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for | ||
**Databricks Connect** for instructions on running the included Python code from a different IDE. | ||
5. For documentation on the Databricks Asset Bundles format used | ||
for this project, and for CI/CD configuration, see | ||
https://docs.databricks.com/dev-tools/bundles/index.html. | ||
## Deploy and run jobs | ||
1. Create a new virtual environment and install the required dependencies: | ||
``` | ||
$ uv sync | ||
``` | ||
2. To deploy the bundle to the development target: | ||
``` | ||
$ databricks bundle deploy --target dev | ||
``` | ||
*(Note that "dev" is the default target, so the `--target` parameter is optional here.)* | ||
This deploys everything that's defined for this project. | ||
For example, the default template would deploy a job called | ||
`[dev yourname] my_jobs_as_code_job` to your workspace. | ||
You can find that job by opening your workspace and clicking on **Workflows**. | ||
3. Similarly, to deploy a production copy, type: | ||
``` | ||
$ databricks bundle deploy --target prod | ||
``` | ||
Note that the default job from the template has a schedule that runs every day | ||
(defined in resources/my_jobs_as_code_job.py). The schedule | ||
is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes]( | ||
https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)). | ||
4. To run a job: | ||
``` | ||
$ databricks bundle run | ||
``` |
48 changes: 48 additions & 0 deletions
48
acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/databricks.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# This is a Databricks asset bundle definition for my_jobs_as_code. | ||
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. | ||
bundle: | ||
name: my_jobs_as_code | ||
uuid: <UUID> | ||
|
||
experimental: | ||
python: | ||
# Activate virtual environment before loading resources defined in Python. | ||
# If disabled, defaults to using the Python interpreter available in the current shell. | ||
venv_path: .venv | ||
# Functions called to load resources defined in Python. See resources/__init__.py | ||
resources: | ||
- "resources:load_resources" | ||
|
||
artifacts: | ||
default: | ||
type: whl | ||
path: . | ||
# We use a timestamp as the local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers) | ||
# to ensure that changes to the wheel package are picked up when used on all-purpose clusters | ||
build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build | ||
|
||
include: | ||
- resources/*.yml | ||
|
||
targets: | ||
dev: | ||
# The default target uses 'mode: development' to create a development copy. | ||
# - Deployed resources get prefixed with '[dev my_user_name]' | ||
# - Any job schedules and triggers are paused by default. | ||
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. | ||
mode: development | ||
default: true | ||
workspace: | ||
host: $DATABRICKS_URL | ||
|
||
prod: | ||
mode: production | ||
workspace: | ||
host: $DATABRICKS_URL | ||
# We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy. | ||
root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target} | ||
permissions: | ||
- user_name: $USERNAME | ||
level: CAN_MANAGE | ||
run_as: | ||
user_name: $USERNAME |
22 changes: 22 additions & 0 deletions
22
...tance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/fixtures/.gitkeep
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Fixtures | ||
|
||
This folder is reserved for fixtures, such as CSV files. | ||
|
||
Below is an example of how to load fixtures as a data frame: | ||
|
||
``` | ||
import pandas as pd | ||
import os | ||
|
||
def get_absolute_path(*relative_parts):
    """Build the path to a file from path components.

    If a global named ``dbutils`` exists (presumably only when running inside
    a Databricks notebook -- confirm), the components are resolved against the
    directory of the current notebook, the result is normalized, and it is
    prefixed with ``/Workspace`` unless it already starts with that prefix.

    Otherwise the components are simply joined and returned as given, i.e.
    relative to whatever the caller's working directory is.

    Args:
        *relative_parts: Path components, e.g. ``"..", "fixtures", "mycsv.csv"``.

    Returns:
        The resulting path as a string.
    """
    # Guard clause: without `dbutils` there is no notebook context to anchor to.
    if 'dbutils' not in globals():
        return os.path.join(*relative_parts)

    notebook_path = dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()  # type: ignore
    base_dir = os.path.dirname(notebook_path)
    # normpath collapses segments such as ".." introduced by the relative parts.
    path = os.path.normpath(os.path.join(base_dir, *relative_parts))
    return path if path.startswith("/Workspace") else "/Workspace" + path
|
||
# Locate the CSV fixture via get_absolute_path (one directory up, then
# "fixtures/mycsv.csv"), load it as a data frame and render it.
csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
# NOTE(review): `display` is not defined in this snippet; presumably the
# notebook-provided built-in -- confirm before running elsewhere.
display(df)
``` |
49 changes: 49 additions & 0 deletions
49
acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/pyproject.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
[build-system] | ||
requires = ["setuptools>=61.0"] | ||
build-backend = "setuptools.build_meta" | ||
|
||
[project] | ||
name = "my_jobs_as_code" | ||
requires-python = ">=3.10" | ||
description = "wheel file based on my_jobs_as_code" | ||
|
||
# Dependencies in case the output wheel file is used as a library dependency. | ||
# For defining dependencies, when this package is used in Databricks, see: | ||
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html | ||
# | ||
# Example: | ||
# dependencies = [ | ||
# "requests==x.y.z", | ||
# ] | ||
dependencies = [ | ||
] | ||
|
||
# see setup.py | ||
dynamic = ["version"] | ||
|
||
[project.entry-points.packages] | ||
main = "my_jobs_as_code.main:main" | ||
|
||
[tool.setuptools] | ||
py-modules = ["resources", "my_jobs_as_code"] | ||
|
||
[tool.uv] | ||
## Dependencies for local development | ||
dev-dependencies = [ | ||
"databricks-bundles==0.7.0", | ||
|
||
## Add code completion support for DLT | ||
# "databricks-dlt", | ||
|
||
## databricks-connect can be used to run parts of this project locally. | ||
## See https://docs.databricks.com/dev-tools/databricks-connect.html. | ||
## | ||
## Uncomment line below to install a version of db-connect that corresponds to | ||
## the Databricks Runtime version used for this project. | ||
# "databricks-connect>=15.4,<15.5", | ||
] | ||
|
||
override-dependencies = [ | ||
# pyspark package conflicts with 'databricks-connect' | ||
"pyspark; sys_platform == 'never'", | ||
] |
16 changes: 16 additions & 0 deletions
16
...e/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/resources/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from databricks.bundles.core import ( | ||
Bundle, | ||
Resources, | ||
load_resources_from_current_package_module, | ||
) | ||
|
||
|
||
def load_resources(bundle: Bundle) -> Resources:
    """
    Load the bundle resources that are defined in Python code.

    The 'load_resources' function is referenced in databricks.yml (as
    "resources:load_resources"). It is called by the Databricks CLI during
    bundle deployment; after deployment, this function is not used.

    The 'bundle' argument is accepted but not used by this default
    implementation.
    """

    # the default implementation loads all Python files in 'resources' directory
    return load_resources_from_current_package_module()
Oops, something went wrong.