Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First look for databricks.yml before falling back to bundle.yml #580

Merged
merged 5 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ type Bundle struct {

func Load(path string) (*Bundle, error) {
bundle := &Bundle{}
err := bundle.Config.Load(filepath.Join(path, config.FileName))
configFile, err := config.FileNames.FindInPath(path)
if err != nil {
return nil, err
}
err = bundle.Config.Load(configFile)
if err != nil {
return nil, err
}
Expand Down
4 changes: 2 additions & 2 deletions bundle/bundle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func TestLoadExists(t *testing.T) {

func TestBundleCacheDir(t *testing.T) {
projectDir := t.TempDir()
f1, err := os.Create(filepath.Join(projectDir, "bundle.yml"))
fjakobs marked this conversation as resolved.
Show resolved Hide resolved
f1, err := os.Create(filepath.Join(projectDir, "databricks.yml"))
require.NoError(t, err)
f1.Close()

Expand All @@ -47,7 +47,7 @@ func TestBundleCacheDir(t *testing.T) {
func TestBundleCacheDirOverride(t *testing.T) {
projectDir := t.TempDir()
bundleTmpDir := t.TempDir()
f1, err := os.Create(filepath.Join(projectDir, "bundle.yml"))
f1, err := os.Create(filepath.Join(projectDir, "databricks.yml"))
require.NoError(t, err)
f1.Close()

Expand Down
6 changes: 4 additions & 2 deletions bundle/config/mutator/process_root_includes.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) error
var out []bundle.Mutator

// Map with files we've already seen to avoid loading them twice.
var seen = map[string]bool{
config.FileName: true,
var seen = map[string]bool{}

for _, file := range config.FileNames {
seen[file] = true
}

// Maintain list of files in order of files being loaded.
Expand Down
2 changes: 1 addition & 1 deletion bundle/config/mutator/process_root_includes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func TestProcessRootIncludesSingleGlob(t *testing.T) {
},
}

touch(t, bundle.Config.Path, "bundle.yml")
touch(t, bundle.Config.Path, "databricks.yml")
touch(t, bundle.Config.Path, "a.yml")
touch(t, bundle.Config.Path, "b.yml")

Expand Down
40 changes: 35 additions & 5 deletions bundle/config/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,39 @@ import (
"github.com/imdario/mergo"
)

// FileName is the name of bundle configuration file.
const FileName = "bundle.yml"
type ConfigFileNames []string

// FileNames contains allowed names of bundle configuration files.
var FileNames = ConfigFileNames{"databricks.yml", "databricks.yaml", "bundle.yml", "bundle.yaml"}

func (c ConfigFileNames) FindInPath(path string) (string, error) {
result := ""
var firstErr error

for _, file := range c {
filePath := filepath.Join(path, file)
_, err := os.Stat(filePath)
if err == nil {
if result != "" {
return "", fmt.Errorf("multiple bundle root configuration files found in %s", path)
}
result = filePath
} else {
if firstErr == nil {
firstErr = err
}
}
}

if result == "" {
return "", firstErr
}
return result, nil
}

type Root struct {
// Path contains the directory path to the root of the bundle.
// It is set when loading `bundle.yml`.
// It is set when loading `databricks.yml`.
Path string `json:"-" bundle:"readonly"`

// Contains user defined variables
Expand All @@ -27,7 +54,7 @@ type Root struct {
Bundle Bundle `json:"bundle"`

// Include specifies a list of patterns of file names to load and
// merge into the this configuration. If not set in `bundle.yml`,
// merge into the this configuration. If not set in `databricks.yml`,
// it defaults to loading `*.yml` and `*/*.yml`.
//
// Also see [mutator.DefineDefaultInclude].
Expand Down Expand Up @@ -62,7 +89,10 @@ func Load(path string) (*Root, error) {

// If we were given a directory, assume this is the bundle root.
if stat.IsDir() {
path = filepath.Join(path, FileName)
path, err = FileNames.FindInPath(path)
if err != nil {
return nil, err
}
}

if err := r.Load(path); err != nil {
Expand Down
73 changes: 68 additions & 5 deletions bundle/config/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ package config

import (
"encoding/json"
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"testing"

"github.com/databricks/cli/bundle/config/variable"
Expand All @@ -26,7 +30,7 @@ func TestRootMarshalUnmarshal(t *testing.T) {

func TestRootLoad(t *testing.T) {
root := &Root{}
err := root.Load("../tests/basic/bundle.yml")
err := root.Load("../tests/basic/databricks.yml")
require.NoError(t, err)
assert.Equal(t, "basic", root.Bundle.Name)
}
Expand Down Expand Up @@ -78,21 +82,21 @@ func TestRootMergeMap(t *testing.T) {

func TestDuplicateIdOnLoadReturnsError(t *testing.T) {
root := &Root{}
err := root.Load("./testdata/duplicate_resource_names_in_root/bundle.yml")
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/bundle.yml, pipeline at ./testdata/duplicate_resource_names_in_root/bundle.yml)")
err := root.Load("./testdata/duplicate_resource_names_in_root/databricks.yml")
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/databricks.yml, pipeline at ./testdata/duplicate_resource_names_in_root/databricks.yml)")
}

func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
root := &Root{}
err := root.Load("./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml")
err := root.Load("./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml")
require.NoError(t, err)

other := &Root{}
err = other.Load("./testdata/duplicate_resource_name_in_subconfiguration/resources.yml")
require.NoError(t, err)

err = root.Merge(other)
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)")
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)")
}

func TestInitializeVariables(t *testing.T) {
Expand Down Expand Up @@ -163,3 +167,62 @@ func TestRootMergeEnvironmentWithMode(t *testing.T) {
require.NoError(t, root.MergeEnvironment(env))
assert.Equal(t, Development, root.Bundle.Mode)
}

func TestConfigFileNames_FindInPath(t *testing.T) {
testCases := []struct {
name string
files []string
expected string
err string
}{
{
name: "file found",
files: []string{"databricks.yml"},
expected: "BASE/databricks.yml",
err: "",
},
fjakobs marked this conversation as resolved.
Show resolved Hide resolved
{
name: "file found",
files: []string{"bundle.yml"},
expected: "BASE/bundle.yml",
err: "",
},
{
name: "multiple files found",
files: []string{"databricks.yaml", "bundle.yml"},
expected: "",
err: "multiple bundle root configuration files found",
},
{
name: "file not found",
files: []string{},
expected: "",
err: "no such file or directory",
},
}

if runtime.GOOS == "windows" {
testCases[3].err = "The system cannot find the file specified."
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
projectDir := t.TempDir()
for _, file := range tc.files {
f1, _ := os.Create(filepath.Join(projectDir, file))
f1.Close()
}

result, err := FileNames.FindInPath(projectDir)

expected := strings.Replace(tc.expected, "BASE/", projectDir+string(os.PathSeparator), 1)
assert.Equal(t, expected, result)

if tc.err != "" {
assert.ErrorContains(t, err, tc.err)
} else {
assert.NoError(t, err)
}
})
}
}
12 changes: 8 additions & 4 deletions bundle/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,15 @@ func getRootWithTraversal() (string, error) {
if err != nil {
return "", err
}
path, err := folders.FindDirWithLeaf(wd, config.FileName)
if err != nil {
return "", fmt.Errorf(`unable to locate bundle root: %s not found`, config.FileName)

for _, file := range config.FileNames {
path, err := folders.FindDirWithLeaf(wd, file)
if err == nil {
return path, nil
}
}
return path, nil

return "", fmt.Errorf(`unable to locate bundle root: %s not found`, config.FileNames[0])
}

// mustGetRoot returns a bundle root or an error if one cannot be found.
Expand Down
4 changes: 2 additions & 2 deletions bundle/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ func TestRootLookup(t *testing.T) {

chdir(t, t.TempDir())

// Create bundle.yml file.
f, err := os.Create(config.FileName)
// Create databricks.yml file.
f, err := os.Create(config.FileNames[0])
pietern marked this conversation as resolved.
Show resolved Hide resolved
require.NoError(t, err)
defer f.Close()

Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions bundle/tests/conflicting_resource_ids_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ import (

func TestConflictingResourceIdsNoSubconfig(t *testing.T) {
_, err := bundle.Load("./conflicting_resource_ids/no_subconfigurations")
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/bundle.yml")
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/databricks.yml")
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, bundleConfigPath))
}

func TestConflictingResourceIdsOneSubconfig(t *testing.T) {
b, err := bundle.Load("./conflicting_resource_ids/one_subconfiguration")
require.NoError(t, err)
err = bundle.Apply(context.Background(), b, bundle.Seq(mutator.DefaultMutators()...))
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/bundle.yml")
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/databricks.yml")
resourcesConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/resources.yml")
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, resourcesConfigPath))
}
Expand Down
8 changes: 4 additions & 4 deletions bundle/tests/job_and_pipeline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func TestJobAndPipelineDevelopment(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1)

p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, b.Config.Bundle.Mode, config.Development)
assert.True(t, p.Development)
require.Len(t, p.Libraries, 1)
Expand All @@ -29,7 +29,7 @@ func TestJobAndPipelineStaging(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1)

p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.False(t, p.Development)
require.Len(t, p.Libraries, 1)
assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path)
Expand All @@ -42,14 +42,14 @@ func TestJobAndPipelineProduction(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1)

p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.False(t, p.Development)
require.Len(t, p.Libraries, 1)
assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path)
assert.Equal(t, "nyc_taxi_production", p.Target)

j := b.Config.Resources.Jobs["pipeline_schedule"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(j.ConfigFilePath))
assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(j.ConfigFilePath))
assert.Equal(t, "Daily refresh of production pipeline", j.Name)
require.Len(t, j.Tasks, 1)
assert.NotEmpty(t, j.Tasks[0].PipelineTask.PipelineId)
Expand Down
2 changes: 1 addition & 1 deletion cmd/sync/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ var syncCmd = &cobra.Command{

//
// To be uncommented and used once our VS Code extension is bundle aware.
// Until then, this could interfere with extension usage where a `bundle.yml` file is present.
// Until then, this could interfere with extension usage where a `databricks.yml` file is present.
// See https://github.com/databricks/cli/pull/207.
//
// b := bundle.GetOrNil(cmd.Context())
Expand Down