Refactor Handling of Name<->ID Mapping in OpenAPI Generator (#547)

## Changes This PR proposes a refactor to the x-databricks-id and x-databricks-name extensions to the OpenAPI specification to support schema reuse while still providing the name <-> ID mapping features. Many resources in Databricks have user-facing names (like job name, cluster name, etc.). However, for many API calls, this name cannot be used to identify the resource; instead, a system-generated resource ID must be used instead (like job ID, cluster ID, etc.). For resources that support listing, the presence of these two extensions in the response body triggers the generation of additional code in the SDKs as convenience features for interacting with these resources, namely: * GetByName methods that allow looking up a resource by its user-facing name, and * XToYMap methods that materialize a mapping from name to resource. Previously, these extensions were defined at the level of a schema, where a field within a schema marked with these extensions would be a candidate for the above functionality. However, there is a problem with this when a component is reused in multiple APIs. First, it is possible to define a single field as a component. This is useful if the field is present in different related resources but the documentation/type of the field should be consistent. Beyond this, it is possible for a list response to contain multiple fields corresponding to IDs to Databricks resources, but in this scenario, only one of those IDs is actually the ID of the underlying resource. If the response body contains two or more fields marked with x-databricks-id or x-databricks-name, it is impossible for our code generator to know which was intended as the ID/Name for the resource. Furthermore, there are instances where these extensions are not used correctly: users would annotate path parameters with these extensions, but these only make sense in the context of a list response. Ultimately, the fields used for name and ID are specific to the operation itself and not to the schema. This allows multiple ID/name fields to be reused as components within a listing response, and the definition of which to use for name-to-ID mapping is fixed at the operation level. This way, different operations can define different fields to use for this mapping. This PR implements this change by moving the name & ID fields from the Schema level to the Operation level. The type of this extension changes from a boolean to a list of strings, corresponding to the path of fields to be traversed to get to the respective field in the response entity. The starting point for this traversal is the underlying resource itself, as opposed to the response body. Additionally, it adds support for nested ID fields (by preserving the implementation between the treatment of name and ID). Unfortunately, it also updates the Go SDK to a relatively recently OpenAPI commit as well, so there are additional changes here. Next steps: - [ ] Update OpenAPI spec in Universe, refactoring these extensions to the Operation level. - [ ] Update Go SDK with this change. - [ ] Update Python SDK with this change. - [ ] Update Java SDK with this change. - [ ] Update go/openapi/spec with this change. ## Tests  - [ ] `make test` passing - [ ] `make fmt` applied - [ ] relevant integration tests applied
databricks · Jul 12, 2023 · f84de61 · f84de61
1 parent c37a894
commit f84de61
Show file tree

Hide file tree

Showing 27 changed files with 926 additions and 203 deletions.
diff --git a/.codegen/api.go.tmpl b/.codegen/api.go.tmpl
@@ -168,9 +168,9 @@ func (a *{{.Service.Name}}API) {{.PascalName}}All(ctx context.Context{{if .Reque
 	var totalCount {{template "type" .Pagination.Limit.Entity}} = 0
 	{{ end -}}
 	ctx = useragent.InContext(ctx, "sdk-feature", "pagination")
-	{{if .Pagination.NeedsOffsetDedupe -}}
+	{{if .NeedsOffsetDedupe -}}
 	// deduplicate items that may have been added during iteration
-	seen := map[{{template "type" .Pagination.Entity.IdentifierField.Entity}}]bool{}
+	seen := map[{{template "type" .IdentifierField.Entity}}]bool{}
 	{{end}}{{if eq .Pagination.Increment 1 -}}
 	request.{{.Pagination.Offset.PascalName}} = 1 // start iterating from the first page
 	{{end}}for {
@@ -182,8 +182,8 @@ func (a *{{.Service.Name}}API) {{.PascalName}}All(ctx context.Context{{if .Reque
 			break
 		}
 		for _, v := range response.{{.Pagination.Results.PascalName}} {
-			{{- if .Pagination.NeedsOffsetDedupe -}}
-			id := v.{{.Pagination.Entity.IdentifierField.PascalName}}
+			{{- if .NeedsOffsetDedupe -}}
+			id := v{{ template "field-path" .IdFieldPath }}
 			if seen[id] {
 				// item was added during iteration
 				continue
@@ -228,9 +228,9 @@ func (a *{{.Service.Name}}API) {{.PascalName}}All(ctx context.Context{{if .Reque
 	return response.{{.Pagination.Results.PascalName}}, nil{{else}}return a.impl.{{.PascalName}}(ctx, request){{end}}
 }
 {{end}}{{if .NamedIdMap}}
-// {{.NamedIdMap.PascalName}} calls [{{.Service.Name}}API.{{.PascalName}}{{if not .NamedIdMap.Direct}}All{{end -}}] and creates a map of results with [{{.NamedIdMap.Entity.PascalName}}]{{range .NamedIdMap.NamePath}}.{{.PascalName}}{{end}} as key and [{{.NamedIdMap.Entity.PascalName}}].{{.NamedIdMap.Id.PascalName}} as value.
+// {{.NamedIdMap.PascalName}} calls [{{.Service.Name}}API.{{.PascalName}}{{if not .NamedIdMap.Direct}}All{{end -}}] and creates a map of results with [{{.NamedIdMap.Entity.PascalName}}]{{ template "field-path" .NamedIdMap.NamePath }} as key and [{{.NamedIdMap.Entity.PascalName}}]{{ template "field-path" .NamedIdMap.IdPath}} as value.
 //
-// Returns an error if there's more than one [{{.NamedIdMap.Entity.PascalName}}] with the same {{range .NamedIdMap.NamePath}}.{{.PascalName}}{{end}}.
+// Returns an error if there's more than one [{{.NamedIdMap.Entity.PascalName}}] with the same {{ template "field-path" .NamedIdMap.NamePath }}.
 //
 // Note: All [{{.NamedIdMap.Entity.PascalName}}] instances are loaded into memory before creating a map.
 // 
@@ -243,19 +243,19 @@ func (a *{{.Service.Name}}API) {{.NamedIdMap.PascalName}}(ctx context.Context{{i
 		return nil, err
 	}
 	for _, v := range result {
-		key := v{{range .NamedIdMap.NamePath}}.{{.PascalName}}{{end}}
+		key := v{{ template "field-path" .NamedIdMap.NamePath }}
 		_, duplicate := mapping[key]
 		if duplicate {
-			return nil, fmt.Errorf("duplicate {{range .NamedIdMap.NamePath}}.{{.PascalName}}{{end}}: %s", key)
+			return nil, fmt.Errorf("duplicate {{ template "field-path" .NamedIdMap.NamePath }}: %s", key)
 		}
-		mapping[key] = v.{{.NamedIdMap.Id.PascalName}}
+		mapping[key] = v{{ template "field-path" .NamedIdMap.IdPath }}
 	}
 	return mapping, nil
 }
 {{end}}{{if .GetByName}}
 // GetBy{{range .NamedIdMap.NamePath}}{{.PascalName}}{{end}} calls [{{.Service.Name}}API.{{.NamedIdMap.Entity.PascalName}}{{range .NamedIdMap.NamePath}}{{.PascalName}}{{end}}To{{.NamedIdMap.Id.PascalName}}Map] and returns a single [{{.GetByName.PascalName}}].
 //
-// Returns an error if there's more than one [{{.NamedIdMap.Entity.PascalName}}] with the same {{range .NamedIdMap.NamePath}}.{{.PascalName}}{{end}}.
+// Returns an error if there's more than one [{{.NamedIdMap.Entity.PascalName}}] with the same {{ template "field-path" .NamedIdMap.NamePath }}.
 //
 // Note: All [{{.NamedIdMap.Entity.PascalName}}] instances are loaded into memory before returning matching by name.
 // 
@@ -268,7 +268,7 @@ func (a *{{.Service.Name}}API) GetBy{{range .NamedIdMap.NamePath}}{{.PascalName}
 	}
 	tmp := map[string][]{{.GetByName.PascalName}}{}
 	for _, v := range result {
-		key := v{{range .NamedIdMap.NamePath}}.{{.PascalName}}{{end}}
+		key := v{{ template "field-path" .NamedIdMap.NamePath }}
 		tmp[key] = append(tmp[key], v)
 	}
 	alternatives, ok := tmp[name]
@@ -298,4 +298,7 @@ func (a *{{.Service.Name}}API) {{.Shortcut.PascalName}}AndWait(ctx context.Conte
 {{end}}
 {{end -}}
 
-{{- end}}
+{{- end}}
+{{- define "field-path" -}}
+	{{- range .}}.{{.PascalName}}{{end}}
+{{- end -}}
diff --git a/account_client.go b/account_client.go
diff --git a/openapi/code/entity.go b/openapi/code/entity.go
@@ -57,12 +57,6 @@ type Entity struct {
 	// this field does not have a concrete type
 	IsAny bool
 
-	// this field holds the identifier of the entity
-	IsIdentifier bool
-
-	// this field holds a name of the entity
-	IsName bool
-
 	// this field is computed on the platform side
 	IsComputed bool
 
@@ -111,6 +105,26 @@ func (e *Entity) Field(name string) *Field {
 	return &field
 }
 
+// Given a list of field names, return the list of *Field objects which result
+// from following the path of fields in the entity.
+func (e *Entity) GetUnderlyingFields(path []string) ([]*Field, error) {
+	if len(path) == 0 {
+		return nil, fmt.Errorf("empty path is not allowed (entity: %s)", e.FullName())
+	}
+	if len(path) == 1 {
+		return []*Field{e.Field(path[0])}, nil
+	}
+	field := e.Field(path[0])
+	if field == nil {
+		return nil, fmt.Errorf("field %s not found in entity %s", path[0], e.FullName())
+	}
+	rest, err := field.Entity.GetUnderlyingFields(path[1:])
+	if err != nil {
+		return nil, err
+	}
+	return append([]*Field{field}, rest...), nil
+}
+
 // IsObject returns true if entity is not a Mpa and has more than zero fields
 func (e *Entity) IsObject() bool {
 	return e.MapValue == nil && len(e.fields) > 0
@@ -182,42 +196,6 @@ func (e *Entity) HasJsonField() bool {
 	return false
 }
 
-// Does this type have x-databricks-id field?
-func (e *Entity) HasIdentifierField() bool {
-	return e.IdentifierField() != nil
-}
-
-// Return field with x-databricks-id
-func (e *Entity) IdentifierField() *Field {
-	for _, v := range e.fields {
-		if v.Entity.IsIdentifier {
-			return &v
-		}
-	}
-	return nil
-}
-
-// Does this type have x-databricks-name field?
-func (e *Entity) HasNameField() bool {
-	for _, v := range e.fields {
-		if v.Entity.IsName {
-			return true
-		}
-	}
-	return false
-}
-
-// Does this type have a single x-databricks-name field?
-func (e *Entity) HasSingleNameField() bool {
-	count := 0
-	for _, v := range e.fields {
-		if v.Entity.IsName {
-			count++
-		}
-	}
-	return (count == 1)
-}
-
 // Enum returns all entries for enum entities
 func (e *Entity) Enum() (enum []EnumEntry) {
 	for _, v := range e.enum {

diff --git a/openapi/code/method.go b/openapi/code/method.go
@@ -23,10 +23,19 @@ type Method struct {
 	// Response type representation
 	Response          *Entity
 	EmptyResponseName Named
-	wait              *openapi.Wait
-	pagination        *openapi.Pagination
-	operation         *openapi.Operation
-	shortcut          bool
+
+	// For list APIs, the path of fields in the response entity to follow to get
+	// the resource ID.
+	IdFieldPath []*Field
+
+	// For list APIs, the path of fields in the response entity to follow to get
+	// the user-friendly name of the resource.
+	NameFieldPath []*Field
+
+	wait       *openapi.Wait
+	pagination *openapi.Pagination
+	operation  *openapi.Operation
+	shortcut   bool
 }
 
 // Shortcut holds definition of "shortcut" methods, that are generated for
@@ -58,7 +67,7 @@ type Pagination struct {
 // drop-downs or any other selectors.
 type NamedIdMap struct {
 	Named
-	Id       *Field
+	IdPath   []*Field
 	NamePath []*Field
 	Entity   *Entity
 
@@ -158,6 +167,21 @@ func (m *Method) IsJsonOnly() bool {
 	return m.operation.JsonOnly
 }
 
+func (m *Method) HasIdentifierField() bool {
+	return len(m.IdFieldPath) > 0
+}
+
+func (m *Method) IdentifierField() *Field {
+	if !m.HasIdentifierField() {
+		return nil
+	}
+	return m.IdFieldPath[len(m.IdFieldPath)-1]
+}
+
+func (m *Method) HasNameField() bool {
+	return len(m.NameFieldPath) > 0
+}
+
 // Wait returns definition for long-running operation
 func (m *Method) Wait() *Wait {
 	if m.wait == nil {
@@ -228,8 +252,9 @@ func (m *Method) paginationItem() *Entity {
 	return p.Entity
 }
 
-func (p *Pagination) NeedsOffsetDedupe() bool {
-	return p.Offset != nil && p.Entity.HasIdentifierField()
+func (m *Method) NeedsOffsetDedupe() bool {
+	p := m.Pagination()
+	return p.Offset != nil && m.HasIdentifierField()
 }
 
 func (p *Pagination) MultiRequest() bool {
@@ -240,66 +265,32 @@ func (p *Pagination) MultiRequest() bool {
 // entities of a type
 func (m *Method) NamedIdMap() *NamedIdMap {
 	entity := m.paginationItem()
-	if entity == nil {
-		return nil
-	}
-	if !entity.HasIdentifierField() {
-		return nil
-	}
-	if !entity.HasSingleNameField() {
-		return nil
-	}
-	var id *Field
-	var namePath []*Field
-	for _, f := range entity.fields {
-		if f.Schema == nil {
-			continue
-		}
-		local := f
-		if f.Schema.IsIdentifier {
-			id = &local
-		}
-		if f.Entity.IsName {
-			namePath = append(namePath, &local)
-			if !f.Entity.IsObject() {
-				continue
-			}
-			if !f.Entity.HasNameField() {
-				continue
-			}
-			// job list: {"id": 1234, "settings": {"name": "..."}}
-			for _, innerField := range f.Entity.fields {
-				if innerField.Schema == nil {
-					continue
-				}
-				if innerField.Schema.IsName {
-					local2 := innerField
-					namePath = append(namePath, &local2)
-				}
-			}
-		}
-	}
-	if len(namePath) == 0 {
+	if entity == nil || !m.HasIdentifierField() || !m.HasNameField() {
 		return nil
 	}
+	namePath := m.NameFieldPath
 	nameParts := []string{entity.PascalName()}
 	for _, v := range namePath {
 		nameParts = append(nameParts, v.PascalName())
 	}
 	nameParts = append(nameParts, "To")
-	nameParts = append(nameParts, id.PascalName())
+	nameParts = append(nameParts, m.IdentifierField().PascalName())
 	nameParts = append(nameParts, "Map")
 	return &NamedIdMap{
 		Named: Named{
 			Name: strings.Join(nameParts, ""),
 		},
-		Id:       id,
+		IdPath:   m.IdFieldPath,
 		NamePath: namePath,
 		Entity:   entity,
 		Direct:   m.Response.ArrayValue != nil,
 	}
 }
 
+func (n *NamedIdMap) Id() *Field {
+	return n.IdPath[len(n.IdPath)-1]
+}
+
 // GetByName returns entity from the same service with x-databricks-crud:read
 func (m *Method) GetByName() *Entity {
 	n := m.NamedIdMap()

diff --git a/openapi/code/package.go b/openapi/code/package.go
@@ -153,8 +153,6 @@ func (pkg *Package) schemaToEntity(s *openapi.Schema, path []string, hasName boo
 	}
 	e.IsEmpty = s.IsEmpty()
 	e.IsAny = s.IsAny || s.Type == "object" && s.IsEmpty()
-	e.IsIdentifier = s.IsIdentifier
-	e.IsName = s.IsName
 	e.IsComputed = s.IsComputed
 	e.RequiredOrder = s.Required
 	// enum

diff --git a/openapi/code/service.go b/openapi/code/service.go
@@ -256,6 +256,23 @@ func (svc *Service) newMethod(verb, path string, params []openapi.Parameter, op
 		}
 		description = fmt.Sprintf("%s\n\n%s", summary, description)
 	}
+
+	var nameFieldPath, idFieldPath []*Field
+	respEntity := getPaginationEntity(response, op.Pagination)
+	if op.HasNameField() && respEntity != nil {
+		nameField, err := respEntity.GetUnderlyingFields(op.NameField)
+		if err != nil {
+			panic(fmt.Errorf("[%s] could not find name field %q: %w", op.OperationId, op.NameField, err))
+		}
+		nameFieldPath = nameField
+	}
+	if op.HasIdentifierField() && respEntity != nil {
+		idField, err := respEntity.GetUnderlyingFields(op.IdField)
+		if err != nil {
+			panic(fmt.Errorf("[%s] could not find id field %q: %w", op.OperationId, op.IdField, err))
+		}
+		idFieldPath = idField
+	}
 	return &Method{
 		Named:             Named{name, description},
 		Service:           svc,
@@ -265,6 +282,8 @@ func (svc *Service) newMethod(verb, path string, params []openapi.Parameter, op
 		PathParts:         svc.paramPath(path, request, params),
 		Response:          response,
 		EmptyResponseName: emptyResponse,
+		NameFieldPath:     nameFieldPath,
+		IdFieldPath:       idFieldPath,
 		wait:              op.Wait,
 		operation:         op,
 		pagination:        op.Pagination,
@@ -309,3 +328,13 @@ func (svc *Service) IsPrivatePreview() bool {
 func (svc *Service) IsPublicPreview() bool {
 	return isPublicPreview(&svc.tag.Node)
 }
+
+func getPaginationEntity(entity *Entity, pagination *openapi.Pagination) *Entity {
+	if pagination == nil {
+		return nil
+	}
+	if pagination.Inline {
+		return entity.ArrayValue
+	}
+	return entity.Field(pagination.Results).Entity.ArrayValue
+}