Skip to content

Commit

Permalink
feat: introduce compound (parameterizable) extension types and variat…
Browse files Browse the repository at this point in the history
…ions
  • Loading branch information
jvanstraten committed Jun 21, 2022
1 parent af0b452 commit df1df3c
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 2 deletions.
8 changes: 8 additions & 0 deletions proto/substrait/algebra.proto
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,10 @@ message Expression {
// directly declare the type variation).
uint32 type_variation_reference = 51;

// The parameters to be bound to the type variation, if a type variation is
// specified and it is defined to be parameterizable.
repeated Type.Parameter type_variation_parameters = 52;

message VarChar {
string value = 1;
uint32 length = 2;
Expand Down Expand Up @@ -472,6 +476,10 @@ message Expression {
// points to a type_anchor defined in this plan
uint32 type_reference = 1;

// The parameters to be bound to the type class, if the type class is
// parameterizable.
repeated Type.Parameter type_parameters = 3;

// the value of the literal, serialized using some type-specific
// protobuf message
google.protobuf.Any value = 2;
Expand Down
44 changes: 44 additions & 0 deletions proto/substrait/type.proto
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ syntax = "proto3";

package substrait;

import "google/protobuf/empty.proto";

option csharp_namespace = "Substrait.Protobuf";
option go_package = "github.com/substrait-io/substrait-go/proto";
option java_multiple_files = true;
Expand Down Expand Up @@ -53,133 +55,175 @@ message Type {

// Simple (non-compound) type class Boolean.
message Boolean {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class I8.
message I8 {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class I16.
message I16 {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class I32.
message I32 {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class I64.
message I64 {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class FP32.
message FP32 {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class FP64.
message FP64 {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class String.
message String {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class Binary.
message Binary {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class Timestamp.
message Timestamp {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class Date.
message Date {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class Time.
message Time {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class TimestampTZ (`timestamp_tz` in the docs).
message TimestampTZ {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class IntervalYear.
message IntervalYear {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class IntervalDay.
message IntervalDay {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Simple (non-compound) type class UUID.
message UUID {
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 1;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 3 is declared before
// field 2; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 3;
// Nullability of this type.
Nullability nullability = 2;
}

// Start compound types.
// Compound type class FixedChar, parameterized by a length.
message FixedChar {
// Length parameter of the type.
int32 length = 1;
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 2;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 4 is declared before
// field 3; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 4;
// Nullability of this type.
Nullability nullability = 3;
}

// Compound type class VarChar, parameterized by a length.
message VarChar {
// Length parameter of the type, like the 10 in VARCHAR<10>.
int32 length = 1;
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 2;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 4 is declared before
// field 3; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 4;
// Nullability of this type.
Nullability nullability = 3;
}

// Compound type class FixedBinary, parameterized by a length.
message FixedBinary {
// Length parameter of the type.
int32 length = 1;
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 2;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 4 is declared before
// field 3; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 4;
// Nullability of this type.
Nullability nullability = 3;
}

// Compound type class Decimal, parameterized by scale and precision.
message Decimal {
// Scale parameter of the type. Note that scale (1) precedes precision (2).
int32 scale = 1;
// Precision parameter of the type.
int32 precision = 2;
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 3;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 5 is declared before
// field 4; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 5;
// Nullability of this type.
Nullability nullability = 4;
}

// Compound type class Struct, parameterized by a list of data types.
message Struct {
// Data types contained in the struct (presumably one per field, in field
// order -- confirm against the specification).
repeated Type types = 1;
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 2;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 4 is declared before
// field 3; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 4;
// Nullability of this type.
Nullability nullability = 3;
}

// Compound type class List, parameterized by a data type.
message List {
// Data type parameter of the list, like the i32 in LIST<i32>.
Type type = 1;
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 2;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 4 is declared before
// field 3; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 4;
// Nullability of this type.
Nullability nullability = 3;
}

// Compound type class Map, parameterized by a key type and a value type.
message Map {
// Data type of the map keys.
Type key = 1;
// Data type of the map values.
Type value = 2;
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 3;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 5 is declared before
// field 4; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 5;
// Nullability of this type.
Nullability nullability = 4;
}

// A user-defined (extension) type class, optionally parameterized.
message UserDefined {
// Reference to the user-defined type class (presumably a type_anchor defined
// in the plan, as for the same-named literal field -- confirm).
uint32 type_reference = 1;
// Reference to the type variation in use, if any.
uint32 type_variation_reference = 2;
// Parameters bound to the type variation, if one is specified and it is
// defined to be parameterizable. Note: field number 4 is declared before
// field 3; the number, not the declaration order, identifies it on the wire.
repeated Parameter type_variation_parameters = 4;
// Nullability of this type.
Nullability nullability = 3;
// Parameters bound to the type class, if the type class is parameterizable.
repeated Parameter type_parameters = 5;
}

// A single parameter bound to a parameterizable type class or type
// variation. The supported kinds of parameter are data types, booleans,
// integers, enumerations, and strings; at most one alternative of the oneof
// is set.
message Parameter {
oneof parameter {
// Explicitly null/unspecified parameter, to select the default value (if
// any).
google.protobuf.Empty null = 1;

// Data type parameters, like the i32 in LIST<i32>.
Type data_type = 2;

// Value parameters, like the 10 in VARCHAR<10>.
// Boolean-valued parameter.
bool boolean = 3;
// Integer-valued parameter.
int64 integer = 4;
// Enumeration-valued parameter, carried as a string (presumably one of the
// options declared for the parameter -- confirm).
string enum = 5;
// String-valued parameter.
string string = 6;
}
}
}

Expand Down
77 changes: 77 additions & 0 deletions site/docs/types/type_classes.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,80 @@ A YAML example of an extension type is below:
This declares a new type (namespaced to the associated YAML file) called "point". This type is composed of two `i32` values named longitude and latitude. Once a type has been declared, it can be used in function declarations. [TBD: should field references be allowed to dereference the components of a user defined type?]

Literals for user-defined types are represented using protobuf [Any](https://developers.google.com/protocol-buffers/docs/proto3#any) messages.

### Parameterization

User-defined types may be parameterized, in the same way in which the built-in compound types are parameterizable. The supported "meta-types" for parameters are data types, booleans, integers, enumerations, and strings. Using parameters, we could redefine "point" with different types of coordinates. For example:

```yaml
name: point
parameters:
- name: T
description: |
The type used for the longitude and latitude
components of the point.
type: type
```

or:

```yaml
name: point
parameters:
- name: coordinate_type
type: enumeration
options:
- integer
- double
```

or:

```yaml
name: point
parameters:
- name: LONG
type: type
- name: LAT
type: type
```

We can't specify the internal structure in this case, because there is currently no support for derived types in the structure.

The allowed range can be limited for integer parameters. For example:

```yaml
name: vector
parameters:
- name: T
type: type
- name: dimensions
type: integer
min: 2
max: 3
```

This specifies a vector that can be either 2- or 3-dimensional.

Similar to function arguments, the last parameter may be specified to be variadic, allowing it to be specified one or more times instead of only once. For example:

```yaml
name: union
parameters:
- name: T
type: type
variadic: true
```

This defines a type that can be parameterized with one or more other data types, for example `union<i32, i64>` but also `union<bool>`. Zero or more is also possible, by making the last parameter optional:

```yaml
name: tuple
parameters:
- name: T
type: type
optional: true
variadic: true
```

This would also allow for `tuple<>`, to define a zero-tuple.
1 change: 1 addition & 0 deletions site/docs/types/type_variations.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ All variations except the "system-preferred" variation (a.k.a. `[0]`, see [Type
| Name | The name used to reference this type. Should be unique within type variations for this parent type within a simple extension. |
| Description | A human description of the purpose of this type variation. |
| Function Behavior | **INHERITS** or **SEPARATE**: whether functions that support the system-preferred variation implicitly also support this variation, or whether functions should be resolved independently. For example, if one has the function `add(i8,i8)` defined and then defines an `i8` variation, this determines whether the `i8` variation can be bound to the base `add` operation (inherits) or whether a specialized version of `add` needs to be defined specifically for this variation (separate). Defaults to inherits. |
| Parameterization | Type variations can be parameterized. For example, an implementation may support storing `timestamp_tz` using any timezone, in which case it might not be convenient to create a variation for every possible timezone. Parameterizations for type variations work the same as parameterizations for [compound user-defined types](type_classes.md#parameterization). |
73 changes: 71 additions & 2 deletions text/simple_extensions_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ properties:
type: object
additionalProperties:
$ref: "#/$defs/type"
parameters: # parameter list for compound types
$ref: "#/$defs/type_param_defs"
variadic: # when set, last parameter may be specified one or more times
type: boolean
type_variations:
type: array
minItems: 1
Expand All @@ -25,14 +29,18 @@ properties:
required: [parent, name]
properties:
parent:
type: string
$ref: "#/$defs/type"
name:
type: string
description:
type: string
functions:
type: string
enum: [INHERITS, SEPARATE]
parameters: # parameter list for compound type variations
$ref: "#/$defs/type_param_defs"
variadic: # when set, last parameter may be specified one or more times
type: boolean
scalar_functions:
type: array
items:
Expand All @@ -45,8 +53,69 @@ properties:
$defs:
# A data type reference: either a bare type name (string shorthand) or an
# object that can additionally carry nullability, a type variation, and
# parameters for compound types.
# NOTE(review): the two consecutive string alternatives below look like the
# before/after lines of a diff; if both are really present, every string
# matches two oneOf branches and is rejected -- confirm only one remains.
type:
oneOf:
- type: string
- type: string # shorthand form for when only name is needed
- type: object
properties:
name: # name: a Substrait type name, or name of a type previously defined in this extension
type: string
nullable: # set to true to make the type nullable
type: boolean
variation: # type variation, if any
$ref: "#/$defs/variation"
parameters: # parameters for compound types
$ref: "#/$defs/type_param_values"
# A type variation reference: either a bare variation name (string shorthand)
# or an object that can additionally carry parameters for the variation.
variation:
oneOf:
- type: string # shorthand form for when only name is needed
- type: object
properties:
name: # name of a type variation previously defined in this extension
type: string
parameters: # parameters for compound type variations
$ref: "#/$defs/type_param_values"
# Declares the parameters a compound type or type variation accepts. Only the
# metatype ("type") of each parameter is required.
type_param_defs: # an array of compound type (variation) parameter definitions
type: array
items:
type: object
required: [type]
properties:
name: # name of the parameter (for documentation only)
type: string
description: # description (for documentation only)
type: string
type: # expected metatype for the parameter
type: string
enum:
- type
- boolean
- integer
- enumeration
- string
min: # for integers, the minimum supported value (inclusive)
type: number
max: # for integers, the maximum supported value (inclusive)
type: number
options: # for the "enumeration" metatype, the list of supported values
type: array
minItems: 1
uniqueItems: true
items:
type: string
optional: # when set to true, the parameter may be omitted at the end or skipped using null
type: boolean
# The values bound to the parameters of a compound type or type variation
# when it is used (as opposed to type_param_defs, which declares them).
type_param_values: # an array of compound type (variation) parameter values
type: array
items:
oneOf:
- type: "null" # use to skip optional parameters
- type: boolean # for boolean parameters
- type: number # for integer parameters
- type: string # for string and enum parameters
- type: object # for data type parameters
required: [ type ]
properties:
type:
$ref: "#/$defs/type"
arguments: # an array of arguments
type: array
items:
Expand Down

0 comments on commit df1df3c

Please sign in to comment.