forked from feast-dev/feast
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDataSource.proto
142 lines (116 loc) · 4.78 KB
/
DataSource.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
//
// Copyright 2020 The Feast Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
syntax = "proto3";
package feast.core;
option go_package = "github.com/feast-dev/feast/sdk/go/protos/feast/core";
option java_outer_classname = "DataSourceProto";
option java_package = "feast.proto.core";
import "feast/core/DataFormat.proto";
// Defines a Data Source that can be used source Feature data
// Describes a source of feature data that Feast can ingest from.
message DataSource {
  // Field numbers 6-10 may have been used in the past; keep them reserved
  // so they are never reassigned (field numbers identify data on the wire).
  reserved 6 to 10;

  // Enumerates the kinds of data source supported by Feast.
  enum SourceType {
    INVALID = 0;
    BATCH_FILE = 1;
    BATCH_BIGQUERY = 2;
    STREAM_KAFKA = 3;
    STREAM_KINESIS = 4;
    BATCH_REDSHIFT = 5;
    CUSTOM_SOURCE = 6;
  }

  // Kind of this data source.
  SourceType type = 1;

  // Maps field names in the source data to the corresponding
  // field names in the parent FeatureTable.
  map<string, string> field_mapping = 2;

  // Name of the column holding the event timestamp (required).
  string event_timestamp_column = 3;

  // Name of the partition column, if any; mainly useful for
  // file-based sources (optional).
  string date_partition_column = 4;

  // Name of the column holding the creation timestamp (required).
  string created_timestamp_column = 5;

  // Internal: the Python class of the data source object this proto
  // represents. Populated by Feast itself, never by users.
  string data_source_class_type = 17;

  // Options for a DataSource that reads features from a file.
  message FileOptions {
    FileFormat file_format = 1;

    // Location of the file to retrieve and read features from, e.g.
    //   s3://path/to/file    for AWS S3 storage
    //   gs://path/to/file    for GCP GCS storage
    //   file:///path/to/file for local storage
    string file_url = 2;

    // Custom endpoint that overrides the default AWS S3 storage endpoint.
    string s3_endpoint_override = 3;
  }

  // Options for a DataSource backed by a BigQuery table or query.
  message BigQueryOptions {
    // Fully qualified table reference in the form [project:dataset.table].
    string table_ref = 1;

    // SQL query producing the feature table; its result must include an
    // event_timestamp column plus the respective entity columns.
    string query = 2;
  }

  // Options for a DataSource that streams features from Kafka messages.
  // Each message should be a Protobuf decodable with the generated Java
  // Protobuf class at the given class path.
  message KafkaOptions {
    // Comma-separated list of Kafka bootstrap servers (host[:port]); used
    // by feature tables that do not define their own source.
    string bootstrap_servers = 1;

    // Kafka topic that carries the feature data.
    string topic = 2;

    // Stream encoding of the feature/entity data in Kafka messages.
    StreamFormat message_format = 3;
  }

  // Options for a DataSource that streams features from Kinesis records.
  // Each record should be a Protobuf decodable with the generated Java
  // Protobuf class at the given class path.
  message KinesisOptions {
    // AWS region hosting the Kinesis stream.
    string region = 1;

    // Kinesis stream that carries the feature data.
    string stream_name = 2;

    // Encoding of the feature/entity data in Kinesis records;
    // Avro and Proto are supported as data formats.
    StreamFormat record_format = 3;
  }

  // Options for a DataSource backed by a Redshift table or query.
  message RedshiftOptions {
    // Name of the Redshift table.
    string table = 1;

    // SQL query producing the feature table; its result must include an
    // event_timestamp column plus the respective entity columns.
    string query = 2;
  }

  // Configuration for custom, third-party data sources.
  message CustomSourceOptions {
    // Opaque serialized configuration for the data source; the custom
    // source implementation is responsible for serializing and
    // deserializing these bytes.
    bytes configuration = 1;
  }

  // Source-type-specific configuration; at most one may be set.
  oneof options {
    FileOptions file_options = 11;
    BigQueryOptions bigquery_options = 12;
    KafkaOptions kafka_options = 13;
    KinesisOptions kinesis_options = 14;
    RedshiftOptions redshift_options = 15;
    CustomSourceOptions custom_options = 16;
  }
}