|
| 1 | +from datetime import timedelta |
| 2 | + |
| 3 | +from feast import Entity, Feature, FeatureView, RedshiftSource, ValueType |
| 4 | + |
# The driver entity. An entity plays the role of a primary key: features are
# retrieved by it, and it is also what multiple tables/views are joined on
# while feature vectors are being constructed.
driver = Entity(
    # Entity name; it has to be unique within a project.
    name="driver_id",
    # Storage-level type of the entity.
    value_type=ValueType.INT64,
    # Storage-level field/column on which features can be looked up. The same
    # key is used to join feature tables/views when building feature vectors.
    join_key="driver_id",
)
| 18 | + |
# Data source that feature values are retrieved from. Sources get queried when
# training datasets are built and when features are materialized into an
# online store.
driver_stats_source = RedshiftSource(
    # Redshift table in which the features can be found.
    table="feast_driver_hourly_stats",
    # Optional created timestamp: ensures there are no duplicate feature rows
    # in the offline store or when building training datasets.
    created_timestamp_column="created",
    # Event timestamp: drives point-in-time joins and ensures that only
    # features within the TTL are returned.
    event_timestamp_column="event_timestamp",
)
| 31 | + |
# A feature view groups features according to how they are stored in the
# online or the offline store.
driver_stats_fv = FeatureView(
    # Unique name of this feature view; no two feature views in a single
    # project may share a name.
    name="driver_hourly_stats",
    # Keys required for joining or looking up features from this feature
    # view. Each reference corresponds to the name of a defined entity.
    entities=["driver_id"],
    # Batch source used to find feature values. For this feature view that is
    # a source table on Redshift holding driver statistics features.
    batch_source=driver_stats_source,
    # Schema of the view: the features listed here are what gets materialized
    # into a store, and they serve as the references used during retrieval
    # when building a training dataset or serving features.
    features=[
        Feature(name="conv_rate", dtype=ValueType.FLOAT),
        Feature(name="acc_rate", dtype=ValueType.FLOAT),
        Feature(name="avg_daily_trips", dtype=ValueType.INT64),
    ],
    # Maximum age each feature value may have relative to its lookup time.
    # For historical features (used in training), the TTL is relative to each
    # timestamp provided in the entity dataframe. The TTL also allows keys to
    # be evicted from online stores and limits the amount of historical
    # scanning required during retrieval of historical feature values.
    ttl=timedelta(weeks=52),
    # User-defined key/value pairs attached to the feature view.
    tags={"team": "driver_performance"},
)
0 commit comments