11
11
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
# See the License for the specific language governing permissions and
13
13
# limitations under the License.
14
+ import asyncio
14
15
import itertools
15
16
import os
16
17
import warnings
33
34
import pandas as pd
34
35
import pyarrow as pa
35
36
from colorama import Fore , Style
37
+ from fastapi .concurrency import run_in_threadpool
36
38
from google .protobuf .timestamp_pb2 import Timestamp
37
39
from tqdm import tqdm
38
40
@@ -1423,26 +1425,13 @@ def tqdm_builder(length):
1423
1425
end_date ,
1424
1426
)
1425
1427
1426
- def push (
1427
- self ,
1428
- push_source_name : str ,
1429
- df : pd .DataFrame ,
1430
- allow_registry_cache : bool = True ,
1431
- to : PushMode = PushMode .ONLINE ,
1432
- ):
1433
- """
1434
- Push features to a push source. This updates all the feature views that have the push source as stream source.
1435
-
1436
- Args:
1437
- push_source_name: The name of the push source we want to push data to.
1438
- df: The data being pushed.
1439
- allow_registry_cache: Whether to allow cached versions of the registry.
1440
- to: Whether to push to online or offline store. Defaults to online store only.
1441
- """
1428
+ def _fvs_for_push_source_or_raise (
1429
+ self , push_source_name : str , allow_cache : bool
1430
+ ) -> set [FeatureView ]:
1442
1431
from feast .data_source import PushSource
1443
1432
1444
- all_fvs = self .list_feature_views (allow_cache = allow_registry_cache )
1445
- all_fvs += self .list_stream_feature_views (allow_cache = allow_registry_cache )
1433
+ all_fvs = self .list_feature_views (allow_cache = allow_cache )
1434
+ all_fvs += self .list_stream_feature_views (allow_cache = allow_cache )
1446
1435
1447
1436
fvs_with_push_sources = {
1448
1437
fv
@@ -1457,7 +1446,27 @@ def push(
1457
1446
if not fvs_with_push_sources :
1458
1447
raise PushSourceNotFoundException (push_source_name )
1459
1448
1460
- for fv in fvs_with_push_sources :
1449
+ return fvs_with_push_sources
1450
+
1451
+ def push (
1452
+ self ,
1453
+ push_source_name : str ,
1454
+ df : pd .DataFrame ,
1455
+ allow_registry_cache : bool = True ,
1456
+ to : PushMode = PushMode .ONLINE ,
1457
+ ):
1458
+ """
1459
+ Push features to a push source. This updates all the feature views that have the push source as stream source.
1460
+
1461
+ Args:
1462
+ push_source_name: The name of the push source we want to push data to.
1463
+ df: The data being pushed.
1464
+ allow_registry_cache: Whether to allow cached versions of the registry.
1465
+ to: Whether to push to online or offline store. Defaults to online store only.
1466
+ """
1467
+ for fv in self ._fvs_for_push_source_or_raise (
1468
+ push_source_name , allow_registry_cache
1469
+ ):
1461
1470
if to == PushMode .ONLINE or to == PushMode .ONLINE_AND_OFFLINE :
1462
1471
self .write_to_online_store (
1463
1472
fv .name , df , allow_registry_cache = allow_registry_cache
@@ -1467,22 +1476,42 @@ def push(
1467
1476
fv .name , df , allow_registry_cache = allow_registry_cache
1468
1477
)
1469
1478
1470
- def write_to_online_store (
1479
async def push_async(
    self,
    push_source_name: str,
    df: pd.DataFrame,
    allow_registry_cache: bool = True,
    to: PushMode = PushMode.ONLINE,
):
    """
    Asynchronous counterpart of ``push``: push features to a push source.

    Every feature view returned by ``_fvs_for_push_source_or_raise`` for the
    given push source is written to. Online writes are issued through
    ``write_to_online_store_async`` and awaited together with
    ``asyncio.gather``; offline writes call the synchronous
    ``write_to_offline_store`` one feature view at a time from a function
    handed to ``run_in_threadpool``. Online writes complete before offline
    writes start.

    Args:
        push_source_name: The name of the push source we want to push data to.
        df: The data being pushed.
        allow_registry_cache: Whether to allow cached versions of the registry.
        to: Whether to push to online or offline store. Defaults to online store only.

    Raises:
        PushSourceNotFoundException: If no feature view uses the push source
            (raised by ``_fvs_for_push_source_or_raise``).
    """
    target_views = self._fvs_for_push_source_or_raise(
        push_source_name, allow_registry_cache
    )

    wants_online = to in (PushMode.ONLINE, PushMode.ONLINE_AND_OFFLINE)
    wants_offline = to in (PushMode.OFFLINE, PushMode.ONLINE_AND_OFFLINE)

    if wants_online:
        # One coroutine per feature view, awaited as a single batch.
        await asyncio.gather(
            *(
                self.write_to_online_store_async(
                    view.name, df, allow_registry_cache=allow_registry_cache
                )
                for view in target_views
            )
        )

    if wants_offline:

        def _write_all_offline():
            # The offline write API is synchronous, so the whole loop is
            # delegated to run_in_threadpool rather than called inline.
            for view in target_views:
                self.write_to_offline_store(
                    view.name, df, allow_registry_cache=allow_registry_cache
                )

        await run_in_threadpool(_write_all_offline)
1507
+
1508
+ def _get_feature_view_and_df_for_online_write (
1471
1509
self ,
1472
1510
feature_view_name : str ,
1473
1511
df : Optional [pd .DataFrame ] = None ,
1474
1512
inputs : Optional [Union [Dict [str , List [Any ]], pd .DataFrame ]] = None ,
1475
1513
allow_registry_cache : bool = True ,
1476
1514
):
1477
- """
1478
- Persists a dataframe to the online store.
1479
-
1480
- Args:
1481
- feature_view_name: The feature view to which the dataframe corresponds.
1482
- df: The dataframe to be persisted.
1483
- inputs: Optional the dictionary object to be written
1484
- allow_registry_cache (optional): Whether to allow retrieving feature views from a cached registry.
1485
- """
1486
1515
feature_view_dict = {
1487
1516
fv_proto .name : fv_proto
1488
1517
for fv_proto in self .list_all_feature_views (allow_registry_cache )
@@ -1509,10 +1538,60 @@ def write_to_online_store(
1509
1538
df = pd .DataFrame (df )
1510
1539
except Exception as _ :
1511
1540
raise DataFrameSerializationError (df )
1541
+ return feature_view , df
1542
+
1543
def write_to_online_store(
    self,
    feature_view_name: str,
    df: Optional[pd.DataFrame] = None,
    inputs: Optional[Union[Dict[str, List[Any]], pd.DataFrame]] = None,
    allow_registry_cache: bool = True,
):
    """
    Write a dataframe into the online store for one feature view.

    The feature view object and the dataframe to ingest are resolved by
    ``_get_feature_view_and_df_for_online_write``; the result is then handed
    to the provider's ``ingest_df``.

    Args:
        feature_view_name: The feature view to which the dataframe corresponds.
        df: The dataframe to be persisted.
        inputs: Optional dictionary object (or dataframe) to be written.
        allow_registry_cache (optional): Whether to allow retrieving feature views from a cached registry.
    """
    resolved_view, frame = self._get_feature_view_and_df_for_online_write(
        feature_view_name=feature_view_name,
        df=df,
        inputs=inputs,
        allow_registry_cache=allow_registry_cache,
    )
    # Provider is looked up only after the inputs have been resolved.
    self._get_provider().ingest_df(resolved_view, frame)
1515
1568
1569
async def write_to_online_store_async(
    self,
    feature_view_name: str,
    df: Optional[pd.DataFrame] = None,
    inputs: Optional[Union[Dict[str, List[Any]], pd.DataFrame]] = None,
    allow_registry_cache: bool = True,
):
    """
    Write a dataframe into the online store for one feature view, asynchronously.

    The feature view object and the dataframe to ingest are resolved
    synchronously by ``_get_feature_view_and_df_for_online_write``; only the
    provider's ``ingest_df_async`` call is awaited.

    Args:
        feature_view_name: The feature view to which the dataframe corresponds.
        df: The dataframe to be persisted.
        inputs: Optional dictionary object (or dataframe) to be written.
        allow_registry_cache (optional): Whether to allow retrieving feature views from a cached registry.
    """
    resolved_view, frame = self._get_feature_view_and_df_for_online_write(
        feature_view_name=feature_view_name,
        df=df,
        inputs=inputs,
        allow_registry_cache=allow_registry_cache,
    )
    # Provider is looked up only after the inputs have been resolved.
    await self._get_provider().ingest_df_async(resolved_view, frame)
1594
+
1516
1595
def write_to_offline_store (
1517
1596
self ,
1518
1597
feature_view_name : str ,
0 commit comments