18
18
import datetime
19
19
from typing import Dict , List , Optional , Sequence , Tuple , Union
20
20
import uuid
21
+ from google .protobuf import timestamp_pb2
21
22
22
23
from google .auth import credentials as auth_credentials
23
24
from google .protobuf import field_mask_pb2
@@ -1575,6 +1576,7 @@ def write_feature_values(
1575
1576
],
1576
1577
"pd.DataFrame" , # type: ignore # noqa: F821 - skip check for undefined name 'pd'
1577
1578
],
1579
+ feature_time : Union [str , datetime .datetime ] = None ,
1578
1580
) -> "EntityType" : # noqa: F821
1579
1581
"""Streaming ingestion. Write feature values directly to Feature Store.
1580
1582
@@ -1584,7 +1586,8 @@ def write_feature_values(
1584
1586
featurestore_id="my_featurestore_id",
1585
1587
)
1586
1588
1587
- # writing feature values from a pandas DataFrame
1589
+ # writing feature values from a pandas DataFrame without feature timestamp column.
1590
+ # In this case, current timestamp will be applied to all data.
1588
1591
my_dataframe = pd.DataFrame(
1589
1592
data = [
1590
1593
{"entity_id": "movie_01", "average_rating": 4.9}
@@ -1597,7 +1600,40 @@ def write_feature_values(
1597
1600
instances=my_df
1598
1601
)
1599
1602
1600
- # writing feature values from a Python dict
1603
+ # writing feature values from a pandas DataFrame with feature timestamp column
1604
+ # Example of datetime creation.
1605
+ feature_time = datetime.datetime(year=2022, month=1, day=1, hour=11, minute=59, second=59)
1606
+ # or
1607
+ feature_time_str = datetime.datetime.now().isoformat(sep=" ", timespec="milliseconds")
1608
+ feature_time = datetime.datetime.strptime(feature_time_str, "%Y-%m-%d %H:%M:%S.%f")
1609
+
1610
+ my_dataframe = pd.DataFrame(
1611
+ data = [
1612
+ {"entity_id": "movie_01", "average_rating": 4.9,
1613
+ "feature_timestamp": feature_time}
1614
+ ],
1615
+ columns=["entity_id", "average_rating", "feature_timestamp"],
1616
+ )
1617
+
1618
+ my_df = my_dataframe.set_index("entity_id")
1619
+ my_entity_type.write_feature_values(
1620
+ instances=my_df, feature_time="feature_timestamp"
1621
+ )
1622
+
1623
+ # writing feature values with a timestamp. The timestamp will be applied to the entire Dataframe.
1624
+ my_dataframe = pd.DataFrame(
1625
+ data = [
1626
+ {"entity_id": "movie_01", "average_rating": 4.9}
1627
+ ],
1628
+ columns=["entity_id", "average_rating"],
1629
+ )
1630
+ my_df = my_dataframe.set_index("entity_id")
1631
+ my_entity_type.write_feature_values(
1632
+ instances=my_df, feature_time=feature_time
1633
+ )
1634
+
1635
+ # writing feature values from a Python dict without timestamp column.
1636
+ # In this case, current timestamp will be applied to all data.
1601
1637
my_data_dict = {
1602
1638
"movie_02" : {"average_rating": 3.7}
1603
1639
}
@@ -1606,16 +1642,40 @@ def write_feature_values(
1606
1642
instances=my_data_dict
1607
1643
)
1608
1644
1645
+ # writing feature values from a Python dict with timestamp column
1646
+ my_data_dict = {
1647
+ "movie_02" : {"average_rating": 3.7, "feature_timestamp": feature_time}
1648
+ }
1649
+
1650
+ my_entity_type.write_feature_values(
1651
+ instances=my_data_dict, feature_time="feature_timestamp"
1652
+ )
1653
+
1654
+ # writing feature values from a Python dict and apply the same Feature_Timestamp
1655
+ my_data_dict = {
1656
+ "movie_02" : {"average_rating": 3.7}
1657
+ }
1658
+
1659
+ my_entity_type.write_feature_values(
1660
+ instances=my_data_dict, feature_time=feature_time
1661
+ )
1662
+
1609
1663
# writing feature values from a list of WriteFeatureValuesPayload objects
1610
1664
payloads = [
1611
1665
gca_featurestore_online_service.WriteFeatureValuesPayload(
1612
1666
entity_id="movie_03",
1613
- feature_values=gca_featurestore_online_service.FeatureValue(
1614
- double_value=4.9
1615
- )
1667
+ feature_values={
1668
+ "average_rating": gca_featurestore_online_service.FeatureValue(
1669
+ string_value="test",
1670
+ metadata=gca_featurestore_online_service.FeatureValue.Metadata(
1671
+ generate_time=feature_time
1672
+ )
1673
+ )
1674
+ }
1616
1675
)
1617
1676
]
1618
-
1677
+ # when instance is WriteFeatureValuesPayload,
1678
+ # feature_time param of write_feature_values() is ignored.
1619
1679
my_entity_type.write_feature_values(
1620
1680
instances=payloads
1621
1681
)
@@ -1641,18 +1701,27 @@ def write_feature_values(
1641
1701
in the pandas Dataframe represents an entity, which has an entity ID
1642
1702
and its associated feature values. Currently, a single payload can be
1643
1703
written in a single request.
1704
+ feature_time (Union[str, datetime.datetime]):
1705
+ Optional. Either column name in DataFrame or Dict which contains timestamp value,
1706
+ or datetime to apply to the entire DataFrame or Dict.
1707
+ The timestamp is written to the generate_time metadata of every FeatureValue.
1708
+ If not provided, current timestamp is used. This param is not used
1709
+ when instances is List[WriteFeatureValuesPayload].
1644
1710
1645
1711
Returns:
1646
1712
EntityType - The updated EntityType object.
1647
1713
"""
1648
-
1649
1714
if isinstance (instances , Dict ):
1650
- payloads = self ._generate_payloads (instances = instances )
1715
+ payloads = self ._generate_payloads (
1716
+ instances = instances , feature_time = feature_time
1717
+ )
1651
1718
elif isinstance (instances , List ):
1652
1719
payloads = instances
1653
1720
else :
1654
1721
instances_dict = instances .to_dict (orient = "index" )
1655
- payloads = self ._generate_payloads (instances = instances_dict )
1722
+ payloads = self ._generate_payloads (
1723
+ instances = instances_dict , feature_time = feature_time
1724
+ )
1656
1725
1657
1726
_LOGGER .log_action_start_against_resource (
1658
1727
"Writing" ,
@@ -1688,6 +1757,7 @@ def _generate_payloads(
1688
1757
],
1689
1758
],
1690
1759
],
1760
+ feature_time : Union [str , datetime .datetime ] = None ,
1691
1761
) -> List [gca_featurestore_online_service .WriteFeatureValuesPayload ]:
1692
1762
"""Helper method used to generate GAPIC WriteFeatureValuesPayloads from
1693
1763
a Python dict.
@@ -1696,18 +1766,39 @@ def _generate_payloads(
1696
1766
instances (Dict[str, Dict[str, Union[int, str, float, bool, bytes,
1697
1767
List[int], List[str], List[float], List[bool]]]]):
1698
1768
Required. Dict mapping entity IDs to their corresponding features.
1699
-
1769
+ feature_time (Union[str, datetime.datetime]):
1770
+ Optional. Either string representing column name which stores
1771
+ feature timestamp, or timestamp to apply to entire DataFrame or
1772
+ Dict.
1700
1773
Returns:
1701
1774
List[gca_featurestore_online_service.WriteFeatureValuesPayload] -
1702
1775
A list of WriteFeatureValuesPayload objects ready to be written to the Feature Store.
1703
1776
"""
1704
1777
payloads = []
1778
+ timestamp_to_all_field = None
1779
+ if feature_time and cls ._is_timestamp (feature_time ):
1780
+ # timestamp_to_all_field will be applied to all FeatureValues.
1781
+ timestamp_to_all_field = feature_time
1782
+
1705
1783
for entity_id , features in instances .items ():
1706
1784
feature_values = {}
1707
1785
for feature_id , value in features .items ():
1786
+ if feature_id == feature_time :
1787
+ continue
1708
1788
feature_value = cls ._convert_value_to_gapic_feature_value (
1709
1789
feature_id = feature_id , value = value
1710
1790
)
1791
+ # Create a FeatureValue Metadata with generate_time if
1792
+ # valid feature_time param is provided.
1793
+ timestamp = cls ._apply_feature_timestamp (
1794
+ cls , features , timestamp_to_all_field , feature_time
1795
+ )
1796
+ if timestamp :
1797
+ feature_value .metadata = (
1798
+ gca_featurestore_online_service .FeatureValue .Metadata (
1799
+ generate_time = timestamp
1800
+ )
1801
+ )
1711
1802
feature_values [feature_id ] = feature_value
1712
1803
payload = gca_featurestore_online_service .WriteFeatureValuesPayload (
1713
1804
entity_id = entity_id , feature_values = feature_values
@@ -1716,6 +1807,43 @@ def _generate_payloads(
1716
1807
1717
1808
return payloads
1718
1809
1810
@staticmethod
def _apply_feature_timestamp(
    cls,
    features: Dict[
        str,
        Union[
            int,
            str,
            float,
            bool,
            bytes,
            List[int],
            List[str],
            List[float],
            List[bool],
            datetime.datetime,
            timestamp_pb2.Timestamp,
        ],
    ],
    timestamp_to_all_field: Optional[
        Union[datetime.datetime, timestamp_pb2.Timestamp]
    ] = None,
    feature_time: Optional[Union[str, datetime.datetime]] = None,
) -> Optional[Union[datetime.datetime, timestamp_pb2.Timestamp]]:
    """Resolve the generate_time to attach to one entity's feature values.

    This is declared as a staticmethod that receives the class explicitly:
    callers pass ``cls`` as the first positional argument so that
    ``cls._is_timestamp`` can be used for validation.

    Args:
        cls:
            Required. The class providing ``_is_timestamp``.
        features (Dict[str, ...]):
            Required. Mapping of feature ID to value for a single entity.
            It may also hold a timestamp under the key named by
            ``feature_time``.
        timestamp_to_all_field (Union[datetime.datetime, timestamp_pb2.Timestamp]):
            Optional. A timestamp to use for every feature value. When it
            is set, it takes precedence over any per-entity timestamp.
        feature_time (Union[str, datetime.datetime]):
            Optional. Either the key in ``features`` that stores the
            timestamp, or the timestamp itself (in which case
            ``timestamp_to_all_field`` is expected to carry it).

    Returns:
        The timestamp to apply, or None when ``feature_time`` is not given
        or ``features`` holds no valid timestamp under that key.
    """
    if feature_time is None:
        return None
    # Explicit None check, matching the guard above: a supplied timestamp
    # is always honoured regardless of its truthiness.
    if timestamp_to_all_field is not None:
        return timestamp_to_all_field

    # Otherwise treat feature_time as a key into this entity's features and
    # accept the stored value only if it really is a timestamp.
    candidate = features.get(feature_time)
    return candidate if cls._is_timestamp(candidate) else None
1838
+
1839
@staticmethod
def _is_timestamp(timestamp: object) -> bool:
    """Tell whether a value is a usable timestamp object.

    Args:
        timestamp (object):
            Required. Any value; callers pass both user-supplied
            ``feature_time`` arguments and raw feature values.

    Returns:
        bool - True if ``timestamp`` is a ``datetime.datetime`` or a
        ``timestamp_pb2.Timestamp`` instance, False otherwise.
    """
    return isinstance(timestamp, (datetime.datetime, timestamp_pb2.Timestamp))
1846
+
1719
1847
@classmethod
1720
1848
def _convert_value_to_gapic_feature_value (
1721
1849
cls ,
0 commit comments