Introduction#
GitHub: this note shows the basic concepts and applications of SageMaker Feature Store:
- Feature group
- Ingest dataframe into a feature group
- Feature group, Glue Catalog and Athena Query
- AmazonSageMakerFeatureStoreAccess policy
Create a Feature Group#
from sagemaker.feature_store.feature_group import FeatureGroup

# Build one FeatureGroup handle per dataset.
# NOTE: customers_feature_group_name, orders_feature_group_name and
# sagemaker_session are not defined in this snippet; they are expected to be
# set up earlier in the notebook.
customers_feature_group = FeatureGroup(
    name=customers_feature_group_name,
    sagemaker_session=sagemaker_session,
)
orders_feature_group = FeatureGroup(
    name=orders_feature_group_name,
    sagemaker_session=sagemaker_session,
)
Load the feature definitions for each feature group from its dataframe
# Load the feature definitions of each group from its dataframe.
# NOTE: customer_data and orders_data are dataframes prepared elsewhere in the
# notebook (not shown in this snippet).
customers_feature_group.load_feature_definitions(data_frame=customer_data)
orders_feature_group.load_feature_definitions(data_frame=orders_data)
Create the feature groups (they must exist before a dataframe can be ingested into them)
# Create both feature groups with identical settings: same S3 location, same
# record identifier feature, "EventTime" as the event-time feature, and the
# online store enabled.
# NOTE: s3_bucket_name, prefix, record_identifier_feature_name and role come
# from earlier in the notebook (not shown in this snippet).
# NOTE(review): this snippet only issues the create requests; it does not wait
# for the groups to become ready and does not ingest any data -- confirm the
# notebook does both before the groups are used.
customers_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name="EventTime",
    role_arn=role,
    enable_online_store=True,
)
orders_feature_group.create(
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name="EventTime",
    role_arn=role,
    enable_online_store=True,
)
Add Metadata
from sagemaker.feature_store.inputs import FeatureParameter

# Attach a description and an ("idType", "primaryKey") parameter to the
# customer_id feature of the customers feature group.
customers_feature_group.update_feature_metadata(
    feature_name="customer_id",
    description="The ID of a customer. It is also used in orders_feature_group.",
    parameter_additions=[FeatureParameter("idType", "primaryKey")],
)

# Read the metadata back for the same feature (the notebook displays the
# result of this last expression).
customers_feature_group.describe_feature_metadata(feature_name="customer_id")
Feature Store API#
Case 1) use Feature Store API to list existing feature groups
from sagemaker.feature_store.feature_store import FeatureStore
from sagemaker.feature_store.inputs import Identifier

# FeatureStore is the account-level entry point, as opposed to the per-group
# FeatureGroup handles. Identifier is imported here for the batch-get example
# that follows.
# NOTE: sagemaker_session is defined earlier in the notebook.
feature_store = FeatureStore(sagemaker_session=sagemaker_session)

# List the existing feature groups (the notebook displays the result).
feature_store.list_feature_groups()
Case 2) Get Batch Records
# Fetch several records in one call, addressed by their record identifier
# values (passed as strings).
# NOTE: the feature group name below is hard-coded to one specific run of the
# notebook; replace it with the name of your own customers feature group.
feature_store.batch_get_record(
    identifiers=[
        Identifier(
            feature_group_name="customers-feature-group-28-03-41-44",
            record_identifiers_value_as_string=["573291", "109382", "828400", "124013"],
        )
    ]
)
Case 3) Athena Query
from sagemaker.feature_store.feature_group import AthenaQuery

# Query the customers feature group table through Athena.
# NOTE: the table name is hard-coded to one specific run of the notebook;
# replace it with the table of your own feature group. sagemaker_session and
# s3_bucket_name are defined earlier in the notebook.
customer_query = AthenaQuery(
    sagemaker_session=sagemaker_session,
    catalog="AwsDataCatalog",
    database="sagemaker_featurestore",
    table_name="customers_feature_group_28_03_41_44_1685245305",
)

# run() only starts the Athena query execution; the results are written under
# output_location once the query finishes.
customer_query.run(
    'select * from customers_feature_group_28_03_41_44_1685245305',
    output_location=f"s3://{s3_bucket_name}/notebook-athena-result/",
)

# Block until the query has finished. Without this, as_dataframe() is called
# while the query is still queued/running and fails instead of returning data.
customer_query.wait()

customer_df = customer_query.as_dataframe()
customer_df.head(10)