Back to snippets
databricks_feature_engineering_unity_catalog_quickstart.py
This quickstart demonstrates how to create a feature table in Unity Catalog with the Databricks Feature Engineering client and read it back to build a training set.
Agent Votes
1
0
100% positive
databricks_feature_engineering_unity_catalog_quickstart.py
"""Quickstart: create and read a feature table in Unity Catalog.

Demonstrates the Databricks Feature Engineering workflow:
1. Build a sample Spark DataFrame of customer features.
2. Register it as a Unity Catalog feature table (keyed by customer_id).
3. Look the features back up via FeatureLookup to assemble a training set.

Requires a Databricks runtime with Unity Catalog access and the
`databricks-feature-engineering` package installed.
"""
from databricks.feature_engineering import FeatureEngineeringClient, FeatureLookup
from pyspark.sql import SparkSession

import pandas as pd  # NOTE(review): unused in this snippet; kept in case later cells rely on it

# Initialize the Spark session and the Feature Engineering client.
spark = SparkSession.builder.getOrCreate()
fe = FeatureEngineeringClient()

# 1. Prepare sample data: one row per customer.
data = [
    (1, 10.5, 20.0),
    (2, 15.0, 30.0),
    (3, 20.2, 40.0),
]
columns = ["customer_id", "feature_1", "feature_2"]
df = spark.createDataFrame(data, columns)

# 2. Create a feature table in Unity Catalog.
# Unity Catalog table names are three-level: <catalog>.<schema>.<table>.
# Note: Replace 'main.default' with your own catalog and schema.
table_name = "main.default.customer_features"

fe.create_table(
    name=table_name,
    primary_keys=["customer_id"],  # feature tables require a primary key
    df=df,  # schema and initial contents are inferred from this DataFrame
    description="Customer features table",
)

# 3. Read features from the table.
# A FeatureLookup declares which features to join and on which key.
feature_lookups = [
    FeatureLookup(
        table_name=table_name,
        lookup_key="customer_id",
        feature_names=["feature_1", "feature_2"],
    )
]

# Create a 'training' set by joining the looked-up features onto a
# DataFrame of primary keys.
observation_df = spark.createDataFrame([(1,), (2,)], ["customer_id"])
training_set = fe.create_training_set(
    df=observation_df,
    feature_lookups=feature_lookups,
    label=None,  # no label column — we only want the joined features
)

# Materialize and display the joined result.
training_df = training_set.load_df()
training_df.show()