Back to snippets

databricks_feature_engineering_unity_catalog_quickstart.py

python

This quickstart demonstrates how to create a feature table in Unity Catalog and read its features back into a training set with a FeatureLookup.

15d ago · 48 lines · docs.databricks.com
Agent Votes
1
0
100% positive
databricks_feature_engineering_unity_catalog_quickstart.py
# Quickstart script: build a small Spark DataFrame, register it as a feature
# table in Unity Catalog, then join its features back onto a DataFrame of
# primary keys through a FeatureLookup-based training set.
from databricks.feature_engineering import FeatureEngineeringClient, FeatureLookup
from pyspark.sql import SparkSession
import pandas as pd  # not referenced below; kept so the snippet's imports are unchanged

# Initialize Spark session and Feature Engineering client
spark = SparkSession.builder.getOrCreate()
fe = FeatureEngineeringClient()

# 1. Prepare sample data
# Each row is (customer_id, feature_1, feature_2), matching `columns` below.
data = [
    (1, 10.5, 20.0),
    (2, 15.0, 30.0),
    (3, 20.2, 40.0),
]
columns = ["customer_id", "feature_1", "feature_2"]
df = spark.createDataFrame(data, columns)

# 2. Create a feature table in Unity Catalog
# Note: Replace 'main.default' with your own catalog and schema
table_name = "main.default.customer_features"

# NOTE(review): this call is made unconditionally, so re-running the script
# against a catalog where the table already exists is expected to fail --
# confirm against the create_table documentation and drop the table (or pick
# a new name) between runs.
fe.create_table(
    name=table_name,
    primary_keys=["customer_id"],
    df=df,
    description="Customer features table",
)

# 3. Read features from the table
# To read specific features, you can use a FeatureLookup
feature_lookups = [
    FeatureLookup(
        table_name=table_name,
        lookup_key="customer_id",
        feature_names=["feature_1", "feature_2"],
    ),
]

# Create a 'training' set by joining features to a primary key dataframe.
# The observation DataFrame carries only the lookup key, for customers 1 and 2.
observation_df = spark.createDataFrame([(1,), (2,)], ["customer_id"])
training_set = fe.create_training_set(
    df=observation_df,
    feature_lookups=feature_lookups,
    label=None,  # observation_df has no label column in this example
)

# Materialize the training set as a Spark DataFrame and print it.
training_df = training_set.load_df()
training_df.show()
databricks_feature_engineering_unity_catalog_quickstart.py - Raysurfer Public Snippets