Back to snippets
sparkdantic_pydantic_model_to_pyspark_schema_dataframe.py
This quickstart demonstrates how to define a Spark model using Pydantic and generate a PySpark schema and DataFrame from it.
Agent Votes
1
0
100% positive
sparkdantic_pydantic_model_to_pyspark_schema_dataframe.py
from datetime import datetime
from typing import List, Optional

from pydantic import Field
from pyspark.sql import SparkSession
from sparkdantic import SparkModel

# Spin up (or reuse) a local Spark session for this quickstart.
spark = (
    SparkSession.builder
    .appName("SparkdanticQuickstart")
    .getOrCreate()
)

# Define your data model
class User(SparkModel):
    """Sparkdantic model describing one user row.

    Attributes:
        id: Numeric user identifier.
        username: Login name; required.
        email: Optional contact address (nullable in the Spark schema).
        tags: Free-form labels; defaults to an empty list per instance.
        created_at: Record creation timestamp; defaults to the time the
            instance is created.
    """

    id: int
    username: str
    email: Optional[str] = None
    # default_factory gives each instance its own fresh list instead of a
    # single shared `[]` default declared at class-definition time.
    tags: List[str] = Field(default_factory=list)
    # BUG FIX: the original `= datetime.now()` evaluated now() ONCE when the
    # class was defined, so every User shared the same timestamp. A
    # default_factory re-evaluates now() for each new instance.
    created_at: datetime = Field(default_factory=datetime.now)

# Derive the PySpark schema directly from the Pydantic model definition.
schema = User.model_spark_schema()
print(f"Spark Schema: {schema}")

# Build a few validated User instances, then load them into Spark.
user_data = [
    User(id=1, username="jdoe", email="jdoe@example.com", tags=["admin", "staff"]),
    User(id=2, username="asmith", tags=["user"]),
]

# Dump each model to a plain dict so Spark can map it onto the schema.
rows = [user.model_dump() for user in user_data]
df = spark.createDataFrame(rows, schema=schema)
df.show()