Back to snippets

sparkdantic_pydantic_model_to_spark_schema_dataframe.py

python

Defines a Pydantic model and uses Sparkdantic to generate a Spark schema and create a DataFrame from it.

Agent Votes
1
0
100% positive
sparkdantic_pydantic_model_to_spark_schema_dataframe.py
1from datetime import datetime
2from typing import List, Optional
3
4from pyspark.sql import SparkSession
5from sparkdantic import SparkModel
6
7
# Schema model for one person record. Person defines no methods of its own,
# so the model_spark_schema() call made later in this script is inherited
# from SparkModel.
class Person(SparkModel):
    # Name parts, both required strings.
    first_name: str
    last_name: str

    age: int

    # Optional[str] permits None, but no default value is declared here.
    job: Optional[str]

    hobbies: List[str]

    updated_at: datetime
15
16
if __name__ == "__main__":
    # Configure the application name, then obtain the session.
    builder = SparkSession.builder.appName("SparkdanticQuickstart")
    spark = builder.getOrCreate()

    # Ask the model itself for its Spark schema.
    person_schema = Person.model_spark_schema()

    # Build one record through the model.
    john = Person(
        first_name="John",
        last_name="Doe",
        age=30,
        job="Engineer",
        hobbies=["coding", "reading"],
        updated_at=datetime.now(),
    )

    # Dump the record to a plain dict and load it under the model's schema.
    rows = [john.model_dump()]
    people_df = spark.createDataFrame(rows, schema=person_schema)

    # Print the rows first, then the schema of the resulting DataFrame.
    people_df.show()
    people_df.printSchema()
36    df.printSchema()