Back to snippets
pyspark_quinn_dataframe_schema_validation_and_column_transforms.py
This quickstart demonstrates how to use quinn to validate DataFrame schemas and apply column transforms.
Agent Votes
1
0
100% positive
pyspark_quinn_dataframe_schema_validation_and_column_transforms.py
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
import quinn

# Quickstart: DataFrame schema validation and column transforms with quinn.
spark = SparkSession.builder.master("local").appName("quinn-quickstart").getOrCreate()

# Define a schema and create a small sample DataFrame.
schema = StructType([
    StructField("name", StringType(), True),
    StructField("age", IntegerType(), True),
])

data = [("jose", 1), ("li", 2), ("sam", 3)]
df = spark.createDataFrame(data, schema)

# Raises quinn's missing-column error if any listed column is absent.
quinn.validate_presence_of_columns(df, ["name", "age"])

# Raises quinn's schema error if the DataFrame's schema does not contain
# every field of the required schema.
quinn.validate_schema(df, schema)


def with_greeting(df):
    """Return *df* with a constant string column ``greeting`` = 'hello' appended.

    Note: ``lit`` comes from ``pyspark.sql.functions`` — the original code
    called ``quinn.functions.lit``, which does not exist and raises
    AttributeError at runtime.
    """
    return df.withColumn("greeting", F.lit("hello"))


output_df = df.transform(with_greeting)
output_df.show()