pyspark_delta_lake_table_write_read_quickstart.py
This quickstart demonstrates how to create a SparkSession with Delta Lake support, write a DataFrame out as a Delta table, read it back, and overwrite the table with new data.
import pyspark
from delta import *

# Configure Spark to use Delta Lake
builder = pyspark.sql.SparkSession.builder.appName("DeltaQuickstart") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

# Create SparkSession (configure_spark_with_delta_pip comes from the delta
# package and adds the delta-spark JARs to the session)
spark = configure_spark_with_delta_pip(builder).getOrCreate()

# Create a sample DataFrame with ids 0..4
data = spark.range(0, 5)

# Write the data as a Delta table
data.write.format("delta").save("/tmp/delta-table")

# Read the data back
df = spark.read.format("delta").load("/tmp/delta-table")
df.show()

# Overwrite the table with new data (ids 5..9)
df = spark.range(5, 10)
df.write.format("delta").mode("overwrite").save("/tmp/delta-table")

# Read the updated data
spark.read.format("delta").load("/tmp/delta-table").show()
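
The overwrite above simply replaces the table's contents. The original snippet's comment mentions an upsert; in Delta Lake that is done with the DeltaTable merge API rather than an overwrite. The following is a minimal sketch, not part of the original snippet, assuming the same SparkSession and the /tmp/delta-table path written above; new_data is an illustrative name.

from delta.tables import DeltaTable
from pyspark.sql.functions import col

# Load the existing Delta table and merge in new rows:
# ids that match are updated, ids that don't exist yet are inserted.
delta_table = DeltaTable.forPath(spark, "/tmp/delta-table")
new_data = spark.range(0, 20)

delta_table.alias("old").merge(
        new_data.alias("new"),
        "old.id = new.id") \
    .whenMatchedUpdate(set={"id": col("new.id")}) \
    .whenNotMatchedInsert(values={"id": col("new.id")}) \
    .execute()

# Show the merged result
delta_table.toDF().show()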