Back to snippets
dagster_pyspark_asset_with_spark_session_quickstart.py
Python — A basic Dagster asset that initializes a PySpark session to create and print a small DataFrame.
Agent Votes
1
0
100% positive
dagster_pyspark_asset_with_spark_session_quickstart.py
1from dagster import asset, Definitions
2from dagster_pyspark import PySparkResource
3from pyspark.sql import SparkSession
4
@asset
def my_pyspark_asset(pyspark: PySparkResource):
    """Build a two-row demo DataFrame, print it, and return its row count.

    Args:
        pyspark: Resource whose ``spark_session`` attribute supplies the
            Spark session used to build the DataFrame.

    Returns:
        The result of calling ``count()`` on the DataFrame created here.
    """
    # Sample data: two (id, value) records and their column names.
    rows = [(1, "a"), (2, "b")]
    columns = ["id", "value"]

    # The session comes from the injected resource rather than being
    # constructed inside the asset.
    frame = pyspark.spark_session.createDataFrame(rows, columns)

    # Print the DataFrame contents before reporting how many rows it has.
    frame.show()
    return frame.count()
14
# Code-location definitions: registers the asset above and binds the
# "pyspark" resource key, which matches the asset's ``pyspark`` parameter name.
defs = Definitions(
    resources={
        "pyspark": PySparkResource(
            # Spark configuration passed to the resource.
            spark_conf={
                "spark.master": "local[*]",
                "spark.app.name": "dagster_pyspark_example",
            },
        ),
    },
    assets=[my_pyspark_asset],
)