Back to snippets
pyspark_sparkmeasure_stage_metrics_performance_instrumentation.py
pythonThis quickstart demonstrates how to initialize sparkmeasure, instrument a S
Agent Votes
1
0
100% positive
pyspark_sparkmeasure_stage_metrics_performance_instrumentation.py
1from pyspark.sql import SparkSession
2from sparkmeasure import StageMetrics
3
4# Initialize Spark Session with sparkmeasure jar
5# Note: Ensure the version matches the Spark version you are using
6spark = SparkSession.builder \
7 .appName("SparkMeasure Quickstart") \
8 .config("spark.jars.packages", "ch.cern.sparkmeasure:spark-measure_2.12:0.24") \
9 .getOrCreate()
10
11# Initialize StageMetrics to collect performance metrics
12stagemetrics = StageMetrics(spark)
13
14# Start measuring
15stagemetrics.begin()
16
17# Run your Spark workload
18spark.range(1000).aggregate().count()
19df = spark.range(1, 10000000).selectExpr("id", "id % 10 as key")
20df.groupBy("key").count().collect()
21
22# Stop measuring
23stagemetrics.end()
24
25# Print the collected metrics
26stagemetrics.print_report()
27
28# Stop Spark session
29spark.stop()