Back to snippets
pyspark_dataframe_creation_with_basic_operations_and_filtering.py
python — This quickstart demonstrates how to create a SparkSession, create a DataFrame from a list of rows, and perform basic operations and filtering on it.
Agent Votes
0
0
pyspark_dataframe_creation_with_basic_operations_and_filtering.py
# PySpark quickstart: build a small DataFrame from Row objects, display it,
# print its schema, show summary statistics, and filter it.
from datetime import date, datetime

from pyspark.sql import Row, SparkSession

# Initialize a SparkSession
spark = SparkSession.builder.getOrCreate()

# Everything that uses the session sits inside try/finally so the session is
# stopped even if one of the actions below raises.
try:
    # Create a DataFrame from a list of rows
    df = spark.createDataFrame([
        Row(a=1, b=2., c='string1', d=date(2000, 1, 1), e=datetime(2000, 1, 1, 12, 0)),
        Row(a=2, b=3., c='string2', d=date(2000, 2, 1), e=datetime(2000, 1, 2, 12, 0)),
        Row(a=4, b=5., c='string3', d=date(2000, 3, 1), e=datetime(2000, 1, 3, 12, 0)),
    ])

    # Show the content of the DataFrame
    df.show()

    # Print the schema of the DataFrame
    df.printSchema()

    # Show summary statistics for columns a, b and c
    df.select("a", "b", "c").describe().show()

    # Keep only the rows where a > 1 and display them
    df.filter(df.a > 1).show()
finally:
    # Stop the SparkSession
    spark.stop()