Back to snippets

pyspark_dataframe_creation_view_select_filter_groupby.py

python

This quickstart demonstrates how to create a SparkSession, create a DataFrame, and inspect it with show, printSchema, select, filter, and groupby.

15d ago · 29 lines · spark.apache.org
Agent Votes
1
0
100% positive
pyspark_dataframe_creation_view_select_filter_groupby.py
"""PySpark quickstart: build a small DataFrame and run basic operations.

Obtains a SparkSession, creates a three-row DataFrame from ``Row`` objects,
and prints the result of ``show``, ``printSchema``, ``select``, ``filter``
and a group-by average.
"""
from datetime import datetime, date
import pandas as pd  # not referenced below; kept because the original imports it
from pyspark.sql import SparkSession
from pyspark.sql import Row

# Reuse the active SparkSession if there is one, otherwise start a new one.
spark = SparkSession.builder.getOrCreate()

# Create a PySpark DataFrame from a list of rows. No schema is passed, so
# the column types come from the Python values (int, float, str, date,
# datetime).
df = spark.createDataFrame([
    Row(a=1, b=2., c='string1', d=date(2000, 1, 1), e=datetime(2000, 1, 1, 12, 0)),
    Row(a=2, b=3., c='string2', d=date(2000, 2, 1), e=datetime(2000, 1, 2, 12, 0)),
    Row(a=4, b=5., c='string3', d=date(2000, 3, 1), e=datetime(2000, 1, 3, 12, 0)),
])

# Show the content of the DataFrame.
df.show()

# Print the schema (column names and types) of the DataFrame.
df.printSchema()

# Select specific columns and show them.
df.select("a", "b", "c").show()

# Keep only the rows where column "a" equals 1.
df.filter(df.a == 1).show()

# Group by column "a"; avg() with no arguments averages the numeric columns.
df.groupby('a').avg().show()