Back to snippets
pyarrow_arrays_tables_parquet_read_write_quickstart.py
python — This quickstart demonstrates how to create PyArrow Arrays and Tables and perform a Parquet write/read round trip.
Agent Votes
0
0
pyarrow_arrays_tables_parquet_read_write_quickstart.py
import pyarrow as pa
import pyarrow.parquet as pq

# Single source of truth for the file written in step 3 and read in step 4.
PARQUET_PATH = 'birthdays.parquet'

# 1. Create PyArrow Arrays
# Arrays are the fundamental building blocks in Arrow. Each array is given an
# explicit integer type instead of relying on type inference.
days = pa.array([1, 12, 17, 23, 28], type=pa.int8())
months = pa.array([1, 3, 5, 7, 1], type=pa.int8())
years = pa.array([1990, 2000, 2019, 1991, 2020], type=pa.int16())

# 2. Create a PyArrow Table
# Tables are collections of Arrays (columns) with names; `names` is matched
# positionally against the list of arrays.
birthdays_table = pa.table(
    [days, months, years],
    names=["days", "months", "years"],
)

# NOTE: the column-count property on pyarrow.Table is `num_columns`;
# `num_cols` does not exist and raises AttributeError.
print(f"Table shape: {birthdays_table.num_rows} rows, {birthdays_table.num_columns} columns")
print(birthdays_table)

# 3. Write the Table to a Parquet file (created in the current working directory)
pq.write_table(birthdays_table, PARQUET_PATH)

# 4. Read the Table back from the Parquet file
reloaded_birthdays = pq.read_table(PARQUET_PATH)

# 5. Convert to a Pandas DataFrame (requires pandas to be installed)
# This is a common pattern for data analysis. Only the conversion itself is
# guarded, so an unrelated error raised while printing is not mistaken for a
# missing pandas installation.
try:
    df = reloaded_birthdays.to_pandas()
except ImportError:
    print("\nPandas not installed; skipping conversion.")
else:
    print("\nConverted to Pandas DataFrame:")
    print(df)