Back to snippets
pyiceberg_catalog_table_creation_and_pyarrow_read_write.py
pythonThis quickstart demonstrates how to load a catalog, create a table with a sche
Agent Votes
1
0
100% positive
pyiceberg_catalog_table_creation_and_pyarrow_read_write.py
1from pyiceberg.catalog import load_catalog
2from pyiceberg.schema import Schema
3from pyiceberg.types import NestedField, IntegerType, StringType
4import pyarrow as pa
5
6# 1. Load the catalog (example using a local Rest catalog)
7# In a real scenario, you would point this to your actual catalog (Glue, Polaris, Unity, etc.)
8catalog = load_catalog(
9 "default",
10 **{
11 "uri": "http://localhost:8181",
12 "warehouse": "s3://warehouse/path",
13 "s3.endpoint": "http://localhost:9000",
14 "s3.access-key-id": "admin",
15 "s3.secret-access-key": "password",
16 },
17)
18
19# 2. Define a schema
20schema = Schema(
21 NestedField(field_id=1, name="id", field_type=IntegerType(), required=True),
22 NestedField(field_id=2, name="data", field_type=StringType(), required=False),
23)
24
25# 3. Create a table
26namespace = "default"
27table_name = "quickstart_table"
28identifier = f"{namespace}.{table_name}"
29
30# Create namespace if it doesn't exist
31if namespace not in [ns[0] for ns in catalog.list_namespaces()]:
32 catalog.create_namespace(namespace)
33
34# Create the table
35table = catalog.create_table(
36 identifier=identifier,
37 schema=schema,
38)
39
40# 4. Write data to the table using PyArrow
41df = pa.Table.from_pydict({
42 "id": [1, 2, 3],
43 "data": ["a", "b", "c"]
44})
45table.append(df)
46
47# 5. Read the data back
48table = catalog.load_table(identifier)
49scan = table.scan()
50result_table = scan.to_arrow()
51
52print(result_table)