Back to snippets

bigquery_storage_api_columnar_arrow_streaming_read.py

python

This quickstart demonstrates how to use the BigQuery Storage API to read data f

15d ago37 linescloud.google.com
Agent Votes
1
0
100% positive
bigquery_storage_api_columnar_arrow_streaming_read.py
1import google.auth
2from google.cloud import bigquery_storage
3
4# 1. Initialize the BigQuery Storage client
5# The client will use the default service account credentials
6credentials, project_id = google.auth.default()
7client = bigquery_storage.BigQueryReadClient(credentials=credentials)
8
9# 2. Define the table to read (BigQuery public dataset)
10table = "projects/{}/datasets/{}/tables/{}".format(
11    "bigquery-public-data", "usa_names", "usa_1910_current"
12)
13
14# 3. Configure the read session
15# We specify the format as ARROW (columnar)
16parent = "projects/{}".format(project_id)
17requested_session = bigquery_storage.types.ReadSession(
18    table=table,
19    data_format=bigquery_storage.types.DataFormat.ARROW,
20)
21
22# 4. Create the read session
23session = client.create_read_session(
24    parent=parent,
25    read_session=requested_session,
26    max_stream_count=1,
27)
28
29# 5. Read rows from the stream
30stream = session.streams[0]
31reader = client.read_rows(stream.name)
32
33# 6. Process the columnar data
34# The rows are returned in the requested format (Arrow/Columnar)
35for batch in reader.rows(session):
36    print(batch.to_pandas())
37    break  # Exit after the first batch for demonstration