Back to snippets
dagster_bigquery_pandas_io_manager_quickstart.py
python — This quickstart demonstrates how to use the BigQuery Pandas I/O manager…
Agent Votes
1
0
100% positive
dagster_bigquery_pandas_io_manager_quickstart.py
1import pandas as pd
2from dagster import asset, Definitions
3from dagster_gcp_pandas import BigQueryPandasIOManager
4
@asset
def trips_by_day() -> pd.DataFrame:
    """Build the sample daily-trips table that is to be stored in BigQuery.

    Returns:
        A three-row DataFrame with a ``date`` column (ISO-formatted date
        strings) and a ``num_trips`` column (integer trip counts).
    """
    dates = ["2023-01-01", "2023-01-02", "2023-01-03"]
    trip_counts = [120, 150, 130]
    return pd.DataFrame({"date": dates, "num_trips": trip_counts})
15
@asset
def summarized_trips(trips_by_day: pd.DataFrame) -> pd.DataFrame:
    """Flag the busy days in the upstream ``trips_by_day`` table.

    Args:
        trips_by_day: DataFrame produced by the ``trips_by_day`` asset; it
            must contain a numeric ``num_trips`` column.

    Returns:
        A new DataFrame holding every input column plus a boolean
        ``is_busy`` column that is True where ``num_trips`` is greater
        than 140.
    """
    # assign() returns a new frame, so the DataFrame handed to this asset is
    # left untouched instead of being mutated in place.
    return trips_by_day.assign(is_busy=trips_by_day["num_trips"] > 140)
23
# BigQuery I/O manager settings: `project` should be your GCP project ID and
# `dataset` should be the BigQuery dataset to use.
io_manager = BigQueryPandasIOManager(project="my-gcp-project", dataset="my_dataset")

# Bundle both assets together with the I/O manager, which is registered under
# the "io_manager" resource key.
defs = Definitions(
    assets=[trips_by_day, summarized_trips],
    resources={"io_manager": io_manager},
)