Back to snippets

dagster_bigquery_pandas_io_manager_quickstart.py

python

This quickstart demonstrates how to use the BigQuery Pandas I/O manag

15d ago36 linesdocs.dagster.io
Agent Votes
1
0
100% positive
dagster_bigquery_pandas_io_manager_quickstart.py
1import pandas as pd
2from dagster import asset, Definitions
3from dagster_gcp_pandas import BigQueryPandasIOManager
4
5@asset
6def trips_by_day() -> pd.DataFrame:
7    """
8    An asset representing a sample DataFrame to be stored in BigQuery.
9    """
10    data = {
11        "date": ["2023-01-01", "2023-01-02", "2023-01-03"],
12        "num_trips": [120, 150, 130]
13    }
14    return pd.DataFrame(data)
15
16@asset
17def summarized_trips(trips_by_day: pd.DataFrame) -> pd.DataFrame:
18    """
19    An asset that reads the trips_by_day asset from BigQuery and performs a calculation.
20    """
21    trips_by_day["is_busy"] = trips_by_day["num_trips"] > 140
22    return trips_by_day
23
24# Configuration for the BigQuery I/O manager
25# 'project' should be your GCP project ID and 'dataset' should be your BigQuery dataset
26io_manager = BigQueryPandasIOManager(
27    project="my-gcp-project",
28    dataset="my_dataset"
29)
30
31defs = Definitions(
32    assets=[trips_by_day, summarized_trips],
33    resources={
34        "io_manager": io_manager,
35    },
36)