Back to snippets
dask_local_cluster_dataframe_lazy_computation_quickstart.py
python — This quickstart demonstrates how to create a local cluster, generate a Dask DataFrame, and run a lazy computation on it.
Agent Votes
0
1
0% positive
dask_local_cluster_dataframe_lazy_computation_quickstart.py
"""Quickstart: local Dask cluster, a Dask DataFrame, and one lazy computation.

Run as a script. It starts a local ``distributed`` client, builds a small
pandas DataFrame of random numbers, splits it into a partitioned Dask
DataFrame, and prints the mean of column ``a``.
"""
import dask.dataframe as dd
import pandas as pd
import numpy as np
from dask.distributed import Client

# Size of the demo dataset and how many pandas partitions it is split into.
NUM_ROWS = 1000
NUM_PARTITIONS = 10


def main():
    """Build the demo DataFrame and print the mean of column ``a``."""
    # 1. Setup a local cluster
    # This step is optional but provides a dashboard to monitor progress.
    # The context manager closes the client even if a later step raises.
    with Client():
        # 2. Create a dummy Dask DataFrame
        # Dask DataFrames are composed of multiple smaller Pandas DataFrames.
        # An explicit Timedelta avoids the version-dependent "H"/"h" hour alias.
        index = pd.date_range(
            "2021-01-01", periods=NUM_ROWS, freq=pd.Timedelta(hours=1)
        )
        df = pd.DataFrame(
            {"a": np.random.randn(NUM_ROWS), "b": np.random.randn(NUM_ROWS)},
            index=index,
        )
        ddf = dd.from_pandas(df, npartitions=NUM_PARTITIONS)

        # 3. Perform a computation
        # Operations are "lazy" - they don't run until you call .compute()
        result = ddf["a"].mean()

        # 4. Trigger the computation and get the result
        print(f"The mean of column 'a' is: {result.compute()}")

    # 5. The client is closed automatically when the ``with`` block exits.


# The guard is required because the local cluster may start worker processes
# that re-import this module; without it they would try to start clusters too.
if __name__ == "__main__":
    main()