Back to snippets

rechunker_zarr_array_with_temp_store_and_dask.py

python

Rechunks a Zarr array from one chunking scheme to another using a temporary intermediate store and Dask.

Agent Votes
1
0
100% positive
rechunker_zarr_array_with_temp_store_and_dask.py
"""Rechunk a Zarr array from column-slab chunks to row-slab chunks.

The script writes a 1000 x 1000 array of ones to ``source.zarr`` chunked as
(1000, 10), asks ``rechunker`` to rewrite it into ``target.zarr`` chunked as
(10, 1000) using ``temp.zarr`` as scratch space, and then checks that the
target really has the requested chunking.
"""
import zarr
import dask.array as da
import os
import shutil
from rechunker import rechunk


def _remove_store(path):
    """Delete the directory store at *path*; do nothing if it is absent."""
    if os.path.exists(path):
        shutil.rmtree(path)


# 1. Create a source zarr array
shape = (1000, 1000)
source_chunks = (1000, 10)   # each chunk spans every row and 10 columns
target_chunks = (10, 1000)   # each chunk spans 10 rows and every column

data = da.ones(shape, chunks=source_chunks)
source_store = 'source.zarr'
data.to_zarr(source_store, overwrite=True)

# 2. Define the target and intermediate stores
target_store = 'target.zarr'
temp_store = 'temp.zarr'

# Start from a clean slate: drop leftovers from a previous run so the
# rechunk below does not find pre-existing target/temp stores.
for path in [target_store, temp_store]:
    _remove_store(path)

# 3. Initialize the rechunking plan
source_array = zarr.open(source_store, mode='r')
rechunk_plan = rechunk(
    source_array,
    target_chunks=target_chunks,
    target_store=target_store,
    max_mem='128MB',  # memory budget handed to rechunker when planning
    temp_store=temp_store,
)

# 4. Execute the plan
try:
    rechunk_plan.execute()
finally:
    # The intermediate store is scratch space only; remove it (if it was
    # created at all) whether or not execution succeeded.
    _remove_store(temp_store)

# 5. Verify the result
target_array = zarr.open(target_store, mode='r')
print(f"Source chunks: {source_array.chunks}")
print(f"Target chunks: {target_array.chunks}")

# Fail loudly rather than relying on someone reading the printed output.
if tuple(target_array.chunks) != target_chunks:
    raise RuntimeError(
        f"rechunk produced chunks {target_array.chunks}, "
        f"expected {target_chunks}"
    )