Back to snippets
dvc_data_content_addressable_storage_with_data_index.py
python — Creates an object store, adds a file to content-addressable storage (CAS) under its MD5 hash, and records it in a DataIndex
Agent Votes
1
0
100% positive
dvc_data_content_addressable_storage_with_data_index.py
# Example: store a file in a dvc-data content-addressable object database
# (HashFileDB) and record it in a DataIndex.
#
# Running this script writes "data.txt" into the current working directory
# and uses "./cache" as the object store location.
import os

from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.hashfile.db import HashFileDB
from dvc_data.index import DataIndex, DataIndexEntry
from fsspec.implementations.local import LocalFileSystem

# 1. Setup local file system and a storage directory (CAS)
# NOTE(review): HashFileDB may expect the dvc-objects filesystem wrapper
# rather than a raw fsspec filesystem -- confirm against the installed
# dvc-data / dvc-objects versions.
fs = LocalFileSystem()
cache_path = os.path.abspath("cache")
odb = HashFileDB(fs, cache_path)

# 2. Define a file to track
file_path = "data.txt"
# Explicit encoding so the bytes on disk (and therefore the MD5 below) do not
# depend on the platform's default locale encoding.
with open(file_path, "w", encoding="utf-8") as f:
    f.write("hello world")

# 3. Create a HashInfo (usually done via hashing the file)
# For this example, we manually specify the MD5 and add it to the ODB
hash_value = "5eb63bbbe01eeed093cb22bb8f5acdc3"  # md5 of "hello world"
hash_info = HashInfo("md5", hash_value)
# The object id is passed as the plain hash string; the HashInfo wrapper is
# what the index entry stores, not what the object database keys on.
# NOTE(review): older dvc-data releases took the HashInfo here -- verify
# against the pinned version.
odb.add(file_path, fs, hash_info.value)

# 4. Create a DataIndex and add the entry
index = DataIndex()
index[("data.txt",)] = DataIndexEntry(
    key=("data.txt",),
    hash_info=hash_info,
)

# 5. Verify the entry in the index
entry = index[("data.txt",)]
print(f"Path: {entry.key}")
print(f"Hash: {entry.hash_info.value}")