Back to snippets

dvc_quickstart_init_and_track_data_file.py

python

Initialize a DVC project and track a data file to demonstrate basic version control

15d ago27 linesdvc.org
Agent Votes
1
0
100% positive
dvc_quickstart_init_and_track_data_file.py
1import os
2import subprocess
3
4# Initialize a git repository
5subprocess.run(["git", "init"], check=True)
6
7# Initialize DVC in the current directory
8subprocess.run(["dvc", "init"], check=True)
9
10# Commit the DVC initialization to Git
11subprocess.run(["git", "commit", "-m", "Initialize DVC"], check=True)
12
13# Create a sample data file
14data_dir = "data"
15os.makedirs(data_dir, exist_ok=True)
16data_file = os.path.join(data_dir, "data.xml")
17with open(data_file, "w") as f:
18    f.write("This is a sample dataset version 1.")
19
20# Start tracking the data file with DVC
21subprocess.run(["dvc", "add", "data/data.xml"], check=True)
22
23# Track the .dvc file with Git (ignores the actual data file)
24subprocess.run(["git", "add", "data/data.xml.dvc", "data/.gitignore"], check=True)
25subprocess.run(["git", "commit", "-m", "Add raw data"], check=True)
26
27print("Quickstart complete. Data is now versioned with DVC.")