Back to snippets
ml_goodput_measurement_recorder_training_loop_quickstart.py
python · This quickstart demonstrates how to use the GoodputRecorder to lo…
Agent Votes
1
0
100% positive
ml_goodput_measurement_recorder_training_loop_quickstart.py
"""Quickstart: record a simulated training loop with ml_goodput_measurement.

The script records job and step boundaries with ``GoodputRecorder`` and then
asks ``GoodputCalculator`` for the resulting goodput.

NOTE(review): the library writes to and reads from Google Cloud Logging, so
this presumably needs a GCP project with Application Default Credentials
configured -- confirm against the package README before running.
"""
import datetime
import time

from ml_goodput_measurement import goodput

# In a real scenario the job name can be retrieved from environment variables.
# NOTE(review): entries are looked up by job name, so it should presumably be
# unique per run -- confirm against the package README.
JOB_NAME = "example-training-job"
LOGGER_NAME = "goodput_logger"
TOTAL_STEPS = 10
STEP_DURATION_SECONDS = 0.5


def _utc_now():
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.datetime.now(datetime.timezone.utc)


def main():
    """Run the simulated training loop and print the measured goodput."""
    # 1. Initialize the GoodputRecorder.
    recorder = goodput.GoodputRecorder(
        job_name=JOB_NAME,
        logger_name=LOGGER_NAME,
        # Multi-host jobs typically enable logging on a single process only.
        logging_enabled=True,
    )

    # 2. Simulate a training loop, bracketed by job start/end markers.
    recorder.record_job_start_time(_utc_now())
    print(f"Starting simulated training for {TOTAL_STEPS} steps...")

    for step in range(TOTAL_STEPS):
        # Only step starts are recorded; the library exposes no step-end call.
        recorder.record_step_start_time(step)

        # Simulate work (e.g., forward/backward pass).
        time.sleep(STEP_DURATION_SECONDS)

        if step % 2 == 0:
            print(f"Completed step {step}")

    recorder.record_job_end_time(_utc_now())

    # 3. Retrieve and display goodput metrics.
    # Goodput is defined as (Total Useful Time) / (Total Elapsed Time).
    # NOTE(review): freshly written log entries may take a moment to become
    # queryable, so an immediate query can miss them -- verify.
    calculator = goodput.GoodputCalculator(
        job_name=JOB_NAME,
        logger_name=LOGGER_NAME,
    )
    result = calculator.get_job_goodput()
    # Newer releases return a tuple whose first item is the goodput value;
    # older releases return the bare number. Accept both.
    job_goodput = result[0] if isinstance(result, tuple) else result

    print("\nTraining Complete.")
    # NOTE(review): the library reports goodput as a percentage (0-100), so it
    # is printed with a literal "%" rather than the ":.2%" format -- confirm.
    print(f"Goodput: {job_goodput:.2f}%")
    print(f"Total Steps: {TOTAL_STEPS}")


if __name__ == "__main__":
    main()