huggingface_accelerate_distributed_training_quickstart_bert_glue.py
A basic training loop modified with the Accelerator object to enable distributed training.
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
from accelerate import Accelerator

def training_function():
    # Initialize the Accelerator
    accelerator = Accelerator()

    # Setup basic training hyperparameters
    lr = 2e-5
    num_epochs = 3
    seed = 42
    batch_size = 16

    set_seed(seed)

    # Load dataset, model, and tokenizer
    datasets = load_dataset("glue", "mrpc")
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=2)

    # Tokenize the data
    def tokenize_function(examples):
        return tokenizer(examples["sentence1"], examples["sentence2"], padding="max_length", truncation=True, max_length=128)

    tokenized_datasets = datasets.map(tokenize_function, batched=True, remove_columns=["sentence1", "sentence2", "idx"])
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
    tokenized_datasets.set_format("torch")

    # Create DataLoaders
    train_dataloader = DataLoader(tokenized_datasets["train"], shuffle=True, batch_size=batch_size)
    eval_dataloader = DataLoader(tokenized_datasets["validation"], batch_size=batch_size)
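    # Because every example is padded to a fixed max_length above, the default
    # collate_fn can stack batches directly; with dynamic padding you would
    # instead pass a collator such as transformers' DataCollatorWithPadding.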

    # Instantiate optimizer and scheduler
    optimizer = torch.optim.AdamW(params=model.parameters(), lr=lr)

    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=100,
        num_training_steps=(len(train_dataloader) * num_epochs),
    )
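    # Note: num_training_steps is computed from the un-sharded dataloader length.
    # After accelerator.prepare() below, Accelerate by default advances a prepared
    # scheduler once per process for each optimizer step, so the warmup/decay
    # schedule should still line up in multi-process runs.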

    # Prepare everything with accelerator
    # This handles moving data to the correct device and distributing the model
    model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # Training loop
    for epoch in range(num_epochs):
        model.train()
        for batch in train_dataloader:
            outputs = model(**batch)
            loss = outputs.loss
            # Use accelerator.backward() instead of loss.backward()
            accelerator.backward(loss)

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

        # Evaluation loop
        model.eval()
        for batch in eval_dataloader:
            with torch.no_grad():
                outputs = model(**batch)
            predictions = outputs.logits.argmax(dim=-1)
            # In a real scenario, you would gather predictions here using accelerator.gather()
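            # Minimal sketch of that step: pull predictions and labels from all
            # processes so metrics see the full validation set. Plain gather()
            # may include samples duplicated to even out the final batch;
            # accelerator.gather_for_metrics() (newer releases) drops them.
            all_predictions = accelerator.gather(predictions)
            all_labels = accelerator.gather(batch["labels"])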

if __name__ == "__main__":
    training_function()
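
# To run this script on multiple GPUs or machines, launch it with the Accelerate CLI, e.g.:
#   accelerate config    # one-time interactive setup
#   accelerate launch huggingface_accelerate_distributed_training_quickstart_bert_glue.py
# The same script also runs unmodified on a single device with plain `python`.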