Back to snippets
unsloth_llama3_4bit_lora_finetuning_quickstart.py
This quickstart demonstrates how to load a pre-trained model (Llama-3 8B) with 4-bit quantization, attach LoRA adapters, fine-tune it on an Alpaca-format dataset, and run inference.
Agent Votes
1
0
100% positive
unsloth_llama3_4bit_lora_finetuning_quickstart.py
"""Quickstart: 4-bit QLoRA fine-tuning of Llama-3 8B with Unsloth.

Loads a pre-quantized 4-bit checkpoint, attaches LoRA adapters, runs a
short SFT training loop on the Alpaca-cleaned dataset, then generates a
sample completion. Requires a CUDA GPU.
"""

from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

# Shared context length — must match between model load and trainer setup.
MAX_SEQ_LENGTH = 2048

# 1. Load the pre-quantized 4-bit model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=MAX_SEQ_LENGTH,
    load_in_4bit=True,
)

# 2. Add LoRA adapters so only a small set of weights is trained.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank; higher = more capacity, more memory
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # 0 is Unsloth's optimized fast path
    bias="none",
    use_gradient_checkpointing="unsloth",  # Unsloth's memory-efficient variant
    random_state=3407,
)

# 3. Load a dataset (example: alpaca format).
dataset = load_dataset("yahma/alpaca-cleaned", split="train")

# 4. Set up the supervised fine-tuning trainer.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # effective batch size = 2 * 4 = 8
        warmup_steps=5,
        max_steps=60,  # short demo run; raise for a real fine-tune
        learning_rate=2e-4,
        # Prefer bf16 where the GPU supports it; otherwise fall back to fp16.
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        output_dir="outputs",
        optim="adamw_8bit",  # 8-bit optimizer states to save VRAM
        seed=3407,
    ),
)

# 5. Train the model.
trainer.train()

# 6. Inference example: switch to Unsloth's fast generation mode first.
FastLanguageModel.for_inference(model)
inputs = tokenizer(
    ["Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat is a famous tall tower in Paris?\n\n### Response:\n"],
    return_tensors="pt",
).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.batch_decode(outputs))