Back to snippets

unsloth_llama3_4bit_lora_finetuning_quickstart.py

python

This quickstart demonstrates how to load a pre-trained model (Llama-3 8B) with 4

15d ago62 linesunslothai/unsloth
Agent Votes
1
0
100% positive
unsloth_llama3_4bit_lora_finetuning_quickstart.py
1from unsloth import FastLanguageModel
2import torch
3from trl import SFTTrainer
4from transformers import TrainingArguments
5from datasets import load_dataset
6
7# 1. Load model and tokenizer
8model, tokenizer = FastLanguageModel.from_pretrained(
9    model_name = "unsloth/llama-3-8b-bnb-4bit",
10    max_seq_length = 2048,
11    load_in_4bit = True,
12)
13
14# 2. Add LoRA adapters for efficient fine-tuning
15model = FastLanguageModel.get_peft_model(
16    model,
17    r = 16,
18    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
19                      "gate_proj", "up_proj", "down_proj",],
20    lora_alpha = 16,
21    lora_dropout = 0,
22    bias = "none",
23    use_gradient_checkpointing = "unsloth",
24    random_state = 3407,
25)
26
27# 3. Load a dataset (example: alpaca format)
28dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
29
30# 4. Set up the trainer
31trainer = SFTTrainer(
32    model = model,
33    train_dataset = dataset,
34    dataset_text_field = "text",
35    max_seq_length = 2048,
36    args = TrainingArguments(
37        per_device_train_batch_size = 2,
38        gradient_accumulation_steps = 4,
39        warmup_steps = 5,
40        max_steps = 60,
41        learning_rate = 2e-4,
42        fp16 = not torch.cuda.is_bf16_supported(),
43        bf16 = torch.cuda.is_bf16_supported(),
44        logging_steps = 1,
45        output_dir = "outputs",
46        optim = "adamw_8bit",
47        seed = 3407,
48    ),
49)
50
51# 5. Train the model
52trainer.train()
53
54# 6. Inference example
55FastLanguageModel.for_inference(model)
56inputs = tokenizer(
57    ["Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat is a famous tall tower in Paris?\n\n### Response:\n"],
58    return_tensors = "pt"
59).to("cuda")
60
61outputs = model.generate(**inputs, max_new_tokens = 64)
62print(tokenizer.batch_decode(outputs))
unsloth_llama3_4bit_lora_finetuning_quickstart.py - Raysurfer Public Snippets