Master LLM fine-tuning techniques, including supervised fine-tuning (SFT) on instruction data, parameter-efficient methods such as LoRA and QLoRA, and reinforcement learning from human feedback (RLHF), to adapt models to specialized tasks.
```python
import langtrain
from langtrain.trainers import SFTTrainer
from langtrain.data import InstructionDataset

# Load pre-trained LLM
model = langtrain.AutoModelForCausalLM.from_pretrained(
    "llama-2-7b-hf",
    torch_dtype="auto",
    device_map="auto"
)

tokenizer = langtrain.AutoTokenizer.from_pretrained("llama-2-7b-hf")
tokenizer.pad_token = tokenizer.eos_token

# Prepare instruction dataset
dataset = InstructionDataset.from_json(
    "alpaca_data.json",
    instruction_template="### Instruction:\n{instruction}\n\n### Response:\n{output}",
    max_seq_length=512
)

# Configure SFT training
training_args = langtrain.TrainingArguments(
    output_dir="./sft-llama-2-7b",
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    max_steps=1000,
    warmup_steps=100,
    logging_steps=10,
    save_steps=500,
    fp16=True,
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    max_grad_norm=1.0
)

# Start supervised fine-tuning
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=training_args,
    packing=True,              # Pack multiple samples per sequence
    dataset_text_field="text"
)

trainer.train()
```
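After training, it is worth smoke-testing the model with a prompt in the exact instruction format used during training; a mismatched template is a common cause of poor SFT outputs. A minimal sketch, assuming langtrain mirrors the standard transformers `generate()` API (the prompt text is illustrative):

```python
# Build a prompt in the same template used by InstructionDataset above
prompt = "### Instruction:\nSummarize the plot of Hamlet in two sentences.\n\n### Response:\n"

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id
)

# Decode only the newly generated tokens, dropping the prompt
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[1]:],
    skip_special_tokens=True
)
print(response)
```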
LoRA freezes the base weights and trains small low-rank adapter matrices instead, cutting the trainable parameter count to a fraction of a percent:

```python
import langtrain
from langtrain.trainers import SFTTrainer
from peft import LoraConfig, get_peft_model, TaskType
import torch

# Load base model
model = langtrain.AutoModelForCausalLM.from_pretrained(
    "mistral-7b-v0.1",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="flash_attention_2"  # Use FlashAttention for efficiency
)

tokenizer = langtrain.AutoTokenizer.from_pretrained("mistral-7b-v0.1")
tokenizer.pad_token = tokenizer.eos_token

# Configure LoRA with optimal settings
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=64,                        # Rank - higher for complex tasks
    lora_alpha=128,              # Scaling factor (typically 2*r)
    lora_dropout=0.05,           # Low dropout for stability
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # Attention projections
        "gate_proj", "up_proj", "down_proj"      # MLP projections
    ],
    bias="none",
    use_rslora=True,             # Rank-stabilized LoRA
    init_lora_weights="gaussian"
)

# Apply LoRA to model
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # ~0.2% of total parameters

# Training with LoRA-specific settings
training_args = langtrain.TrainingArguments(
    output_dir="./lora-mistral-7b",
    learning_rate=3e-4,          # Higher LR for LoRA
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    max_steps=2000,
    warmup_steps=200,
    weight_decay=0.01,
    logging_steps=25,
    save_steps=500,
    bf16=True,
    dataloader_pin_memory=False
)

# Reuses the instruction dataset prepared in the SFT example
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    args=training_args
)

trainer.train()
model.save_pretrained("./lora-adapters")
```
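Because LoRA saves only the small adapter matrices, you must either load them alongside the base model at inference time or fold them into the base weights first. A sketch of the merge path using peft's `PeftModel.from_pretrained` and `merge_and_unload()` (paths reuse the ones above):

```python
from peft import PeftModel
import torch

# Reload the base model; merging happens in this dtype
base_model = langtrain.AutoModelForCausalLM.from_pretrained(
    "mistral-7b-v0.1",
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# Attach the trained adapters, then fold them into the base weights:
# W' = W + s * (B @ A), where s = lora_alpha / sqrt(r) since rsLoRA was enabled
merged = PeftModel.from_pretrained(base_model, "./lora-adapters")
merged = merged.merge_and_unload()

# Save as a standalone checkpoint; no peft dependency at inference time
merged.save_pretrained("./mistral-7b-lora-merged")
tokenizer.save_pretrained("./mistral-7b-lora-merged")
```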
QLoRA combines LoRA with 4-bit quantization of the frozen base model, so even a 13B model can be fine-tuned on a single GPU:

```python
import langtrain
from langtrain.trainers import SFTTrainer
from transformers import BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # Normal Float 4-bit
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,         # Nested quantization
)

# Load quantized model
model = langtrain.AutoModelForCausalLM.from_pretrained(
    "CodeLlama-13b-hf",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = langtrain.AutoTokenizer.from_pretrained("CodeLlama-13b-hf")
tokenizer.pad_token = tokenizer.eos_token

# Prepare model for k-bit training
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# QLoRA configuration
qlora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, qlora_config)

# Training arguments optimized for QLoRA
training_args = langtrain.TrainingArguments(
    output_dir="./qlora-codellama-13b",
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=16,
    max_steps=1500,
    warmup_steps=150,
    bf16=True,
    logging_steps=10,
    optim="paged_adamw_32bit",       # Memory-efficient paged optimizer
    lr_scheduler_type="constant",
    max_grad_norm=0.3,
    group_by_length=True             # Batch sequences of similar length to cut padding
)

# code_dataset: an InstructionDataset of code samples, prepared as in the SFT example
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=code_dataset,
    args=training_args,
    max_seq_length=2048
)

trainer.train()
```
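QLoRA adapters are saved and reloaded like plain LoRA adapters; the only extra step is re-creating the quantized base with the same `BitsAndBytesConfig` before attaching them. A minimal sketch (the `./qlora-adapters` path is illustrative):

```python
from peft import PeftModel

# Save only the adapter weights -- a small fraction of the 13B base model
model.save_pretrained("./qlora-adapters")

# Later, for inference: reload the 4-bit base with the same quantization
# config and attach the trained adapter on top
base_model = langtrain.AutoModelForCausalLM.from_pretrained(
    "CodeLlama-13b-hf",
    quantization_config=bnb_config,
    device_map="auto"
)
inference_model = PeftModel.from_pretrained(base_model, "./qlora-adapters")
inference_model.eval()
```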
RLHF runs in two stages: first a reward model is trained on human preference pairs, then the SFT model is optimized against it with PPO while a KL penalty keeps the policy close to the original:

```python
import langtrain
from langtrain.rlhf import RewardModel
from langtrain.data import PreferenceDataset
from trl import AutoModelForCausalLMWithValueHead, PPOTrainer, PPOConfig

# Step 1: Train reward model on preference data
preference_data = PreferenceDataset.from_json("human_preferences.json")
splits = preference_data.train_test_split(0.1)  # Hold out 10% for evaluation

reward_model = RewardModel.from_pretrained(
    "sft-model-checkpoint",  # Start from SFT model
    num_labels=1
)

reward_trainer = langtrain.RewardTrainer(
    model=reward_model,
    tokenizer=tokenizer,
    train_dataset=splits["train"],
    eval_dataset=splits["test"],
    compute_metrics=lambda p: {"accuracy": (p.predictions > 0).sum() / len(p.predictions)}
)

reward_trainer.train()

# Step 2: PPO training with the reward model
ppo_config = PPOConfig(
    model_name="sft-model-checkpoint",
    learning_rate=1.41e-5,
    log_with="wandb",
    mini_batch_size=64,
    batch_size=256,
    gradient_accumulation_steps=4,
    optimize_cuda_cache=True,
    early_stopping=True,
    target_kl=0.1,           # KL divergence constraint
    ppo_epochs=4,            # Inner optimization epochs per PPO batch
    max_grad_norm=1.0,
    use_score_scaling=True,
    use_score_norm=True
)

# Policy (with value head) and a frozen reference copy of the SFT model
model = AutoModelForCausalLMWithValueHead.from_pretrained("sft-model-checkpoint")
ref_model = AutoModelForCausalLMWithValueHead.from_pretrained("sft-model-checkpoint")

generation_kwargs = {
    "max_new_tokens": 256,
    "do_sample": True,
    "top_p": 0.9,
    "pad_token_id": tokenizer.eos_token_id
}

# Initialize PPO trainer; prompt_dataset is a tokenized dataset of prompts
ppo_trainer = PPOTrainer(
    config=ppo_config,
    model=model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    dataset=prompt_dataset
)

# Training loop: each pass samples responses, scores them, and updates the policy
num_train_epochs = 4
for epoch in range(num_train_epochs):
    for batch in ppo_trainer.dataloader:
        query_tensors = batch["input_ids"]

        # Generate responses from the current policy
        response_tensors = ppo_trainer.generate(
            query_tensors,
            return_prompt=False,
            **generation_kwargs
        )

        # Score each (prompt, response) pair with the reward model
        rewards = reward_model.get_rewards(query_tensors, response_tensors)

        # PPO step: maximize reward subject to the KL constraint
        stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
        ppo_trainer.log_stats(stats, batch, rewards)
```
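Under the hood, a reward trainer like the one above typically optimizes a pairwise Bradley-Terry objective over (chosen, rejected) response pairs: the model only has to score the preferred response higher, not hit any absolute value. A self-contained sketch of that loss (the helper name is hypothetical, not a langtrain API):

```python
import torch
import torch.nn.functional as F

def pairwise_reward_loss(chosen_scores: torch.Tensor,
                         rejected_scores: torch.Tensor) -> torch.Tensor:
    """Bradley-Terry preference loss: -log sigmoid(r_chosen - r_rejected).

    Minimized when the reward model scores the human-preferred response
    higher than the rejected one; only the margin matters.
    """
    return -F.logsigmoid(chosen_scores - rejected_scores).mean()

# Toy example: scalar rewards for a batch of three preference pairs
chosen = torch.tensor([1.2, 0.4, 2.0])
rejected = torch.tensor([0.3, 0.9, -0.5])
loss = pairwise_reward_loss(chosen, rejected)  # small when chosen > rejected
print(loss.item())
```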