Nessii013 committed (verified)
Commit: 0f830f9
Parent(s): 59fd3eb

Create oumi/oumi_train.yaml

Files changed (1):
  oumi/oumi_train.yaml  +92 -0
oumi/oumi_train.yaml ADDED
@@ -0,0 +1,92 @@
+ # QLoRA config for CALM 405B.
+
+ model:
+   model_name: "meta-llama/Llama-3.1-405B-Instruct"
+   model_max_length: 4096
+   torch_dtype_str: "bfloat16"
+   attn_implementation: "sdpa"
+   load_pretrained_weights: True
+   trust_remote_code: True
+   tokenizer_pad_token: "<|finetune_right_pad_id|>"
+   enable_liger_kernel: True
+
+ data:
+   train:
+     datasets:
+       - dataset_name: "text_sft_jsonl"
+         dataset_path: "/path/to/training/dataset.jsonl"
+         shuffle: True
+         seed: 42
+     collator_name: "text_completions_only_with_padding"
+     target_col: "messages"
+     use_async_dataset: True
+     seed: 42
+   validation:
+     datasets:
+       - dataset_name: "text_sft_jsonl"
+         dataset_path: "/path/to/validation/dataset.jsonl"
+     collator_name: "text_completions_only_with_padding"
+     target_col: "messages"
+     use_async_dataset: True
+     seed: 42
+
+ training:
+   trainer_type: "TRL_SFT"
+   use_peft: True
+   save_steps: 500
+   num_train_epochs: 1
+   per_device_train_batch_size: 2
+   gradient_accumulation_steps: 1
+   eval_strategy: "steps"
+   eval_steps: 500
+   per_device_eval_batch_size: 1
+
+   enable_gradient_checkpointing: True
+   gradient_checkpointing_kwargs:
+     use_reentrant: False
+   ddp_find_unused_parameters: False
+   optimizer: "adamw_torch_fused"
+   learning_rate: 1.0e-04
+   warmup_ratio: 0.1
+   weight_decay: 0.01
+   max_grad_norm: 10
+   compile: False
+
+   dataloader_num_workers: "auto"
+   dataloader_prefetch_factor: 32
+
+   logging_steps: 50
+   log_model_summary: False
+   empty_device_cache_steps: 1
+   include_performance_metrics: True
+   output_dir: "output/llama405b.qlora"
+   enable_wandb: True
+
+ peft:
+   q_lora: True
+   # https://github.com/pytorch/torchtune/blob/37337f71677da69f0967a9cde34b96ad7fec3cb6/torchtune/modules/peft/lora.py#L95
+   bnb_4bit_quant_type: "nf4"
+   # Must use a float type for quantized data storage. See:
+   # https://huggingface.co/docs/bitsandbytes/main/en/fsdp_qlora#quantized-data-storage.
+   bnb_4bit_quant_storage: "bfloat16"
+   bnb_4bit_compute_dtype: "bfloat16"
+   lora_r: 16
+   lora_alpha: 32
+   lora_dropout: 0.0
+   lora_target_modules:
+     - q_proj
+     - k_proj
+     - v_proj
+     - o_proj
+     - gate_proj
+     - up_proj
+     - down_proj
+
+ fsdp:
+   enable_fsdp: True
+   forward_prefetch: True
+   backward_prefetch: "BACKWARD_POST"
+   use_orig_params: True
+   cpu_offload: True
+   auto_wrap_policy: "TRANSFORMER_BASED_WRAP"
+   transformer_layer_cls: "LlamaDecoderLayer"
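
The `text_sft_jsonl` datasets referenced under `data:` point at JSONL files that are not part of this commit. The sketch below is a hypothetical illustration of the expected layout: one JSON object per line carrying a `messages` list (matching `target_col: "messages"`) of role/content chat turns. The file name and example turns are placeholders, not taken from the training data.

```python
# Hypothetical sketch of the JSONL layout assumed for the `text_sft_jsonl`
# datasets above: one JSON object per line, each with a "messages" list
# (matching `target_col: "messages"`) of role/content chat turns.
import json

records = [
    {
        "messages": [
            {"role": "user", "content": "What does QLoRA change compared to LoRA?"},
            {
                "role": "assistant",
                "content": "QLoRA trains the same low-rank adapters, but on top of a "
                "4-bit quantized, frozen base model to reduce memory use.",
            },
        ]
    },
]

# "train.jsonl" is a placeholder; point `dataset_path` in the config at the real file.
with open("train.jsonl", "w", encoding="utf-8") as f:
    for record in records:
        f.write(json.dumps(record) + "\n")
```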
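
As a minimal launch sketch, the snippet below assumes the Oumi Python API exposes a `TrainingConfig.from_yaml` loader and a top-level `train()` entry point; exact names may differ across Oumi versions, so verify against the installed release (the CLI route is `oumi train -c oumi/oumi_train.yaml`).

```python
# Minimal launch sketch. Assumes `oumi` is installed and exposes
# `TrainingConfig.from_yaml` plus a top-level `train()` function; verify
# against the installed version. CLI equivalent: `oumi train -c oumi/oumi_train.yaml`.
from oumi import train
from oumi.core.configs import TrainingConfig

# Parse the YAML above into a typed config; the dataset_path placeholders
# under `data:` must point at real JSONL files first.
config = TrainingConfig.from_yaml("oumi/oumi_train.yaml")

# A 405B QLoRA run requires many GPUs with FSDP; in practice this would be
# launched under torchrun, e.g.:
#   oumi distributed torchrun -m oumi train -c oumi/oumi_train.yaml
train(config)
```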