antebe1 committed on
Commit
6ec6355
·
verified ·
1 Parent(s): 05843b1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +34 -1
README.md CHANGED
@@ -10,7 +10,40 @@ tags: []
10
 
11
 
12
  ## Model Details
13
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ### Model Description
15
 
16
  <!-- Provide a longer summary of what this model is. -->
 
10
 
11
 
12
  ## Model Details
13
class SFTConfig:
    """Configuration defaults for supervised fine-tuning (SFT).

    Values are plain class-level attributes so they can be read directly
    (e.g. ``SFTConfig.sft_model_name``) or overridden by subclassing.
    """

    sft_model_name: str = 'facebook/opt-350m'  # base Hugging Face model to fine-tune
    sft_dataset_path: str = 'train.csv'        # path to the training dataset
    sft_model_cache_dir: str = 'cache'         # local cache directory for model downloads
    sft_output_dir: str = '.'                  # directory where fine-tuned artifacts are written
    hf_key: str = ''                           # Hugging Face API token — keep empty in source control
19
+
20
# LoRA adapter configuration for parameter-efficient fine-tuning.
# Requires `from peft import LoraConfig` at the top of the file.
peft_config = LoraConfig(
    r=4,                # LoRA rank (adapter bottleneck size) — TODO: tune
    lora_alpha=8,       # LoRA scaling factor (commonly 2*r) — TODO: tune
    target_modules=['q_proj', 'v_proj', 'k_proj'],  # attention projections to adapt
    lora_dropout=0.05,  # dropout applied to the LoRA layers
    bias="none",        # do not train bias terms
    task_type="CAUSAL_LM",  # decoder-only language modeling task
)
28
+
29
# Hugging Face Trainer hyperparameters for the SFT run.
# Requires `from transformers import TrainingArguments` at the top of the file.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,   # effective batch size = 2 * 2 per device
    gradient_checkpointing=False,
    max_grad_norm=0.3,               # gradient clipping threshold
    num_train_epochs=2,              # TODO: tune
    save_steps=100,
    learning_rate=0.0001,            # TODO: tune
    bf16=True,                       # assumes bf16-capable hardware — confirm before running
    save_total_limit=3,              # keep at most 3 checkpoints on disk
    logging_steps=10,
    output_dir='./sft_models',
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    remove_unused_columns=False,     # keep extra dataset columns the collator may need
    report_to="none",                # disable wandb/tensorboard reporting
)
47
  ### Model Description
48
 
49
  <!-- Provide a longer summary of what this model is. -->