antebe1
/

calc10

antebe1 committed on 17 days ago

Commit

6ec6355

verified ·

1 Parent(s): 05843b1

Update README.md

Files changed (1) hide show

README.md CHANGED Viewed

@@ -10,7 +10,40 @@ tags: []
 ## Model Details
 ### Model Description
 <!-- Provide a longer summary of what this model is. -->

 ## Model Details
+class SFTConfig:  # Settings bundle for an SFT run (presumably supervised fine-tuning — confirm): string settings, a LoRA adapter config, and trainer arguments.
+    sft_model_name: str = 'facebook/opt-350m'  # model identifier; presumably a Hugging Face Hub id — verify against the loader
+    sft_dataset_path: str = 'train.csv'  # relative file path; presumably the training data — confirm expected columns with the caller
+    sft_model_cache_dir: str = 'cache'  # relative directory name; presumably where downloaded weights are cached — confirm
+    sft_output_dir: str = '.'  # NOTE(review): differs from training_args.output_dir ('./sft_models') below — confirm which one the caller uses
+    hf_key: str = ''  # empty by default; presumably a Hugging Face access token supplied at runtime — confirm
+    peft_config = LoraConfig(  # no annotation: a plain class attribute, constructed once when the class body executes
+        r=4, # TODO: play with this number
+        lora_alpha=8, # TODO: play with this number
+        target_modules=['q_proj', 'v_proj', 'k_proj'],  # module names handed to LoRA as targets (query, value and key projections by name)
+        lora_dropout=0.05,
+        bias="none",
+        task_type="CAUSAL_LM" # TODO: you need to figure this out. HINT https://github.com/huggingface/peft/blob/3d2bf9a8b261ed2960f26e61246cf0aa624a6115/src/peft/utils/peft_types.py#L67
+    )
+    training_args = TrainingArguments(  # also unannotated: a plain class attribute, constructed once when the class body executes
+        per_device_train_batch_size=2,
+        gradient_accumulation_steps=2,  # with the batch size of 2 above, presumably 4 samples per device per optimizer step — confirm
+        gradient_checkpointing =False,
+        max_grad_norm= 0.3,
+        num_train_epochs=2, # TODO: play with this number
+        save_steps= 100,
+        learning_rate=0.0001, # TODO: play with this number
+        bf16=True,  # NOTE(review): presumably requires bf16-capable hardware — confirm for the target machine
+        save_total_limit=3,
+        logging_steps=10,
+        output_dir='./sft_models',  # NOTE(review): not the same location as sft_output_dir ('.') above — confirm this is intended
+        optim="adamw_torch",
+        lr_scheduler_type="cosine",
+        warmup_ratio=0.05,
+        remove_unused_columns=False,
+        report_to="none",
+    )
 ### Model Description
 <!-- Provide a longer summary of what this model is. -->