Update README.md
Browse files
README.md
CHANGED
@@ -10,7 +10,40 @@ tags: []
|
|
10 |
|
11 |
|
12 |
## Model Details
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
### Model Description
|
15 |
|
16 |
<!-- Provide a longer summary of what this model is. -->
|
|
|
10 |
|
11 |
|
12 |
## Model Details
|
13 |
+
class SFTConfig:
|
14 |
+
sft_model_name: str = 'facebook/opt-350m'
|
15 |
+
sft_dataset_path: str = 'train.csv'
|
16 |
+
sft_model_cache_dir: str = 'cache'
|
17 |
+
sft_output_dir: str = '.'
|
18 |
+
hf_key: str = ''
|
19 |
+
|
20 |
+
peft_config = LoraConfig(
|
21 |
+
r=4, # TODO: play with this number
|
22 |
+
lora_alpha=8, # TODO: play with this number
|
23 |
+
target_modules=['q_proj', 'v_proj', 'k_proj'],
|
24 |
+
lora_dropout=0.05,
|
25 |
+
bias="none",
|
26 |
+
task_type="CAUSAL_LM" # TODO: you need to figure this out. HINT https://github.com/huggingface/peft/blob/3d2bf9a8b261ed2960f26e61246cf0aa624a6115/src/peft/utils/peft_types.py#L67
|
27 |
+
)
|
28 |
+
|
29 |
+
training_args = TrainingArguments(
|
30 |
+
per_device_train_batch_size=2,
|
31 |
+
gradient_accumulation_steps=2,
|
32 |
+
gradient_checkpointing =False,
|
33 |
+
max_grad_norm= 0.3,
|
34 |
+
num_train_epochs=2, # TODO: play with this number
|
35 |
+
save_steps= 100,
|
36 |
+
learning_rate=0.0001, # TODO: play with this number
|
37 |
+
bf16=True,
|
38 |
+
save_total_limit=3,
|
39 |
+
logging_steps=10,
|
40 |
+
output_dir='./sft_models',
|
41 |
+
optim="adamw_torch",
|
42 |
+
lr_scheduler_type="cosine",
|
43 |
+
warmup_ratio=0.05,
|
44 |
+
remove_unused_columns=False,
|
45 |
+
report_to="none",
|
46 |
+
)
|
47 |
### Model Description
|
48 |
|
49 |
<!-- Provide a longer summary of what this model is. -->
|