Update README.md
README.md CHANGED
@@ -1,5 +1,7 @@
 ---
 library_name: peft
+language:
+- pt
 ---
 ## Training procedure
 
@@ -19,3 +21,35 @@ The following `bitsandbytes` quantization config was used during training:
 
 
 - PEFT 0.4.0
+
+This model was trained with these parameters:
+```
+max_seq_length = 2048
+
+training_arguments_mistral = {
+    'num_train_epochs': 10,
+    'per_device_train_batch_size': 2,
+    'gradient_accumulation_steps': 2,
+    'gradient_checkpointing': True,
+    'optim': 'adamw_torch',
+    'lr_scheduler_type': 'constant_with_warmup',
+    'logging_steps': 10,
+    'evaluation_strategy': 'epoch',
+    'save_strategy': 'epoch',
+    'load_best_model_at_end': True,
+    'learning_rate': 4e-4,
+    'save_total_limit': 3,
+    'fp16': True,
+    'tf32': True,
+    'max_steps': 8000,
+    'max_grad_norm': 0.3,
+    'warmup_ratio': 0.03,
+    'disable_tqdm': False,
+    'weight_decay': 0.001,
+    'hub_model_id': 'Weni/WeniGPT-Mistral-7B-instructBase-4bit',
+    'push_to_hub': True,
+    'hub_strategy': 'every_save',
+    'hub_token': token,
+    'hub_private_repo': True,
+}
+```
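For context, a dict like `training_arguments_mistral` is typically unpacked straight into `transformers.TrainingArguments`. The sketch below is not part of this commit: the `output_dir` name and the placeholder `token` are assumptions, and the dict is abbreviated since the full set of keys is listed in the card above.

```python
# Sketch only: feeding the card's hyperparameter dict to TrainingArguments.
# `token` and `output_dir` are placeholders (assumptions), not repo values.
from transformers import TrainingArguments

token = "hf_..."  # placeholder Hugging Face access token

# Abbreviated here; the card above lists the full dict.
training_arguments_mistral = {
    'num_train_epochs': 10,
    'per_device_train_batch_size': 2,
    'gradient_accumulation_steps': 2,
    'learning_rate': 4e-4,
    'evaluation_strategy': 'epoch',
    'save_strategy': 'epoch',
    'load_best_model_at_end': True,
    'hub_token': token,
}

# `output_dir` is required by TrainingArguments; this name is a guess.
args = TrainingArguments(output_dir='wenigpt-mistral-7b', **training_arguments_mistral)
```

Note that in the Hugging Face `Trainer` a positive `max_steps` takes precedence over `num_train_epochs`, so the card's configuration stops at 8000 optimizer steps, and `load_best_model_at_end=True` works here because `evaluation_strategy` and `save_strategy` match.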
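Since the card declares `library_name: peft`, the repo holds an adapter rather than full weights. Below is a minimal loading sketch, also not from this commit: the base-model id is only a guess inferred from the repo name, and the 4-bit load mirrors the `bitsandbytes` config mentioned in the card.

```python
# Sketch only: loading the PEFT adapter on top of an assumed base model.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

base_id = "mistralai/Mistral-7B-Instruct-v0.1"  # assumption; adjust to the actual base
adapter_id = "Weni/WeniGPT-Mistral-7B-instructBase-4bit"

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # matches the card's 4-bit setup
    device_map="auto",
)
model = PeftModel.from_pretrained(base, adapter_id)  # standard PEFT loading call
```

Generation then goes through the usual `model.generate(...)` path; the `language: pt` metadata added in this commit signals that prompts are expected in Portuguese.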