Update README.md
Browse files
README.md
CHANGED
@@ -40,6 +40,17 @@ mlabonne/orpo-dpo-mix-40k
 
 ORPO
 
+### Training Parameters
+## Training Arguments:
+
+Learning Rate: 1e-5
+Batch Size: 1
+max_steps: 1
+Block Size: 512
+Warmup Ratio: 0.1
+Weight Decay: 0.01
+Gradient Accumulation: 4
+Mixed Precision: bf16
 
 
 #### Training Hyperparameters
@@ -48,6 +59,12 @@ ORPO
 fp16 mixed precision
 
 
+### LoRA Configuration:
+
+R: 16
+Alpha: 32
+Dropout: 0.05
+
 
 ## Evaluation
 
|
Resulting README.md (lines 40–70 after this change):

ORPO

### Training Parameters
## Training Arguments:

Learning Rate: 1e-5
Batch Size: 1
max_steps: 1
Block Size: 512
Warmup Ratio: 0.1
Weight Decay: 0.01
Gradient Accumulation: 4
Mixed Precision: bf16


#### Training Hyperparameters

fp16 mixed precision


### LoRA Configuration:

R: 16
Alpha: 32
Dropout: 0.05


## Evaluation
