Update README.md
Browse files
README.md
CHANGED
@@ -4,7 +4,9 @@ datasets:
|
|
4 |
- cnn_dailymail
|
5 |
language:
|
6 |
- en
|
|
|
7 |
flan_t5_z3_config.json:
|
|
|
8 |
{
|
9 |
"fp16": {
|
10 |
"enabled": "auto",
|
@@ -14,6 +16,7 @@ flan_t5_z3_config.json:
|
|
14 |
"hysteresis": 2,
|
15 |
"min_loss_scale": 1
|
16 |
},
|
|
|
17 |
"optimizer": {
|
18 |
"type": "AdamW",
|
19 |
"params": {
|
@@ -23,6 +26,7 @@ flan_t5_z3_config.json:
|
|
23 |
"weight_decay": "auto"
|
24 |
}
|
25 |
},
|
|
|
26 |
"scheduler": {
|
27 |
"type": "WarmupLR",
|
28 |
"params": {
|
@@ -31,6 +35,8 @@ flan_t5_z3_config.json:
|
|
31 |
"warmup_num_steps": "auto"
|
32 |
}
|
33 |
},
|
|
|
|
|
34 |
"zero_optimization": {
|
35 |
"stage": 3,
|
36 |
"overlap_comm": true,
|
@@ -43,6 +49,7 @@ flan_t5_z3_config.json:
|
|
43 |
"stage3_max_reuse_distance": 1e9,
|
44 |
"stage3_gather_16bit_weights_on_model_save": true
|
45 |
},
|
|
|
46 |
"gradient_accumulation_steps": "auto",
|
47 |
"gradient_clipping": "auto",
|
48 |
"steps_per_print": 2000,
|
|
|
4 |
- cnn_dailymail
|
5 |
language:
|
6 |
- en
|
7 |
+
|
8 |
flan_t5_z3_config.json:
|
9 |
+
|
10 |
{
|
11 |
"fp16": {
|
12 |
"enabled": "auto",
|
|
|
16 |
"hysteresis": 2,
|
17 |
"min_loss_scale": 1
|
18 |
},
|
19 |
+
|
20 |
"optimizer": {
|
21 |
"type": "AdamW",
|
22 |
"params": {
|
|
|
26 |
"weight_decay": "auto"
|
27 |
}
|
28 |
},
|
29 |
+
|
30 |
"scheduler": {
|
31 |
"type": "WarmupLR",
|
32 |
"params": {
|
|
|
35 |
"warmup_num_steps": "auto"
|
36 |
}
|
37 |
},
|
38 |
+
|
39 |
+
|
40 |
"zero_optimization": {
|
41 |
"stage": 3,
|
42 |
"overlap_comm": true,
|
|
|
49 |
"stage3_max_reuse_distance": 1e9,
|
50 |
"stage3_gather_16bit_weights_on_model_save": true
|
51 |
},
|
52 |
+
|
53 |
"gradient_accumulation_steps": "auto",
|
54 |
"gradient_clipping": "auto",
|
55 |
"steps_per_print": 2000,
|