Laurie
/

flan-t5-xl-deepspeed-zero3-summary

text2text-generation

Generated from Trainer

text-generation-inference

Model card Files Files and versions

Metrics Training metrics Community

Laurie committed on Apr 16, 2023

Commit

224efea

·

1 Parent(s): 48040d0

Update README.md

Files changed (1) hide show

README.md +7 -0

README.md CHANGED Viewed

@@ -4,7 +4,9 @@ datasets:
 - cnn_dailymail
 language:
 - en
 flan_t5_z3_config.json:
 {
   "fp16": {
     "enabled": "auto",
@@ -14,6 +16,7 @@ flan_t5_z3_config.json:
     "hysteresis": 2,
     "min_loss_scale": 1
   },
   "optimizer": {
     "type": "AdamW",
     "params": {
@@ -23,6 +26,7 @@ flan_t5_z3_config.json:
       "weight_decay": "auto"
     }
   },
   "scheduler": {
     "type": "WarmupLR",
     "params": {
@@ -31,6 +35,8 @@ flan_t5_z3_config.json:
       "warmup_num_steps": "auto"
     }
   },
   "zero_optimization": {
     "stage": 3,
     "overlap_comm": true,
@@ -43,6 +49,7 @@ flan_t5_z3_config.json:
     "stage3_max_reuse_distance": 1e9,
     "stage3_gather_16bit_weights_on_model_save": true
   },
   "gradient_accumulation_steps": "auto",
   "gradient_clipping": "auto",
   "steps_per_print": 2000,

 - cnn_dailymail
 language:
 - en
 flan_t5_z3_config.json:
 {
   "fp16": {
     "enabled": "auto",
     "hysteresis": 2,
     "min_loss_scale": 1
   },
   "optimizer": {
     "type": "AdamW",
     "params": {
       "weight_decay": "auto"
     }
   },
   "scheduler": {
     "type": "WarmupLR",
     "params": {
       "warmup_num_steps": "auto"
     }
   },
   "zero_optimization": {
     "stage": 3,
     "overlap_comm": true,
     "stage3_max_reuse_distance": 1e9,
     "stage3_gather_16bit_weights_on_model_save": true
   },
   "gradient_accumulation_steps": "auto",
   "gradient_clipping": "auto",
   "steps_per_print": 2000,