devam-sheth-bits committed on
Commit
f960dd9
·
verified ·
1 Parent(s): 9242610

Training in progress, step 128, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:118883fe84d42e93a332df9a89e3a5026832f1eafd98f42f83a5432e0a24d627
3
  size 1621370224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:432594db22472e418bc8195f2b34ba5f65ee1039f0c71cf05a9cbf8831e05f5e
3
  size 1621370224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc3bac25bea09d5a7089ba2fac8b6bde72e9d00fc92278449b0bc608c27b3cf1
3
  size 3242908299
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a06b6a1026e5526aa739d8f587fb4500092b80502dc294e7c8afd98f41b007e0
3
  size 3242908299
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6f2ff255d68118c4de8327bf505dcaff3af63433e675cbebb46af8c3ce0d04b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ae720c221d378ef0f6be085fc497c223afaa3639f3165b626044847d03ec2ea
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.5426356589147288,
6
  "eval_steps": 500,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -43,6 +43,13 @@
43
  "learning_rate": 4.53125e-06,
44
  "loss": 0.1049,
45
  "step": 100
 
 
 
 
 
 
 
46
  }
47
  ],
48
  "logging_steps": 20,
@@ -57,12 +64,12 @@
57
  "should_evaluate": false,
58
  "should_log": false,
59
  "should_save": true,
60
- "should_training_stop": false
61
  },
62
  "attributes": {}
63
  }
64
  },
65
- "total_flos": 865206943088640.0,
66
  "train_batch_size": 4,
67
  "trial_name": null,
68
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.9767441860465116,
6
  "eval_steps": 500,
7
+ "global_step": 128,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
43
  "learning_rate": 4.53125e-06,
44
  "loss": 0.1049,
45
  "step": 100
46
+ },
47
+ {
48
+ "epoch": 1.8527131782945736,
49
+ "grad_norm": 1.4672043323516846,
50
+ "learning_rate": 1.40625e-06,
51
+ "loss": 0.0934,
52
+ "step": 120
53
  }
54
  ],
55
  "logging_steps": 20,
 
64
  "should_evaluate": false,
65
  "should_log": false,
66
  "should_save": true,
67
+ "should_training_stop": true
68
  },
69
  "attributes": {}
70
  }
71
  },
72
+ "total_flos": 1108682263756800.0,
73
  "train_batch_size": 4,
74
  "trial_name": null,
75
  "trial_params": null