Upload 8 files

Browse files

Files changed (8) hide show

README.md +202 -3
adapter_config.json +37 -0
adapter_model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +1147 -0
training_args.bin +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,202 @@
----
-license: apache-2.0
----

+---
+base_model: Qwen/Qwen2.5-VL-7B-Instruct
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT; r=8, lora_alpha=8) for causal language modeling
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** Qwen/Qwen2.5-VL-7B-Instruct
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "gate_proj",
+    "q_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a36d29b8322176cf36f3b07aeaaba2214a917a551d7ad3e9dc1211f52efe5e0d
+size 95255648

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:796ff5912c44672528480a2b5adf844c3b6488a2661fe5d4e25cbd8e442c6e36
+size 161811050

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9196a1e708bf24d6abba41cce3f8558820acc3e50f9394c5955e29eb41ffea3d
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79fba8d36352a914e697da10631eed4b12001e76f97340f18155de9f1617af5f
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1147 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 15900,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.018867924528301886,
+      "grad_norm": 0.540359616279602,
+      "learning_rate": 1.9876714052082024e-05,
+      "loss": 9.6223,
+      "step": 100
+    },
+    {
+      "epoch": 0.03773584905660377,
+      "grad_norm": 0.65615314245224,
+      "learning_rate": 1.975091206441062e-05,
+      "loss": 6.7276,
+      "step": 200
+    },
+    {
+      "epoch": 0.05660377358490566,
+      "grad_norm": 0.10516568273305893,
+      "learning_rate": 1.9625110076739213e-05,
+      "loss": 6.6446,
+      "step": 300
+    },
+    {
+      "epoch": 0.07547169811320754,
+      "grad_norm": 0.6999079585075378,
+      "learning_rate": 1.949930808906781e-05,
+      "loss": 6.6792,
+      "step": 400
+    },
+    {
+      "epoch": 0.09433962264150944,
+      "grad_norm": 0.25218865275382996,
+      "learning_rate": 1.9373506101396403e-05,
+      "loss": 6.7655,
+      "step": 500
+    },
+    {
+      "epoch": 0.11320754716981132,
+      "grad_norm": 0.02898556925356388,
+      "learning_rate": 1.9247704113724998e-05,
+      "loss": 6.71,
+      "step": 600
+    },
+    {
+      "epoch": 0.1320754716981132,
+      "grad_norm": 0.2081533968448639,
+      "learning_rate": 1.9121902126053593e-05,
+      "loss": 6.7371,
+      "step": 700
+    },
+    {
+      "epoch": 0.1509433962264151,
+      "grad_norm": 0.6868953108787537,
+      "learning_rate": 1.8996100138382188e-05,
+      "loss": 6.7152,
+      "step": 800
+    },
+    {
+      "epoch": 0.16981132075471697,
+      "grad_norm": 0.5356037020683289,
+      "learning_rate": 1.8870298150710782e-05,
+      "loss": 6.7843,
+      "step": 900
+    },
+    {
+      "epoch": 0.18867924528301888,
+      "grad_norm": 0.7029320597648621,
+      "learning_rate": 1.8744496163039377e-05,
+      "loss": 6.5619,
+      "step": 1000
+    },
+    {
+      "epoch": 0.20754716981132076,
+      "grad_norm": 0.09844966232776642,
+      "learning_rate": 1.8618694175367972e-05,
+      "loss": 6.8881,
+      "step": 1100
+    },
+    {
+      "epoch": 0.22641509433962265,
+      "grad_norm": 0.5668662786483765,
+      "learning_rate": 1.8492892187696567e-05,
+      "loss": 6.7273,
+      "step": 1200
+    },
+    {
+      "epoch": 0.24528301886792453,
+      "grad_norm": 0.21362020075321198,
+      "learning_rate": 1.8367090200025162e-05,
+      "loss": 6.8294,
+      "step": 1300
+    },
+    {
+      "epoch": 0.2641509433962264,
+      "grad_norm": 0.4036034345626831,
+      "learning_rate": 1.8241288212353757e-05,
+      "loss": 6.7298,
+      "step": 1400
+    },
+    {
+      "epoch": 0.2830188679245283,
+      "grad_norm": 0.43861180543899536,
+      "learning_rate": 1.811548622468235e-05,
+      "loss": 6.7187,
+      "step": 1500
+    },
+    {
+      "epoch": 0.3018867924528302,
+      "grad_norm": 0.021787254139780998,
+      "learning_rate": 1.7989684237010946e-05,
+      "loss": 6.6952,
+      "step": 1600
+    },
+    {
+      "epoch": 0.32075471698113206,
+      "grad_norm": 0.33325955271720886,
+      "learning_rate": 1.786388224933954e-05,
+      "loss": 6.8646,
+      "step": 1700
+    },
+    {
+      "epoch": 0.33962264150943394,
+      "grad_norm": 0.3679867088794708,
+      "learning_rate": 1.7738080261668136e-05,
+      "loss": 6.693,
+      "step": 1800
+    },
+    {
+      "epoch": 0.3584905660377358,
+      "grad_norm": 0.45209288597106934,
+      "learning_rate": 1.761227827399673e-05,
+      "loss": 6.8112,
+      "step": 1900
+    },
+    {
+      "epoch": 0.37735849056603776,
+      "grad_norm": 0.46227267384529114,
+      "learning_rate": 1.7486476286325326e-05,
+      "loss": 6.7437,
+      "step": 2000
+    },
+    {
+      "epoch": 0.39622641509433965,
+      "grad_norm": 0.7230538725852966,
+      "learning_rate": 1.736067429865392e-05,
+      "loss": 6.7605,
+      "step": 2100
+    },
+    {
+      "epoch": 0.41509433962264153,
+      "grad_norm": 0.7919989228248596,
+      "learning_rate": 1.7234872310982516e-05,
+      "loss": 6.7826,
+      "step": 2200
+    },
+    {
+      "epoch": 0.4339622641509434,
+      "grad_norm": 0.6324593424797058,
+      "learning_rate": 1.710907032331111e-05,
+      "loss": 6.7607,
+      "step": 2300
+    },
+    {
+      "epoch": 0.4528301886792453,
+      "grad_norm": 0.4530716836452484,
+      "learning_rate": 1.6983268335639705e-05,
+      "loss": 6.6475,
+      "step": 2400
+    },
+    {
+      "epoch": 0.4716981132075472,
+      "grad_norm": 0.6669319868087769,
+      "learning_rate": 1.68574663479683e-05,
+      "loss": 6.6457,
+      "step": 2500
+    },
+    {
+      "epoch": 0.49056603773584906,
+      "grad_norm": 0.5140043497085571,
+      "learning_rate": 1.673166436029689e-05,
+      "loss": 6.8527,
+      "step": 2600
+    },
+    {
+      "epoch": 0.5094339622641509,
+      "grad_norm": 0.01416013389825821,
+      "learning_rate": 1.660586237262549e-05,
+      "loss": 6.7319,
+      "step": 2700
+    },
+    {
+      "epoch": 0.5283018867924528,
+      "grad_norm": 0.012981708161532879,
+      "learning_rate": 1.6480060384954085e-05,
+      "loss": 6.8672,
+      "step": 2800
+    },
+    {
+      "epoch": 0.5471698113207547,
+      "grad_norm": 0.4480224549770355,
+      "learning_rate": 1.635425839728268e-05,
+      "loss": 6.6993,
+      "step": 2900
+    },
+    {
+      "epoch": 0.5660377358490566,
+      "grad_norm": 0.6981659531593323,
+      "learning_rate": 1.6228456409611274e-05,
+      "loss": 6.7323,
+      "step": 3000
+    },
+    {
+      "epoch": 0.5849056603773585,
+      "grad_norm": 0.8297523260116577,
+      "learning_rate": 1.6102654421939866e-05,
+      "loss": 6.7374,
+      "step": 3100
+    },
+    {
+      "epoch": 0.6037735849056604,
+      "grad_norm": 0.39988359808921814,
+      "learning_rate": 1.597685243426846e-05,
+      "loss": 6.788,
+      "step": 3200
+    },
+    {
+      "epoch": 0.6226415094339622,
+      "grad_norm": 0.7337197065353394,
+      "learning_rate": 1.585105044659706e-05,
+      "loss": 6.6144,
+      "step": 3300
+    },
+    {
+      "epoch": 0.6415094339622641,
+      "grad_norm": 0.7628914713859558,
+      "learning_rate": 1.5725248458925654e-05,
+      "loss": 6.712,
+      "step": 3400
+    },
+    {
+      "epoch": 0.660377358490566,
+      "grad_norm": 0.8378991484642029,
+      "learning_rate": 1.559944647125425e-05,
+      "loss": 6.7784,
+      "step": 3500
+    },
+    {
+      "epoch": 0.6792452830188679,
+      "grad_norm": 0.9686161279678345,
+      "learning_rate": 1.547364448358284e-05,
+      "loss": 6.7574,
+      "step": 3600
+    },
+    {
+      "epoch": 0.6981132075471698,
+      "grad_norm": 0.013421991840004921,
+      "learning_rate": 1.5347842495911435e-05,
+      "loss": 6.7419,
+      "step": 3700
+    },
+    {
+      "epoch": 0.7169811320754716,
+      "grad_norm": 0.4076175391674042,
+      "learning_rate": 1.5222040508240033e-05,
+      "loss": 6.6656,
+      "step": 3800
+    },
+    {
+      "epoch": 0.7358490566037735,
+      "grad_norm": 0.3251872956752777,
+      "learning_rate": 1.5096238520568626e-05,
+      "loss": 6.7172,
+      "step": 3900
+    },
+    {
+      "epoch": 0.7547169811320755,
+      "grad_norm": 0.9598690271377563,
+      "learning_rate": 1.4970436532897221e-05,
+      "loss": 6.5542,
+      "step": 4000
+    },
+    {
+      "epoch": 0.7735849056603774,
+      "grad_norm": 0.018446920439600945,
+      "learning_rate": 1.4844634545225816e-05,
+      "loss": 6.6315,
+      "step": 4100
+    },
+    {
+      "epoch": 0.7924528301886793,
+      "grad_norm": 0.6573294401168823,
+      "learning_rate": 1.471883255755441e-05,
+      "loss": 6.654,
+      "step": 4200
+    },
+    {
+      "epoch": 0.8113207547169812,
+      "grad_norm": 0.3887442946434021,
+      "learning_rate": 1.4593030569883004e-05,
+      "loss": 6.5997,
+      "step": 4300
+    },
+    {
+      "epoch": 0.8301886792452831,
+      "grad_norm": 0.8568657636642456,
+      "learning_rate": 1.44672285822116e-05,
+      "loss": 6.6663,
+      "step": 4400
+    },
+    {
+      "epoch": 0.8490566037735849,
+      "grad_norm": 0.5098298788070679,
+      "learning_rate": 1.4341426594540195e-05,
+      "loss": 6.6538,
+      "step": 4500
+    },
+    {
+      "epoch": 0.8679245283018868,
+      "grad_norm": 0.02426791377365589,
+      "learning_rate": 1.421562460686879e-05,
+      "loss": 6.7685,
+      "step": 4600
+    },
+    {
+      "epoch": 0.8867924528301887,
+      "grad_norm": 0.017061766237020493,
+      "learning_rate": 1.4089822619197383e-05,
+      "loss": 6.6943,
+      "step": 4700
+    },
+    {
+      "epoch": 0.9056603773584906,
+      "grad_norm": 0.021372614428400993,
+      "learning_rate": 1.3964020631525978e-05,
+      "loss": 6.7053,
+      "step": 4800
+    },
+    {
+      "epoch": 0.9245283018867925,
+      "grad_norm": 0.8628166913986206,
+      "learning_rate": 1.3838218643854575e-05,
+      "loss": 6.7014,
+      "step": 4900
+    },
+    {
+      "epoch": 0.9433962264150944,
+      "grad_norm": 0.37710800766944885,
+      "learning_rate": 1.371241665618317e-05,
+      "loss": 6.5818,
+      "step": 5000
+    },
+    {
+      "epoch": 0.9622641509433962,
+      "grad_norm": 0.021292181685566902,
+      "learning_rate": 1.3586614668511765e-05,
+      "loss": 6.7116,
+      "step": 5100
+    },
+    {
+      "epoch": 0.9811320754716981,
+      "grad_norm": 0.9271852970123291,
+      "learning_rate": 1.3460812680840358e-05,
+      "loss": 6.6276,
+      "step": 5200
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.8322843313217163,
+      "learning_rate": 1.3335010693168953e-05,
+      "loss": 6.6285,
+      "step": 5300
+    },
+    {
+      "epoch": 1.0188679245283019,
+      "grad_norm": 0.3267331123352051,
+      "learning_rate": 1.3209208705497547e-05,
+      "loss": 6.5646,
+      "step": 5400
+    },
+    {
+      "epoch": 1.0377358490566038,
+      "grad_norm": 0.760538637638092,
+      "learning_rate": 1.3083406717826144e-05,
+      "loss": 6.6934,
+      "step": 5500
+    },
+    {
+      "epoch": 1.0566037735849056,
+      "grad_norm": 1.0321497917175293,
+      "learning_rate": 1.2957604730154739e-05,
+      "loss": 6.4869,
+      "step": 5600
+    },
+    {
+      "epoch": 1.0754716981132075,
+      "grad_norm": 0.017552530393004417,
+      "learning_rate": 1.2831802742483332e-05,
+      "loss": 6.5972,
+      "step": 5700
+    },
+    {
+      "epoch": 1.0943396226415094,
+      "grad_norm": 0.9292933344841003,
+      "learning_rate": 1.2706000754811927e-05,
+      "loss": 6.6404,
+      "step": 5800
+    },
+    {
+      "epoch": 1.1132075471698113,
+      "grad_norm": 0.9518078565597534,
+      "learning_rate": 1.2580198767140522e-05,
+      "loss": 6.6773,
+      "step": 5900
+    },
+    {
+      "epoch": 1.1320754716981132,
+      "grad_norm": 0.6914264559745789,
+      "learning_rate": 1.2454396779469115e-05,
+      "loss": 6.6109,
+      "step": 6000
+    },
+    {
+      "epoch": 1.150943396226415,
+      "grad_norm": 1.0884290933609009,
+      "learning_rate": 1.2328594791797713e-05,
+      "loss": 6.6457,
+      "step": 6100
+    },
+    {
+      "epoch": 1.169811320754717,
+      "grad_norm": 0.7199152112007141,
+      "learning_rate": 1.2202792804126306e-05,
+      "loss": 6.6842,
+      "step": 6200
+    },
+    {
+      "epoch": 1.1886792452830188,
+      "grad_norm": 0.8702525496482849,
+      "learning_rate": 1.2076990816454901e-05,
+      "loss": 6.7451,
+      "step": 6300
+    },
+    {
+      "epoch": 1.2075471698113207,
+      "grad_norm": 0.012843250297009945,
+      "learning_rate": 1.1951188828783496e-05,
+      "loss": 6.7243,
+      "step": 6400
+    },
+    {
+      "epoch": 1.2264150943396226,
+      "grad_norm": 0.8592051863670349,
+      "learning_rate": 1.1825386841112089e-05,
+      "loss": 6.6312,
+      "step": 6500
+    },
+    {
+      "epoch": 1.2452830188679245,
+      "grad_norm": 0.6073086261749268,
+      "learning_rate": 1.1699584853440686e-05,
+      "loss": 6.7517,
+      "step": 6600
+    },
+    {
+      "epoch": 1.2641509433962264,
+      "grad_norm": 0.020668864250183105,
+      "learning_rate": 1.157378286576928e-05,
+      "loss": 6.7079,
+      "step": 6700
+    },
+    {
+      "epoch": 1.2830188679245282,
+      "grad_norm": 1.1033306121826172,
+      "learning_rate": 1.1447980878097875e-05,
+      "loss": 6.7079,
+      "step": 6800
+    },
+    {
+      "epoch": 1.3018867924528301,
+      "grad_norm": 1.2252494096755981,
+      "learning_rate": 1.132217889042647e-05,
+      "loss": 6.6514,
+      "step": 6900
+    },
+    {
+      "epoch": 1.320754716981132,
+      "grad_norm": 1.137609839439392,
+      "learning_rate": 1.1196376902755063e-05,
+      "loss": 6.7933,
+      "step": 7000
+    },
+    {
+      "epoch": 1.3396226415094339,
+      "grad_norm": 0.4724065959453583,
+      "learning_rate": 1.1070574915083658e-05,
+      "loss": 6.5575,
+      "step": 7100
+    },
+    {
+      "epoch": 1.3584905660377358,
+      "grad_norm": 0.5756193399429321,
+      "learning_rate": 1.0944772927412255e-05,
+      "loss": 6.618,
+      "step": 7200
+    },
+    {
+      "epoch": 1.3773584905660377,
+      "grad_norm": 0.015185345895588398,
+      "learning_rate": 1.081897093974085e-05,
+      "loss": 6.6726,
+      "step": 7300
+    },
+    {
+      "epoch": 1.3962264150943398,
+      "grad_norm": 1.1484003067016602,
+      "learning_rate": 1.0693168952069444e-05,
+      "loss": 6.5617,
+      "step": 7400
+    },
+    {
+      "epoch": 1.4150943396226414,
+      "grad_norm": 0.02675127424299717,
+      "learning_rate": 1.0567366964398038e-05,
+      "loss": 6.6635,
+      "step": 7500
+    },
+    {
+      "epoch": 1.4339622641509435,
+      "grad_norm": 0.821569561958313,
+      "learning_rate": 1.0441564976726632e-05,
+      "loss": 6.6775,
+      "step": 7600
+    },
+    {
+      "epoch": 1.4528301886792452,
+      "grad_norm": 1.14842689037323,
+      "learning_rate": 1.0315762989055227e-05,
+      "loss": 6.5903,
+      "step": 7700
+    },
+    {
+      "epoch": 1.4716981132075473,
+      "grad_norm": 0.9347752332687378,
+      "learning_rate": 1.0189961001383824e-05,
+      "loss": 6.6971,
+      "step": 7800
+    },
+    {
+      "epoch": 1.490566037735849,
+      "grad_norm": 1.080321192741394,
+      "learning_rate": 1.0064159013712419e-05,
+      "loss": 6.7747,
+      "step": 7900
+    },
+    {
+      "epoch": 1.509433962264151,
+      "grad_norm": 1.0815764665603638,
+      "learning_rate": 9.938357026041012e-06,
+      "loss": 6.7454,
+      "step": 8000
+    },
+    {
+      "epoch": 1.5283018867924527,
+      "grad_norm": 1.3317512273788452,
+      "learning_rate": 9.812555038369607e-06,
+      "loss": 6.6851,
+      "step": 8100
+    },
+    {
+      "epoch": 1.5471698113207548,
+      "grad_norm": 1.2034707069396973,
+      "learning_rate": 9.686753050698202e-06,
+      "loss": 6.6681,
+      "step": 8200
+    },
+    {
+      "epoch": 1.5660377358490565,
+      "grad_norm": 0.844953715801239,
+      "learning_rate": 9.560951063026796e-06,
+      "loss": 6.6648,
+      "step": 8300
+    },
+    {
+      "epoch": 1.5849056603773586,
+      "grad_norm": 1.811436414718628,
+      "learning_rate": 9.435149075355391e-06,
+      "loss": 6.7758,
+      "step": 8400
+    },
+    {
+      "epoch": 1.6037735849056602,
+      "grad_norm": 0.948888897895813,
+      "learning_rate": 9.309347087683986e-06,
+      "loss": 6.6531,
+      "step": 8500
+    },
+    {
+      "epoch": 1.6226415094339623,
+      "grad_norm": 0.024910159409046173,
+      "learning_rate": 9.183545100012581e-06,
+      "loss": 6.6952,
+      "step": 8600
+    },
+    {
+      "epoch": 1.641509433962264,
+      "grad_norm": 0.021278131753206253,
+      "learning_rate": 9.057743112341176e-06,
+      "loss": 6.5898,
+      "step": 8700
+    },
+    {
+      "epoch": 1.6603773584905661,
+      "grad_norm": 0.4377930462360382,
+      "learning_rate": 8.93194112466977e-06,
+      "loss": 6.7929,
+      "step": 8800
+    },
+    {
+      "epoch": 1.6792452830188678,
+      "grad_norm": 0.9045907855033875,
+      "learning_rate": 8.806139136998365e-06,
+      "loss": 6.7079,
+      "step": 8900
+    },
+    {
+      "epoch": 1.6981132075471699,
+      "grad_norm": 0.01687094010412693,
+      "learning_rate": 8.68033714932696e-06,
+      "loss": 6.6295,
+      "step": 9000
+    },
+    {
+      "epoch": 1.7169811320754715,
+      "grad_norm": 0.10603800415992737,
+      "learning_rate": 8.554535161655555e-06,
+      "loss": 6.6663,
+      "step": 9100
+    },
+    {
+      "epoch": 1.7358490566037736,
+      "grad_norm": 1.1065797805786133,
+      "learning_rate": 8.42873317398415e-06,
+      "loss": 6.5519,
+      "step": 9200
+    },
+    {
+      "epoch": 1.7547169811320755,
+      "grad_norm": 0.6226603388786316,
+      "learning_rate": 8.302931186312745e-06,
+      "loss": 6.679,
+      "step": 9300
+    },
+    {
+      "epoch": 1.7735849056603774,
+      "grad_norm": 0.6876096725463867,
+      "learning_rate": 8.17712919864134e-06,
+      "loss": 6.7494,
+      "step": 9400
+    },
+    {
+      "epoch": 1.7924528301886793,
+      "grad_norm": 0.017670048400759697,
+      "learning_rate": 8.051327210969933e-06,
+      "loss": 6.8879,
+      "step": 9500
+    },
+    {
+      "epoch": 1.8113207547169812,
+      "grad_norm": 0.861192524433136,
+      "learning_rate": 7.92552522329853e-06,
+      "loss": 6.5379,
+      "step": 9600
+    },
+    {
+      "epoch": 1.830188679245283,
+      "grad_norm": 1.216601848602295,
+      "learning_rate": 7.799723235627124e-06,
+      "loss": 6.6839,
+      "step": 9700
+    },
+    {
+      "epoch": 1.849056603773585,
+      "grad_norm": 0.017238155007362366,
+      "learning_rate": 7.673921247955717e-06,
+      "loss": 6.6944,
+      "step": 9800
+    },
+    {
+      "epoch": 1.8679245283018868,
+      "grad_norm": 1.4015023708343506,
+      "learning_rate": 7.548119260284313e-06,
+      "loss": 6.5428,
+      "step": 9900
+    },
+    {
+      "epoch": 1.8867924528301887,
+      "grad_norm": 0.01809878647327423,
+      "learning_rate": 7.422317272612908e-06,
+      "loss": 6.6884,
+      "step": 10000
+    },
+    {
+      "epoch": 1.9056603773584906,
+      "grad_norm": 1.3040437698364258,
+      "learning_rate": 7.296515284941502e-06,
+      "loss": 6.707,
+      "step": 10100
+    },
+    {
+      "epoch": 1.9245283018867925,
+      "grad_norm": 0.7161667943000793,
+      "learning_rate": 7.170713297270098e-06,
+      "loss": 6.7509,
+      "step": 10200
+    },
+    {
+      "epoch": 1.9433962264150944,
+      "grad_norm": 0.7398432493209839,
+      "learning_rate": 7.044911309598692e-06,
+      "loss": 6.7263,
+      "step": 10300
+    },
+    {
+      "epoch": 1.9622641509433962,
+      "grad_norm": 0.4890976548194885,
+      "learning_rate": 6.919109321927287e-06,
+      "loss": 6.6012,
+      "step": 10400
+    },
+    {
+      "epoch": 1.9811320754716981,
+      "grad_norm": 0.7056707143783569,
+      "learning_rate": 6.793307334255882e-06,
+      "loss": 6.8422,
+      "step": 10500
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.01583670824766159,
+      "learning_rate": 6.667505346584476e-06,
+      "loss": 6.7954,
+      "step": 10600
+    },
+    {
+      "epoch": 2.018867924528302,
+      "grad_norm": 0.019600138068199158,
+      "learning_rate": 6.541703358913072e-06,
+      "loss": 6.5555,
+      "step": 10700
+    },
+    {
+      "epoch": 2.0377358490566038,
+      "grad_norm": 1.0875599384307861,
+      "learning_rate": 6.415901371241666e-06,
+      "loss": 6.6792,
+      "step": 10800
+    },
+    {
+      "epoch": 2.056603773584906,
+      "grad_norm": 0.01668621227145195,
+      "learning_rate": 6.290099383570261e-06,
+      "loss": 6.6727,
+      "step": 10900
+    },
+    {
+      "epoch": 2.0754716981132075,
+      "grad_norm": 0.027622198686003685,
+      "learning_rate": 6.1642973958988565e-06,
+      "loss": 6.6928,
+      "step": 11000
+    },
+    {
+      "epoch": 2.0943396226415096,
+      "grad_norm": 1.4289435148239136,
+      "learning_rate": 6.0384954082274505e-06,
+      "loss": 6.7192,
+      "step": 11100
+    },
+    {
+      "epoch": 2.1132075471698113,
+      "grad_norm": 1.1510781049728394,
+      "learning_rate": 5.9126934205560445e-06,
+      "loss": 6.7552,
+      "step": 11200
+    },
+    {
+      "epoch": 2.1320754716981134,
+      "grad_norm": 1.010807991027832,
+      "learning_rate": 5.78689143288464e-06,
+      "loss": 6.5937,
+      "step": 11300
+    },
+    {
+      "epoch": 2.150943396226415,
+      "grad_norm": 1.253769874572754,
+      "learning_rate": 5.661089445213235e-06,
+      "loss": 6.6097,
+      "step": 11400
+    },
+    {
+      "epoch": 2.169811320754717,
+      "grad_norm": 0.6519715785980225,
+      "learning_rate": 5.535287457541829e-06,
+      "loss": 6.6471,
+      "step": 11500
+    },
+    {
+      "epoch": 2.188679245283019,
+      "grad_norm": 0.7796726822853088,
+      "learning_rate": 5.409485469870425e-06,
+      "loss": 6.5562,
+      "step": 11600
+    },
+    {
+      "epoch": 2.207547169811321,
+      "grad_norm": 0.030649475753307343,
+      "learning_rate": 5.283683482199019e-06,
+      "loss": 6.75,
+      "step": 11700
+    },
+    {
+      "epoch": 2.2264150943396226,
+      "grad_norm": 1.440807819366455,
+      "learning_rate": 5.157881494527614e-06,
+      "loss": 6.7018,
+      "step": 11800
+    },
+    {
+      "epoch": 2.2452830188679247,
+      "grad_norm": 0.01666397601366043,
+      "learning_rate": 5.032079506856209e-06,
+      "loss": 6.6883,
+      "step": 11900
+    },
+    {
+      "epoch": 2.2641509433962264,
+      "grad_norm": 1.349582314491272,
+      "learning_rate": 4.906277519184803e-06,
+      "loss": 6.6214,
+      "step": 12000
+    },
+    {
+      "epoch": 2.2830188679245285,
+      "grad_norm": 1.6022156476974487,
+      "learning_rate": 4.780475531513398e-06,
+      "loss": 6.5044,
+      "step": 12100
+    },
+    {
+      "epoch": 2.30188679245283,
+      "grad_norm": 0.7667035460472107,
+      "learning_rate": 4.654673543841993e-06,
+      "loss": 6.5574,
+      "step": 12200
+    },
+    {
+      "epoch": 2.3207547169811322,
+      "grad_norm": 0.15926729142665863,
+      "learning_rate": 4.528871556170588e-06,
+      "loss": 6.5422,
+      "step": 12300
+    },
+    {
+      "epoch": 2.339622641509434,
+      "grad_norm": 0.7738819718360901,
+      "learning_rate": 4.403069568499183e-06,
+      "loss": 6.6868,
+      "step": 12400
+    },
+    {
+      "epoch": 2.358490566037736,
+      "grad_norm": 1.2127015590667725,
+      "learning_rate": 4.277267580827778e-06,
+      "loss": 6.57,
+      "step": 12500
+    },
+    {
+      "epoch": 2.3773584905660377,
+      "grad_norm": 1.2758924961090088,
+      "learning_rate": 4.1514655931563724e-06,
+      "loss": 6.5772,
+      "step": 12600
+    },
+    {
+      "epoch": 2.3962264150943398,
+      "grad_norm": 0.5068910121917725,
+      "learning_rate": 4.0256636054849664e-06,
+      "loss": 6.6545,
+      "step": 12700
+    },
+    {
+      "epoch": 2.4150943396226414,
+      "grad_norm": 1.9382731914520264,
+      "learning_rate": 3.899861617813562e-06,
+      "loss": 6.8134,
+      "step": 12800
+    },
+    {
+      "epoch": 2.4339622641509435,
+      "grad_norm": 1.1461691856384277,
+      "learning_rate": 3.7740596301421566e-06,
+      "loss": 6.714,
+      "step": 12900
+    },
+    {
+      "epoch": 2.452830188679245,
+      "grad_norm": 0.01691712997853756,
+      "learning_rate": 3.648257642470751e-06,
+      "loss": 6.6254,
+      "step": 13000
+    },
+    {
+      "epoch": 2.4716981132075473,
+      "grad_norm": 1.160014271736145,
+      "learning_rate": 3.522455654799346e-06,
+      "loss": 6.7595,
+      "step": 13100
+    },
+    {
+      "epoch": 2.490566037735849,
+      "grad_norm": 0.01536568719893694,
+      "learning_rate": 3.396653667127941e-06,
+      "loss": 6.7556,
+      "step": 13200
+    },
+    {
+      "epoch": 2.509433962264151,
+      "grad_norm": 1.5934854745864868,
+      "learning_rate": 3.270851679456536e-06,
+      "loss": 6.7787,
+      "step": 13300
+    },
+    {
+      "epoch": 2.5283018867924527,
+      "grad_norm": 1.256834626197815,
+      "learning_rate": 3.1450496917851304e-06,
+      "loss": 6.682,
+      "step": 13400
+    },
+    {
+      "epoch": 2.547169811320755,
+      "grad_norm": 0.19352222979068756,
+      "learning_rate": 3.0192477041137253e-06,
+      "loss": 6.62,
+      "step": 13500
+    },
+    {
+      "epoch": 2.5660377358490565,
+      "grad_norm": 1.6496492624282837,
+      "learning_rate": 2.89344571644232e-06,
+      "loss": 6.6104,
+      "step": 13600
+    },
+    {
+      "epoch": 2.5849056603773586,
+      "grad_norm": 0.8581194877624512,
+      "learning_rate": 2.7676437287709145e-06,
+      "loss": 6.6019,
+      "step": 13700
+    },
+    {
+      "epoch": 2.6037735849056602,
+      "grad_norm": 1.436110019683838,
+      "learning_rate": 2.6418417410995094e-06,
+      "loss": 6.6612,
+      "step": 13800
+    },
+    {
+      "epoch": 2.6226415094339623,
+      "grad_norm": 1.5139416456222534,
+      "learning_rate": 2.5160397534281047e-06,
+      "loss": 6.6926,
+      "step": 13900
+    },
+    {
+      "epoch": 2.641509433962264,
+      "grad_norm": 1.4659171104431152,
+      "learning_rate": 2.390237765756699e-06,
+      "loss": 6.6786,
+      "step": 14000
+    },
+    {
+      "epoch": 2.660377358490566,
+      "grad_norm": 0.016153251752257347,
+      "learning_rate": 2.264435778085294e-06,
+      "loss": 6.8056,
+      "step": 14100
+    },
+    {
+      "epoch": 2.6792452830188678,
+      "grad_norm": 1.0579724311828613,
+      "learning_rate": 2.138633790413889e-06,
+      "loss": 6.6101,
+      "step": 14200
+    },
+    {
+      "epoch": 2.69811320754717,
+      "grad_norm": 0.0181367639452219,
+      "learning_rate": 2.0128318027424832e-06,
+      "loss": 6.633,
+      "step": 14300
+    },
+    {
+      "epoch": 2.7169811320754715,
+      "grad_norm": 0.01983201876282692,
+      "learning_rate": 1.8870298150710783e-06,
+      "loss": 6.7544,
+      "step": 14400
+    },
+    {
+      "epoch": 2.7358490566037736,
+      "grad_norm": 1.0050116777420044,
+      "learning_rate": 1.761227827399673e-06,
+      "loss": 6.6989,
+      "step": 14500
+    },
+    {
+      "epoch": 2.7547169811320753,
+      "grad_norm": 0.9864720702171326,
+      "learning_rate": 1.635425839728268e-06,
+      "loss": 6.8561,
+      "step": 14600
+    },
+    {
+      "epoch": 2.7735849056603774,
+      "grad_norm": 0.5503388047218323,
+      "learning_rate": 1.5096238520568626e-06,
+      "loss": 6.5487,
+      "step": 14700
+    },
+    {
+      "epoch": 2.7924528301886795,
+      "grad_norm": 1.6037436723709106,
+      "learning_rate": 1.3838218643854573e-06,
+      "loss": 6.6174,
+      "step": 14800
+    },
+    {
+      "epoch": 2.811320754716981,
+      "grad_norm": 0.021289100870490074,
+      "learning_rate": 1.2580198767140523e-06,
+      "loss": 6.6794,
+      "step": 14900
+    },
+    {
+      "epoch": 2.830188679245283,
+      "grad_norm": 0.016215715557336807,
+      "learning_rate": 1.132217889042647e-06,
+      "loss": 6.7682,
+      "step": 15000
+    },
+    {
+      "epoch": 2.849056603773585,
+      "grad_norm": 1.8671302795410156,
+      "learning_rate": 1.0064159013712416e-06,
+      "loss": 6.6019,
+      "step": 15100
+    },
+    {
+      "epoch": 2.867924528301887,
+      "grad_norm": 1.1597052812576294,
+      "learning_rate": 8.806139136998365e-07,
+      "loss": 6.6485,
+      "step": 15200
+    },
+    {
+      "epoch": 2.8867924528301887,
+      "grad_norm": 1.716347098350525,
+      "learning_rate": 7.548119260284313e-07,
+      "loss": 6.6007,
+      "step": 15300
+    },
+    {
+      "epoch": 2.9056603773584904,
+      "grad_norm": 0.8515202403068542,
+      "learning_rate": 6.290099383570262e-07,
+      "loss": 6.5699,
+      "step": 15400
+    },
+    {
+      "epoch": 2.9245283018867925,
+      "grad_norm": 1.5821613073349,
+      "learning_rate": 5.032079506856208e-07,
+      "loss": 6.5612,
+      "step": 15500
+    },
+    {
+      "epoch": 2.9433962264150946,
+      "grad_norm": 1.6181998252868652,
+      "learning_rate": 3.7740596301421566e-07,
+      "loss": 6.764,
+      "step": 15600
+    },
+    {
+      "epoch": 2.9622641509433962,
+      "grad_norm": 1.1383754014968872,
+      "learning_rate": 2.516039753428104e-07,
+      "loss": 6.6389,
+      "step": 15700
+    },
+    {
+      "epoch": 2.981132075471698,
+      "grad_norm": 0.02065679244697094,
+      "learning_rate": 1.258019876714052e-07,
+      "loss": 6.5413,
+      "step": 15800
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 1.228332757949829,
+      "learning_rate": 0.0,
+      "loss": 6.6513,
+      "step": 15900
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 15900,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.794462703493448e+17,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:484896cca9144ad839cf82aa9c38108bc7222986638fa502115d48e04d1f758d
+size 5304