diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..0aa05647e6c4345f2f71e53a1b7fb0683512b4b5 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,29 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_0.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_1.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_10.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_11.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_16.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_19.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_2.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_20.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_22.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_23.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_5.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_8.png filter=lfs diff=lfs merge=lfs -text
+eval/cpu/inference_grid_9.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_0.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_1.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_10.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_11.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_16.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_19.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_2.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_20.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_22.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_23.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_5.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_8.png filter=lfs diff=lfs merge=lfs -text
+eval/gpu/inference_grid_9.png filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c66352ce1383592cbb3825e78bdef8d89732f3c8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,354 @@
+---
+license: apache-2.0
+base_model:
+- microsoft/conditional-detr-resnet-50
+pipeline_tag: object-detection
+datasets:
+- tech4humans/signature-detection
+metrics:
+- f1
+- precision
+- recall
+library_name: transformers
+inference: false
+tags:
+- object-detection
+- signature-detection
+- detr
+- conditional-detr
+- pytorch
+model-index:
+- name: tech4humans/conditional-detr-50-signature-detector
+ results:
+ - task:
+ type: object-detection
+ dataset:
+ type: tech4humans/signature-detection
+ name: tech4humans/signature-detection
+ split: test
+ metrics:
+ - type: precision
+ value: 0.936524
+ name: mAP@0.5
+ - type: precision
+ value: 0.653321
+ name: mAP@0.5:0.95
+---
+
+# **Conditional-DETR ResNet-50 - Handwritten Signature Detection**
+
+This repository presents a Conditional-DETR model with a ResNet-50 backbone, fine-tuned to detect handwritten signatures in document images. It achieved the **highest mAP@0.5 (93.65%)** of all the architectures tested in our evaluation.
+
+| Resource | Links / Badges | Details |
+|---------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| **Article** | [](https://huggingface.co/blog/samuellimabraz/signature-detection-model) | A detailed community article covering the full development process of the project |
+| **Model Files (YOLOv8s)** | [](https://huggingface.co/tech4humans/yolov8s-signature-detector) | **Available formats:** [](https://pytorch.org/) [](https://onnx.ai/) [](https://developer.nvidia.com/tensorrt) |
+| **Dataset – Original** | [](https://universe.roboflow.com/tech-ysdkk/signature-detection-hlx8j) | 2,819 document images annotated with signature coordinates |
+| **Dataset – Processed** | [](https://huggingface.co/datasets/tech4humans/signature-detection) | Augmented and pre-processed version (640px) for model training |
+| **Notebooks – Model Experiments** | [](https://colab.research.google.com/drive/1wSySw_zwyuv6XSaGmkngI4dwbj-hR4ix) [](https://api.wandb.ai/links/samuel-lima-tech4humans/30cmrkp8) | Complete training and evaluation pipeline with selection among different architectures (yolo, detr, rt-detr, conditional-detr, yolos) |
+| **Notebooks – HP Tuning** | [](https://colab.research.google.com/drive/1wSySw_zwyuv6XSaGmkngI4dwbj-hR4ix) [](https://api.wandb.ai/links/samuel-lima-tech4humans/31a6zhb1) | Optuna trials for optimizing the precision/recall balance |
+| **Inference Server** | [](https://github.com/tech4ai/t4ai-signature-detect-server) | Complete deployment and inference pipeline with Triton Inference Server [](https://docs.openvino.ai/2025/index.html) [](https://www.docker.com/) [](https://developer.nvidia.com/triton-inference-server) |
+| **Live Demo** | [](https://huggingface.co/spaces/tech4humans/signature-detection) | Graphical interface with real-time inference [](https://www.gradio.app/) [](https://plotly.com/python/) |
+
+---
+
+## **Dataset**
+
+
+The training utilized a dataset built from two public datasets: [Tobacco800](https://paperswithcode.com/dataset/tobacco-800) and [signatures-xc8up](https://universe.roboflow.com/roboflow-100/signatures-xc8up), unified and processed in [Roboflow](https://roboflow.com/).
+
+**Dataset Summary:**
+- Training: 1,980 images (70%)
+- Validation: 420 images (15%)
+- Testing: 419 images (15%)
+- Format: COCO JSON
+- Resolution: 640x640 pixels
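+
+If you want to inspect the processed data, it can be loaded directly from the Hub with the `datasets` library (a minimal sketch; the split names below assume the train/validation/test layout described above):
+
+```python
+from datasets import load_dataset
+
+# Processed 640px version of the dataset used for training
+dataset = load_dataset("tech4humans/signature-detection")
+
+print(dataset)                     # available splits and their sizes
+print(dataset["train"][0].keys())  # fields of a single annotated example
+```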
+
+
+
+---
+
+## **Training Process**
+
+The training process involved the following steps:
+
+### 1. **Model Selection:**
+
+Various object detection models were evaluated to identify the best balance between precision, recall, and inference time.
+
+
+| **Metric** | [rtdetr-l](https://github.com/ultralytics/assets/releases/download/v8.2.0/rtdetr-l.pt) | [yolos-base](https://huggingface.co/hustvl/yolos-base) | [yolos-tiny](https://huggingface.co/hustvl/yolos-tiny) | [conditional-detr-resnet-50](https://huggingface.co/microsoft/conditional-detr-resnet-50) | [detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50) | [yolov8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x.pt) | [yolov8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l.pt) | [yolov8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt) | [yolov8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt) | [yolov8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt) | [yolo11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | [yolo11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | [yolo11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | [yolo11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | [yolo11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | [yolov10x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10x.pt) | [yolov10l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10l.pt) | [yolov10b](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10b.pt) | [yolov10m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10m.pt) | [yolov10s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10s.pt) | [yolov10n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10n.pt) |
+|:---------------------|---------:|-----------:|-----------:|---------------------------:|---------------:|--------:|--------:|--------:|--------:|--------:|--------:|--------:|--------:|--------:|--------:|---------:|---------:|---------:|---------:|---------:|---------:|
+| **Inference Time - CPU (ms)** | 583.608 | 1706.49 | 265.346 | 476.831 | 425.649 | 1259.47 | 871.329 | 401.183 | 216.6 | 110.442 | 1016.68 | 518.147 | 381.652 | 179.792 | 106.656 | 821.183 | 580.767 | 473.109 | 320.12 | 150.076 | **73.8596** |
+| **mAP50** | 0.92709 | 0.901154 | 0.869814 | **0.936524** | 0.88885 | 0.794237| 0.800312| 0.875322| 0.874721| 0.816089| 0.667074| 0.707409| 0.809557| 0.835605| 0.813799| 0.681023| 0.726802| 0.789835| 0.787688| 0.663877| 0.734332 |
+| **mAP50-95** | 0.622364 | 0.583569 | 0.469064 | 0.653321 | 0.579428 | 0.552919| 0.593976| **0.665495**| 0.65457 | 0.623963| 0.482289| 0.499126| 0.600797| 0.638849| 0.617496| 0.474535| 0.522654| 0.578874| 0.581259| 0.473857| 0.552704 |
+
+
+
+
+#### Highlights:
+- **Best mAP50:** `conditional-detr-resnet-50` (**0.936524**)
+- **Best mAP50-95:** `yolov8m` (**0.665495**)
+- **Fastest Inference Time:** `yolov10n` (**73.8596 ms**)
+
+Detailed experiments are available on [**Weights & Biases**](https://api.wandb.ai/links/samuel-lima-tech4humans/30cmrkp8).
+
+### 2. **Hyperparameter Tuning:**
+
+The YOLOv8s model, which demonstrated a good balance of inference time, precision, and recall, was selected for hyperparameter tuning.
+
+[Optuna](https://optuna.org/) was used to run 20 optimization trials over the following search space:
+
+```python
+ dropout = trial.suggest_float("dropout", 0.0, 0.5, step=0.1)
+ lr0 = trial.suggest_float("lr0", 1e-5, 1e-1, log=True)
+ box = trial.suggest_float("box", 3.0, 7.0, step=1.0)
+ cls = trial.suggest_float("cls", 0.5, 1.5, step=0.2)
+ opt = trial.suggest_categorical("optimizer", ["AdamW", "RMSProp"])
+```
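+
+For reference, a minimal sketch of how such a search space is typically wrapped in an Optuna study is shown below. The `run_training` helper is a hypothetical stand-in for one YOLOv8s training/validation run, not the project's actual code; it should return the validation metric being maximized (e.g. the F1-score).
+
+```python
+import optuna
+
+def objective(trial):
+    # Sample the search space shown above for this trial
+    params = {
+        "dropout": trial.suggest_float("dropout", 0.0, 0.5, step=0.1),
+        "lr0": trial.suggest_float("lr0", 1e-5, 1e-1, log=True),
+        "box": trial.suggest_float("box", 3.0, 7.0, step=1.0),
+        "cls": trial.suggest_float("cls", 0.5, 1.5, step=0.2),
+        "optimizer": trial.suggest_categorical("optimizer", ["AdamW", "RMSProp"]),
+    }
+    # run_training is a placeholder: train with these hyperparameters and
+    # return the validation metric to maximize
+    return run_training(**params)
+
+study = optuna.create_study(direction="maximize")
+study.optimize(objective, n_trials=20)
+print(study.best_trial.number, study.best_trial.params)
+```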
+Results can be visualized here: [**Hypertuning Experiment**](https://api.wandb.ai/links/samuel-lima-tech4humans/31a6zhb1).
+
+
+
+### 3. **Evaluation:**
+
+At the end of training, the models were evaluated on the test set in both ONNX (CPU) and TensorRT (GPU, T4) formats. Performance metrics included precision, recall, mAP50, and mAP50-95.
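+
+As a rough illustration, exporting a tuned YOLOv8s checkpoint to these formats and re-running validation can be done with Ultralytics as sketched below (the weight file and dataset YAML names are placeholders, not files shipped in this repository):
+
+```python
+from ultralytics import YOLO
+
+model = YOLO("best.pt")  # hypothetical path to the tuned YOLOv8s weights
+
+# Export to the formats used in the evaluation
+model.export(format="onnx")    # ONNX for CPU inference
+model.export(format="engine")  # TensorRT engine (requires a GPU with TensorRT installed)
+
+# Evaluate the exported ONNX model on the test split
+onnx_model = YOLO("best.onnx")
+metrics = onnx_model.val(data="signature-detection.yaml", split="test")
+print(metrics.box.map50, metrics.box.map)  # mAP50 and mAP50-95
+```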
+
+
+
+#### Results Comparison:
+
+| Metric | Base Model | Best Trial (#10) | Difference |
+|------------|------------|-------------------|-------------|
+| mAP50 | 87.47% | **95.75%** | +8.28% |
+| mAP50-95 | 65.46% | **66.26%** | +0.81% |
+| Precision | **97.23%** | 95.61% | -1.63% |
+| Recall | 76.16% | **91.21%** | +15.05% |
+| F1-score | 85.42% | **93.36%** | +7.94% |
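+
+For reference, the F1-scores above are the harmonic mean of precision and recall; for the best trial:
+
+$$\text{F1} = \frac{2 \cdot P \cdot R}{P + R} = \frac{2 \cdot 0.9561 \cdot 0.9121}{0.9561 + 0.9121} \approx 0.9336$$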
+
+---
+
+## **Results**
+
+After hyperparameter tuning of the YOLOv8s model, the best model achieved the following results on the test set:
+
+- **Precision:** 94.74%
+- **Recall:** 89.72%
+- **mAP@50:** 94.50%
+- **mAP@50-95:** 67.35%
+- **Inference Time:**
+ - **ONNX Runtime (CPU):** 171.56 ms
+ - **TensorRT (GPU - T4):** 7.657 ms
+
+---
+
+## **How to Use**
+
+### **Installation**
+
+```bash
+pip install transformers torch torchvision pillow
+```
+
+### **Inference**
+
+```python
+from transformers import AutoImageProcessor, AutoModelForObjectDetection
+from PIL import Image
+import torch
+
+# Load model and processor
+model_name = "tech4humans/conditional-detr-50-signature-detector"
+processor = AutoImageProcessor.from_pretrained(model_name)
+model = AutoModelForObjectDetection.from_pretrained(model_name)
+
+# Load and process image
+image = Image.open("path/to/your/document.jpg")
+inputs = processor(images=image, return_tensors="pt")
+
+# Run inference
+with torch.no_grad():
+ outputs = model(**inputs)
+
+# Post-process results
+target_sizes = torch.tensor([image.size[::-1]])
+results = processor.post_process_object_detection(
+ outputs, target_sizes=target_sizes, threshold=0.5
+)[0]
+
+# Extract detections
+for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+ box = [round(i, 2) for i in box.tolist()]
+ print(f"Detected signature with confidence {round(score.item(), 3)} at location {box}")
+```
+
+### **Visualization**
+
+```python
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+from PIL import Image
+
+def visualize_predictions(image_path, results, threshold=0.5):
+ image = Image.open(image_path)
+ fig, ax = plt.subplots(1, figsize=(12, 9))
+ ax.imshow(image)
+
+ for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+ if score > threshold:
+ x, y, x2, y2 = box.tolist()
+ width, height = x2 - x, y2 - y
+
+ rect = patches.Rectangle(
+ (x, y), width, height,
+ linewidth=2, edgecolor='red', facecolor='none'
+ )
+ ax.add_patch(rect)
+ ax.text(x, y-10, f'Signature: {score:.3f}',
+ bbox=dict(boxstyle="round,pad=0.3", facecolor="yellow", alpha=0.7))
+
+ ax.set_title("Signature Detection Results")
+ plt.axis('off')
+ plt.show()
+
+# Use the visualization
+visualize_predictions("path/to/your/document.jpg", results)
+```
+
+---
+
+## **Demo**
+
+You can explore the model and test real-time inference in the Hugging Face Spaces demo, built with Gradio and ONNX Runtime.
+
+[](https://huggingface.co/spaces/tech4humans/signature-detection)
+
+---
+
+## 🔗 **Inference with Triton Server**
+
+If you want to deploy this signature detection model in a production environment, check out our inference server repository based on the NVIDIA Triton Inference Server:
+
+[tech4ai/t4ai-signature-detect-server](https://github.com/tech4ai/t4ai-signature-detect-server)
+
+---
+
+## **Infrastructure**
+
+### Software
+
+The model was trained and tuned using a Jupyter Notebook environment.
+
+- **Operating System:** Ubuntu 22.04
+- **Python:** 3.10.12
+- **PyTorch:** 2.5.1+cu121
+- **Ultralytics:** 8.3.58
+- **Roboflow:** 1.1.50
+- **Optuna:** 4.1.0
+- **ONNX Runtime:** 1.20.1
+- **TensorRT:** 10.7.0
+
+### Hardware
+
+Training was performed on a Google Cloud Platform n1-standard-8 instance with the following specifications:
+
+- **CPU:** 8 vCPUs
+- **GPU:** NVIDIA Tesla T4
+
+---
+
+## **License**
+
+### Model Weights, Code and Training Materials – **Apache 2.0**
+- **License:** Apache License 2.0
+- **Usage:** All training scripts, deployment code, and usage instructions are licensed under the Apache 2.0 license.
+
+---
+
+## **Citation**
+
+If you use this model in your research, please cite:
+
+```bibtex
+@misc{lima2024conditional-detr-signature-detection,
+ title={Conditional-DETR for Handwritten Signature Detection},
+ author={Lima, Samuel and Tech4Humans Team},
+ year={2024},
+ publisher={Hugging Face},
+ url={https://huggingface.co/tech4humans/conditional-detr-50-signature-detector}
+}
+```
+
+---
+
+## **Contact and Information**
+
+For further information, questions, or contributions, contact us at **iag@tech4h.com.br**.
+
+
+
+## **Author**
+
+**Samuel Lima**, AI Research Engineer
+
+**Responsibilities in this Project:**
+- 🔬 Model development and training
+- 📊 Dataset analysis and processing
+- ⚙️ Architecture selection and performance evaluation
+- 📝 Technical documentation and model card
+
+---
+
+
diff --git a/best_checkpoint/config.json b/best_checkpoint/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f75103d2a8582229fe2ced88551ad02cbb27e1c
--- /dev/null
+++ b/best_checkpoint/config.json
@@ -0,0 +1,61 @@
+{
+ "_name_or_path": "microsoft/conditional-detr-resnet-50",
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "architectures": [
+ "ConditionalDetrForObjectDetection"
+ ],
+ "attention_dropout": 0.0,
+ "auxiliary_loss": false,
+ "backbone": "resnet50",
+ "backbone_config": null,
+ "backbone_kwargs": {
+ "in_chans": 3,
+ "out_indices": [
+ 1,
+ 2,
+ 3,
+ 4
+ ]
+ },
+ "bbox_cost": 5,
+ "bbox_loss_coefficient": 5,
+ "class_cost": 2,
+ "cls_loss_coefficient": 2,
+ "d_model": 256,
+ "decoder_attention_heads": 8,
+ "decoder_ffn_dim": 2048,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 6,
+ "dice_loss_coefficient": 1,
+ "dilation": false,
+ "dropout": 0.1,
+ "encoder_attention_heads": 8,
+ "encoder_ffn_dim": 2048,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 6,
+ "focal_alpha": 0.25,
+ "giou_cost": 2,
+ "giou_loss_coefficient": 2,
+ "id2label": {
+ "0": "signature"
+ },
+ "init_std": 0.02,
+ "init_xavier_std": 1.0,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "signature": 0
+ },
+ "mask_loss_coefficient": 1,
+ "max_position_embeddings": 1024,
+ "model_type": "conditional_detr",
+ "num_channels": 3,
+ "num_hidden_layers": 6,
+ "num_queries": 300,
+ "position_embedding_type": "sine",
+ "scale_embedding": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.46.3",
+ "use_pretrained_backbone": true,
+ "use_timm_backbone": true
+}
diff --git a/best_checkpoint/model.safetensors b/best_checkpoint/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..623115fef6b4192304696ca5a8166762ae27d387
--- /dev/null
+++ b/best_checkpoint/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b804b3797a81dbaa7f803c93ddff884acb321b10f3ad2520861b378e72cb3ef
+size 174075684
diff --git a/best_checkpoint/optimizer.pt b/best_checkpoint/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c6e449fa83e8cc884ca80e57658d9f08e3a2c1e
--- /dev/null
+++ b/best_checkpoint/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60667f62d23d0156209d0db0cd48fc1bf1aaaabf2f564a2cf22aa304543eecd0
+size 345689625
diff --git a/best_checkpoint/preprocessor_config.json b/best_checkpoint/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8fa4bd2a1e0250a62cdd19c0597f2100eec51d15
--- /dev/null
+++ b/best_checkpoint/preprocessor_config.json
@@ -0,0 +1,26 @@
+{
+ "do_convert_annotations": true,
+ "do_normalize": true,
+ "do_pad": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "format": "coco_detection",
+ "image_mean": [
+ 0.485,
+ 0.456,
+ 0.406
+ ],
+ "image_processor_type": "ConditionalDetrImageProcessor",
+ "image_std": [
+ 0.229,
+ 0.224,
+ 0.225
+ ],
+ "pad_size": null,
+ "resample": 2,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "height": 640,
+ "width": 640
+ }
+}
diff --git a/best_checkpoint/rng_state.pth b/best_checkpoint/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2a0e58d3ec4cda441b7ad0baa595c06d8bc4ccc5
--- /dev/null
+++ b/best_checkpoint/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:672f61b91e1dc0ec0cfc7cc6bea9c0630fa1b53fe3a606869eead6061469864c
+size 14244
diff --git a/best_checkpoint/scheduler.pt b/best_checkpoint/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..91019efb8414ccac74569365310475adec2ab102
--- /dev/null
+++ b/best_checkpoint/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73201c99891272e8d20ef63730f93b9b956d012d7aefe414a361a43f9b574909
+size 1064
diff --git a/best_checkpoint/trainer_state.json b/best_checkpoint/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..389daeebfecd1109bea96701ecf998b5a0fc3402
--- /dev/null
+++ b/best_checkpoint/trainer_state.json
@@ -0,0 +1,62442 @@
+{
+ "best_metric": 0.9409,
+ "best_model_checkpoint": "/content/gcs/iag-training/models/image/signature-detection/detr/conditional-detr-resnet-50/checkpoint-6300",
+ "epoch": 50.0,
+ "eval_steps": 500,
+ "global_step": 8750,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.005714285714285714,
+ "grad_norm": 860.2239990234375,
+ "learning_rate": 5.714285714285715e-08,
+ "loss": 48.7852,
+ "step": 1
+ },
+ {
+ "epoch": 0.011428571428571429,
+ "grad_norm": 1068.1214599609375,
+ "learning_rate": 1.142857142857143e-07,
+ "loss": 59.7628,
+ "step": 2
+ },
+ {
+ "epoch": 0.017142857142857144,
+ "grad_norm": 629.9717407226562,
+ "learning_rate": 1.7142857142857143e-07,
+ "loss": 35.0063,
+ "step": 3
+ },
+ {
+ "epoch": 0.022857142857142857,
+ "grad_norm": 969.9238891601562,
+ "learning_rate": 2.285714285714286e-07,
+ "loss": 55.0153,
+ "step": 4
+ },
+ {
+ "epoch": 0.02857142857142857,
+ "grad_norm": 976.5315551757812,
+ "learning_rate": 2.8571428571428575e-07,
+ "loss": 54.7871,
+ "step": 5
+ },
+ {
+ "epoch": 0.03428571428571429,
+ "grad_norm": 848.9593505859375,
+ "learning_rate": 3.4285714285714286e-07,
+ "loss": 48.7854,
+ "step": 6
+ },
+ {
+ "epoch": 0.04,
+ "grad_norm": 1419.771728515625,
+ "learning_rate": 4.0000000000000003e-07,
+ "loss": 78.9038,
+ "step": 7
+ },
+ {
+ "epoch": 0.045714285714285714,
+ "grad_norm": 863.019287109375,
+ "learning_rate": 4.571428571428572e-07,
+ "loss": 47.4685,
+ "step": 8
+ },
+ {
+ "epoch": 0.05142857142857143,
+ "grad_norm": 1193.3538818359375,
+ "learning_rate": 5.142857142857143e-07,
+ "loss": 67.3228,
+ "step": 9
+ },
+ {
+ "epoch": 0.05714285714285714,
+ "grad_norm": 939.5175170898438,
+ "learning_rate": 5.714285714285715e-07,
+ "loss": 53.061,
+ "step": 10
+ },
+ {
+ "epoch": 0.06285714285714286,
+ "grad_norm": 775.0874633789062,
+ "learning_rate": 6.285714285714287e-07,
+ "loss": 43.3721,
+ "step": 11
+ },
+ {
+ "epoch": 0.06857142857142857,
+ "grad_norm": 1446.0272216796875,
+ "learning_rate": 6.857142857142857e-07,
+ "loss": 80.2565,
+ "step": 12
+ },
+ {
+ "epoch": 0.07428571428571429,
+ "grad_norm": 1051.2706298828125,
+ "learning_rate": 7.428571428571429e-07,
+ "loss": 58.7238,
+ "step": 13
+ },
+ {
+ "epoch": 0.08,
+ "grad_norm": 925.4149780273438,
+ "learning_rate": 8.000000000000001e-07,
+ "loss": 52.5781,
+ "step": 14
+ },
+ {
+ "epoch": 0.08571428571428572,
+ "grad_norm": 1029.443115234375,
+ "learning_rate": 8.571428571428572e-07,
+ "loss": 57.8017,
+ "step": 15
+ },
+ {
+ "epoch": 0.09142857142857143,
+ "grad_norm": 568.0119018554688,
+ "learning_rate": 9.142857142857144e-07,
+ "loss": 32.4704,
+ "step": 16
+ },
+ {
+ "epoch": 0.09714285714285714,
+ "grad_norm": 1057.5506591796875,
+ "learning_rate": 9.714285714285715e-07,
+ "loss": 59.1879,
+ "step": 17
+ },
+ {
+ "epoch": 0.10285714285714286,
+ "grad_norm": 1026.0074462890625,
+ "learning_rate": 1.0285714285714286e-06,
+ "loss": 55.431,
+ "step": 18
+ },
+ {
+ "epoch": 0.10857142857142857,
+ "grad_norm": 1320.9364013671875,
+ "learning_rate": 1.0857142857142858e-06,
+ "loss": 73.9857,
+ "step": 19
+ },
+ {
+ "epoch": 0.11428571428571428,
+ "grad_norm": 1451.337646484375,
+ "learning_rate": 1.142857142857143e-06,
+ "loss": 64.1714,
+ "step": 20
+ },
+ {
+ "epoch": 0.12,
+ "grad_norm": 963.4343872070312,
+ "learning_rate": 1.2000000000000002e-06,
+ "loss": 54.32,
+ "step": 21
+ },
+ {
+ "epoch": 0.12571428571428572,
+ "grad_norm": 1317.2686767578125,
+ "learning_rate": 1.2571428571428573e-06,
+ "loss": 72.2047,
+ "step": 22
+ },
+ {
+ "epoch": 0.13142857142857142,
+ "grad_norm": 653.3263549804688,
+ "learning_rate": 1.3142857142857145e-06,
+ "loss": 36.5625,
+ "step": 23
+ },
+ {
+ "epoch": 0.13714285714285715,
+ "grad_norm": 1563.2149658203125,
+ "learning_rate": 1.3714285714285715e-06,
+ "loss": 86.5436,
+ "step": 24
+ },
+ {
+ "epoch": 0.14285714285714285,
+ "grad_norm": 910.78564453125,
+ "learning_rate": 1.4285714285714286e-06,
+ "loss": 46.524,
+ "step": 25
+ },
+ {
+ "epoch": 0.14857142857142858,
+ "grad_norm": 850.3976440429688,
+ "learning_rate": 1.4857142857142858e-06,
+ "loss": 45.7479,
+ "step": 26
+ },
+ {
+ "epoch": 0.15428571428571428,
+ "grad_norm": 874.3585815429688,
+ "learning_rate": 1.542857142857143e-06,
+ "loss": 47.2899,
+ "step": 27
+ },
+ {
+ "epoch": 0.16,
+ "grad_norm": 1307.404541015625,
+ "learning_rate": 1.6000000000000001e-06,
+ "loss": 69.136,
+ "step": 28
+ },
+ {
+ "epoch": 0.1657142857142857,
+ "grad_norm": 1136.086669921875,
+ "learning_rate": 1.657142857142857e-06,
+ "loss": 58.2475,
+ "step": 29
+ },
+ {
+ "epoch": 0.17142857142857143,
+ "grad_norm": 1073.1068115234375,
+ "learning_rate": 1.7142857142857145e-06,
+ "loss": 59.151,
+ "step": 30
+ },
+ {
+ "epoch": 0.17714285714285713,
+ "grad_norm": 1063.392822265625,
+ "learning_rate": 1.7714285714285714e-06,
+ "loss": 56.9664,
+ "step": 31
+ },
+ {
+ "epoch": 0.18285714285714286,
+ "grad_norm": 1098.790283203125,
+ "learning_rate": 1.8285714285714288e-06,
+ "loss": 55.6005,
+ "step": 32
+ },
+ {
+ "epoch": 0.18857142857142858,
+ "grad_norm": 1341.4580078125,
+ "learning_rate": 1.8857142857142858e-06,
+ "loss": 65.0587,
+ "step": 33
+ },
+ {
+ "epoch": 0.19428571428571428,
+ "grad_norm": 637.0850830078125,
+ "learning_rate": 1.942857142857143e-06,
+ "loss": 34.4329,
+ "step": 34
+ },
+ {
+ "epoch": 0.2,
+ "grad_norm": 838.4291381835938,
+ "learning_rate": 2.0000000000000003e-06,
+ "loss": 41.3785,
+ "step": 35
+ },
+ {
+ "epoch": 0.2057142857142857,
+ "grad_norm": 1155.2122802734375,
+ "learning_rate": 2.0571428571428573e-06,
+ "loss": 53.1862,
+ "step": 36
+ },
+ {
+ "epoch": 0.21142857142857144,
+ "grad_norm": 519.1825561523438,
+ "learning_rate": 2.1142857142857147e-06,
+ "loss": 27.9733,
+ "step": 37
+ },
+ {
+ "epoch": 0.21714285714285714,
+ "grad_norm": 1609.19482421875,
+ "learning_rate": 2.1714285714285716e-06,
+ "loss": 43.9349,
+ "step": 38
+ },
+ {
+ "epoch": 0.22285714285714286,
+ "grad_norm": 666.6470336914062,
+ "learning_rate": 2.228571428571429e-06,
+ "loss": 32.3454,
+ "step": 39
+ },
+ {
+ "epoch": 0.22857142857142856,
+ "grad_norm": 676.74267578125,
+ "learning_rate": 2.285714285714286e-06,
+ "loss": 32.8595,
+ "step": 40
+ },
+ {
+ "epoch": 0.2342857142857143,
+ "grad_norm": 883.8102416992188,
+ "learning_rate": 2.342857142857143e-06,
+ "loss": 43.0263,
+ "step": 41
+ },
+ {
+ "epoch": 0.24,
+ "grad_norm": 982.6002197265625,
+ "learning_rate": 2.4000000000000003e-06,
+ "loss": 46.6085,
+ "step": 42
+ },
+ {
+ "epoch": 0.24571428571428572,
+ "grad_norm": 697.6786499023438,
+ "learning_rate": 2.4571428571428573e-06,
+ "loss": 33.0379,
+ "step": 43
+ },
+ {
+ "epoch": 0.25142857142857145,
+ "grad_norm": 691.048828125,
+ "learning_rate": 2.5142857142857147e-06,
+ "loss": 33.1235,
+ "step": 44
+ },
+ {
+ "epoch": 0.2571428571428571,
+ "grad_norm": 841.7835693359375,
+ "learning_rate": 2.5714285714285716e-06,
+ "loss": 27.9442,
+ "step": 45
+ },
+ {
+ "epoch": 0.26285714285714284,
+ "grad_norm": 812.8052368164062,
+ "learning_rate": 2.628571428571429e-06,
+ "loss": 36.1373,
+ "step": 46
+ },
+ {
+ "epoch": 0.26857142857142857,
+ "grad_norm": 758.7867431640625,
+ "learning_rate": 2.685714285714286e-06,
+ "loss": 36.4701,
+ "step": 47
+ },
+ {
+ "epoch": 0.2742857142857143,
+ "grad_norm": 675.4127197265625,
+ "learning_rate": 2.742857142857143e-06,
+ "loss": 30.4991,
+ "step": 48
+ },
+ {
+ "epoch": 0.28,
+ "grad_norm": 858.139892578125,
+ "learning_rate": 2.8000000000000003e-06,
+ "loss": 37.4854,
+ "step": 49
+ },
+ {
+ "epoch": 0.2857142857142857,
+ "grad_norm": 512.81689453125,
+ "learning_rate": 2.8571428571428573e-06,
+ "loss": 25.0081,
+ "step": 50
+ },
+ {
+ "epoch": 0.2914285714285714,
+ "grad_norm": 619.5103149414062,
+ "learning_rate": 2.9142857142857146e-06,
+ "loss": 28.0793,
+ "step": 51
+ },
+ {
+ "epoch": 0.29714285714285715,
+ "grad_norm": 621.6701049804688,
+ "learning_rate": 2.9714285714285716e-06,
+ "loss": 27.6905,
+ "step": 52
+ },
+ {
+ "epoch": 0.3028571428571429,
+ "grad_norm": 482.0356750488281,
+ "learning_rate": 3.028571428571429e-06,
+ "loss": 20.7056,
+ "step": 53
+ },
+ {
+ "epoch": 0.30857142857142855,
+ "grad_norm": 869.6558837890625,
+ "learning_rate": 3.085714285714286e-06,
+ "loss": 36.8075,
+ "step": 54
+ },
+ {
+ "epoch": 0.3142857142857143,
+ "grad_norm": 627.7383422851562,
+ "learning_rate": 3.1428571428571433e-06,
+ "loss": 29.0973,
+ "step": 55
+ },
+ {
+ "epoch": 0.32,
+ "grad_norm": 428.7781677246094,
+ "learning_rate": 3.2000000000000003e-06,
+ "loss": 19.1738,
+ "step": 56
+ },
+ {
+ "epoch": 0.32571428571428573,
+ "grad_norm": 576.77099609375,
+ "learning_rate": 3.2571428571428572e-06,
+ "loss": 24.746,
+ "step": 57
+ },
+ {
+ "epoch": 0.3314285714285714,
+ "grad_norm": 581.8759765625,
+ "learning_rate": 3.314285714285714e-06,
+ "loss": 26.0945,
+ "step": 58
+ },
+ {
+ "epoch": 0.33714285714285713,
+ "grad_norm": 518.2973022460938,
+ "learning_rate": 3.371428571428572e-06,
+ "loss": 23.3643,
+ "step": 59
+ },
+ {
+ "epoch": 0.34285714285714286,
+ "grad_norm": 566.5599975585938,
+ "learning_rate": 3.428571428571429e-06,
+ "loss": 25.4305,
+ "step": 60
+ },
+ {
+ "epoch": 0.3485714285714286,
+ "grad_norm": 423.3687744140625,
+ "learning_rate": 3.485714285714286e-06,
+ "loss": 17.0948,
+ "step": 61
+ },
+ {
+ "epoch": 0.35428571428571426,
+ "grad_norm": 610.7925415039062,
+ "learning_rate": 3.542857142857143e-06,
+ "loss": 25.4729,
+ "step": 62
+ },
+ {
+ "epoch": 0.36,
+ "grad_norm": 289.5078430175781,
+ "learning_rate": 3.6e-06,
+ "loss": 14.4792,
+ "step": 63
+ },
+ {
+ "epoch": 0.3657142857142857,
+ "grad_norm": 375.97882080078125,
+ "learning_rate": 3.6571428571428576e-06,
+ "loss": 15.283,
+ "step": 64
+ },
+ {
+ "epoch": 0.37142857142857144,
+ "grad_norm": 556.4803466796875,
+ "learning_rate": 3.7142857142857146e-06,
+ "loss": 23.1576,
+ "step": 65
+ },
+ {
+ "epoch": 0.37714285714285717,
+ "grad_norm": 400.8939208984375,
+ "learning_rate": 3.7714285714285716e-06,
+ "loss": 17.8219,
+ "step": 66
+ },
+ {
+ "epoch": 0.38285714285714284,
+ "grad_norm": 473.13897705078125,
+ "learning_rate": 3.828571428571429e-06,
+ "loss": 18.0179,
+ "step": 67
+ },
+ {
+ "epoch": 0.38857142857142857,
+ "grad_norm": 369.0384521484375,
+ "learning_rate": 3.885714285714286e-06,
+ "loss": 16.7251,
+ "step": 68
+ },
+ {
+ "epoch": 0.3942857142857143,
+ "grad_norm": 444.827392578125,
+ "learning_rate": 3.942857142857143e-06,
+ "loss": 20.251,
+ "step": 69
+ },
+ {
+ "epoch": 0.4,
+ "grad_norm": 482.73333740234375,
+ "learning_rate": 4.000000000000001e-06,
+ "loss": 21.5673,
+ "step": 70
+ },
+ {
+ "epoch": 0.4057142857142857,
+ "grad_norm": 285.9597473144531,
+ "learning_rate": 4.057142857142858e-06,
+ "loss": 13.1687,
+ "step": 71
+ },
+ {
+ "epoch": 0.4114285714285714,
+ "grad_norm": 351.0760803222656,
+ "learning_rate": 4.114285714285715e-06,
+ "loss": 15.4936,
+ "step": 72
+ },
+ {
+ "epoch": 0.41714285714285715,
+ "grad_norm": 395.3171081542969,
+ "learning_rate": 4.1714285714285715e-06,
+ "loss": 16.7497,
+ "step": 73
+ },
+ {
+ "epoch": 0.4228571428571429,
+ "grad_norm": 310.77935791015625,
+ "learning_rate": 4.228571428571429e-06,
+ "loss": 13.1282,
+ "step": 74
+ },
+ {
+ "epoch": 0.42857142857142855,
+ "grad_norm": 333.78497314453125,
+ "learning_rate": 4.285714285714286e-06,
+ "loss": 13.1536,
+ "step": 75
+ },
+ {
+ "epoch": 0.4342857142857143,
+ "grad_norm": 378.2083435058594,
+ "learning_rate": 4.342857142857143e-06,
+ "loss": 15.6583,
+ "step": 76
+ },
+ {
+ "epoch": 0.44,
+ "grad_norm": 238.56402587890625,
+ "learning_rate": 4.4e-06,
+ "loss": 10.2499,
+ "step": 77
+ },
+ {
+ "epoch": 0.44571428571428573,
+ "grad_norm": 255.11715698242188,
+ "learning_rate": 4.457142857142858e-06,
+ "loss": 7.5773,
+ "step": 78
+ },
+ {
+ "epoch": 0.4514285714285714,
+ "grad_norm": 354.782470703125,
+ "learning_rate": 4.514285714285715e-06,
+ "loss": 14.8135,
+ "step": 79
+ },
+ {
+ "epoch": 0.45714285714285713,
+ "grad_norm": 307.1529541015625,
+ "learning_rate": 4.571428571428572e-06,
+ "loss": 12.8466,
+ "step": 80
+ },
+ {
+ "epoch": 0.46285714285714286,
+ "grad_norm": 238.98980712890625,
+ "learning_rate": 4.628571428571429e-06,
+ "loss": 9.0211,
+ "step": 81
+ },
+ {
+ "epoch": 0.4685714285714286,
+ "grad_norm": 291.2945251464844,
+ "learning_rate": 4.685714285714286e-06,
+ "loss": 12.0581,
+ "step": 82
+ },
+ {
+ "epoch": 0.4742857142857143,
+ "grad_norm": 184.88775634765625,
+ "learning_rate": 4.742857142857144e-06,
+ "loss": 8.1703,
+ "step": 83
+ },
+ {
+ "epoch": 0.48,
+ "grad_norm": 213.38546752929688,
+ "learning_rate": 4.800000000000001e-06,
+ "loss": 8.8526,
+ "step": 84
+ },
+ {
+ "epoch": 0.4857142857142857,
+ "grad_norm": 291.2813415527344,
+ "learning_rate": 4.857142857142858e-06,
+ "loss": 9.452,
+ "step": 85
+ },
+ {
+ "epoch": 0.49142857142857144,
+ "grad_norm": 318.67938232421875,
+ "learning_rate": 4.9142857142857145e-06,
+ "loss": 12.6986,
+ "step": 86
+ },
+ {
+ "epoch": 0.49714285714285716,
+ "grad_norm": 143.00746154785156,
+ "learning_rate": 4.9714285714285715e-06,
+ "loss": 6.0976,
+ "step": 87
+ },
+ {
+ "epoch": 0.5028571428571429,
+ "grad_norm": 232.49122619628906,
+ "learning_rate": 5.028571428571429e-06,
+ "loss": 8.8368,
+ "step": 88
+ },
+ {
+ "epoch": 0.5085714285714286,
+ "grad_norm": 183.7664794921875,
+ "learning_rate": 5.085714285714286e-06,
+ "loss": 7.6702,
+ "step": 89
+ },
+ {
+ "epoch": 0.5142857142857142,
+ "grad_norm": 162.1573944091797,
+ "learning_rate": 5.142857142857143e-06,
+ "loss": 7.0566,
+ "step": 90
+ },
+ {
+ "epoch": 0.52,
+ "grad_norm": 202.04483032226562,
+ "learning_rate": 5.2e-06,
+ "loss": 8.3381,
+ "step": 91
+ },
+ {
+ "epoch": 0.5257142857142857,
+ "grad_norm": 131.73065185546875,
+ "learning_rate": 5.257142857142858e-06,
+ "loss": 4.88,
+ "step": 92
+ },
+ {
+ "epoch": 0.5314285714285715,
+ "grad_norm": 379.7463684082031,
+ "learning_rate": 5.314285714285715e-06,
+ "loss": 8.5365,
+ "step": 93
+ },
+ {
+ "epoch": 0.5371428571428571,
+ "grad_norm": 192.61703491210938,
+ "learning_rate": 5.371428571428572e-06,
+ "loss": 7.4655,
+ "step": 94
+ },
+ {
+ "epoch": 0.5428571428571428,
+ "grad_norm": 190.39840698242188,
+ "learning_rate": 5.428571428571429e-06,
+ "loss": 7.7574,
+ "step": 95
+ },
+ {
+ "epoch": 0.5485714285714286,
+ "grad_norm": 182.56820678710938,
+ "learning_rate": 5.485714285714286e-06,
+ "loss": 6.4847,
+ "step": 96
+ },
+ {
+ "epoch": 0.5542857142857143,
+ "grad_norm": 138.87295532226562,
+ "learning_rate": 5.542857142857144e-06,
+ "loss": 5.575,
+ "step": 97
+ },
+ {
+ "epoch": 0.56,
+ "grad_norm": 528.1649780273438,
+ "learning_rate": 5.600000000000001e-06,
+ "loss": 4.9471,
+ "step": 98
+ },
+ {
+ "epoch": 0.5657142857142857,
+ "grad_norm": 234.5634002685547,
+ "learning_rate": 5.6571428571428576e-06,
+ "loss": 7.5448,
+ "step": 99
+ },
+ {
+ "epoch": 0.5714285714285714,
+ "grad_norm": 280.67047119140625,
+ "learning_rate": 5.7142857142857145e-06,
+ "loss": 4.4288,
+ "step": 100
+ },
+ {
+ "epoch": 0.5771428571428572,
+ "grad_norm": 121.8084945678711,
+ "learning_rate": 5.7714285714285715e-06,
+ "loss": 4.4681,
+ "step": 101
+ },
+ {
+ "epoch": 0.5828571428571429,
+ "grad_norm": 155.22755432128906,
+ "learning_rate": 5.828571428571429e-06,
+ "loss": 4.7771,
+ "step": 102
+ },
+ {
+ "epoch": 0.5885714285714285,
+ "grad_norm": 108.56673431396484,
+ "learning_rate": 5.885714285714286e-06,
+ "loss": 4.6154,
+ "step": 103
+ },
+ {
+ "epoch": 0.5942857142857143,
+ "grad_norm": 118.11555480957031,
+ "learning_rate": 5.942857142857143e-06,
+ "loss": 3.9309,
+ "step": 104
+ },
+ {
+ "epoch": 0.6,
+ "grad_norm": 184.95895385742188,
+ "learning_rate": 6e-06,
+ "loss": 4.5836,
+ "step": 105
+ },
+ {
+ "epoch": 0.6057142857142858,
+ "grad_norm": 100.24383544921875,
+ "learning_rate": 6.057142857142858e-06,
+ "loss": 4.0985,
+ "step": 106
+ },
+ {
+ "epoch": 0.6114285714285714,
+ "grad_norm": 69.76212310791016,
+ "learning_rate": 6.114285714285715e-06,
+ "loss": 3.3107,
+ "step": 107
+ },
+ {
+ "epoch": 0.6171428571428571,
+ "grad_norm": 70.37541961669922,
+ "learning_rate": 6.171428571428572e-06,
+ "loss": 3.4681,
+ "step": 108
+ },
+ {
+ "epoch": 0.6228571428571429,
+ "grad_norm": 83.5008544921875,
+ "learning_rate": 6.228571428571429e-06,
+ "loss": 3.7449,
+ "step": 109
+ },
+ {
+ "epoch": 0.6285714285714286,
+ "grad_norm": 74.19607543945312,
+ "learning_rate": 6.285714285714287e-06,
+ "loss": 3.7265,
+ "step": 110
+ },
+ {
+ "epoch": 0.6342857142857142,
+ "grad_norm": 89.18614196777344,
+ "learning_rate": 6.342857142857144e-06,
+ "loss": 3.2003,
+ "step": 111
+ },
+ {
+ "epoch": 0.64,
+ "grad_norm": 77.59644317626953,
+ "learning_rate": 6.4000000000000006e-06,
+ "loss": 3.7784,
+ "step": 112
+ },
+ {
+ "epoch": 0.6457142857142857,
+ "grad_norm": 76.62641143798828,
+ "learning_rate": 6.4571428571428575e-06,
+ "loss": 3.7134,
+ "step": 113
+ },
+ {
+ "epoch": 0.6514285714285715,
+ "grad_norm": 102.94229888916016,
+ "learning_rate": 6.5142857142857145e-06,
+ "loss": 2.9531,
+ "step": 114
+ },
+ {
+ "epoch": 0.6571428571428571,
+ "grad_norm": 195.8087615966797,
+ "learning_rate": 6.5714285714285714e-06,
+ "loss": 3.368,
+ "step": 115
+ },
+ {
+ "epoch": 0.6628571428571428,
+ "grad_norm": 45.65556716918945,
+ "learning_rate": 6.628571428571428e-06,
+ "loss": 2.9719,
+ "step": 116
+ },
+ {
+ "epoch": 0.6685714285714286,
+ "grad_norm": 46.55626678466797,
+ "learning_rate": 6.685714285714285e-06,
+ "loss": 2.5911,
+ "step": 117
+ },
+ {
+ "epoch": 0.6742857142857143,
+ "grad_norm": 50.443519592285156,
+ "learning_rate": 6.742857142857144e-06,
+ "loss": 2.4823,
+ "step": 118
+ },
+ {
+ "epoch": 0.68,
+ "grad_norm": 63.44559860229492,
+ "learning_rate": 6.800000000000001e-06,
+ "loss": 3.1693,
+ "step": 119
+ },
+ {
+ "epoch": 0.6857142857142857,
+ "grad_norm": 52.527462005615234,
+ "learning_rate": 6.857142857142858e-06,
+ "loss": 2.763,
+ "step": 120
+ },
+ {
+ "epoch": 0.6914285714285714,
+ "grad_norm": 56.55500030517578,
+ "learning_rate": 6.914285714285715e-06,
+ "loss": 2.291,
+ "step": 121
+ },
+ {
+ "epoch": 0.6971428571428572,
+ "grad_norm": 44.16722106933594,
+ "learning_rate": 6.971428571428572e-06,
+ "loss": 2.318,
+ "step": 122
+ },
+ {
+ "epoch": 0.7028571428571428,
+ "grad_norm": 340.8834228515625,
+ "learning_rate": 7.028571428571429e-06,
+ "loss": 2.3768,
+ "step": 123
+ },
+ {
+ "epoch": 0.7085714285714285,
+ "grad_norm": 49.526893615722656,
+ "learning_rate": 7.085714285714286e-06,
+ "loss": 2.0165,
+ "step": 124
+ },
+ {
+ "epoch": 0.7142857142857143,
+ "grad_norm": 63.88526916503906,
+ "learning_rate": 7.142857142857143e-06,
+ "loss": 2.222,
+ "step": 125
+ },
+ {
+ "epoch": 0.72,
+ "grad_norm": 54.57352828979492,
+ "learning_rate": 7.2e-06,
+ "loss": 2.4017,
+ "step": 126
+ },
+ {
+ "epoch": 0.7257142857142858,
+ "grad_norm": 65.19429779052734,
+ "learning_rate": 7.257142857142857e-06,
+ "loss": 1.9304,
+ "step": 127
+ },
+ {
+ "epoch": 0.7314285714285714,
+ "grad_norm": 57.65582275390625,
+ "learning_rate": 7.314285714285715e-06,
+ "loss": 2.3272,
+ "step": 128
+ },
+ {
+ "epoch": 0.7371428571428571,
+ "grad_norm": 40.34184646606445,
+ "learning_rate": 7.371428571428572e-06,
+ "loss": 1.9784,
+ "step": 129
+ },
+ {
+ "epoch": 0.7428571428571429,
+ "grad_norm": 44.75320816040039,
+ "learning_rate": 7.428571428571429e-06,
+ "loss": 2.0642,
+ "step": 130
+ },
+ {
+ "epoch": 0.7485714285714286,
+ "grad_norm": 97.42229461669922,
+ "learning_rate": 7.485714285714286e-06,
+ "loss": 1.7851,
+ "step": 131
+ },
+ {
+ "epoch": 0.7542857142857143,
+ "grad_norm": 48.73323059082031,
+ "learning_rate": 7.542857142857143e-06,
+ "loss": 1.8948,
+ "step": 132
+ },
+ {
+ "epoch": 0.76,
+ "grad_norm": 68.79501342773438,
+ "learning_rate": 7.6e-06,
+ "loss": 1.6354,
+ "step": 133
+ },
+ {
+ "epoch": 0.7657142857142857,
+ "grad_norm": 42.33122634887695,
+ "learning_rate": 7.657142857142858e-06,
+ "loss": 1.6211,
+ "step": 134
+ },
+ {
+ "epoch": 0.7714285714285715,
+ "grad_norm": 95.96932220458984,
+ "learning_rate": 7.714285714285714e-06,
+ "loss": 1.8455,
+ "step": 135
+ },
+ {
+ "epoch": 0.7771428571428571,
+ "grad_norm": 42.68363571166992,
+ "learning_rate": 7.771428571428572e-06,
+ "loss": 1.7399,
+ "step": 136
+ },
+ {
+ "epoch": 0.7828571428571428,
+ "grad_norm": 55.536964416503906,
+ "learning_rate": 7.82857142857143e-06,
+ "loss": 1.6986,
+ "step": 137
+ },
+ {
+ "epoch": 0.7885714285714286,
+ "grad_norm": 128.13711547851562,
+ "learning_rate": 7.885714285714286e-06,
+ "loss": 1.7396,
+ "step": 138
+ },
+ {
+ "epoch": 0.7942857142857143,
+ "grad_norm": 192.2489776611328,
+ "learning_rate": 7.942857142857144e-06,
+ "loss": 1.501,
+ "step": 139
+ },
+ {
+ "epoch": 0.8,
+ "grad_norm": 282.86810302734375,
+ "learning_rate": 8.000000000000001e-06,
+ "loss": 2.0364,
+ "step": 140
+ },
+ {
+ "epoch": 0.8057142857142857,
+ "grad_norm": 44.54533386230469,
+ "learning_rate": 8.057142857142857e-06,
+ "loss": 1.7826,
+ "step": 141
+ },
+ {
+ "epoch": 0.8114285714285714,
+ "grad_norm": 56.85557556152344,
+ "learning_rate": 8.114285714285715e-06,
+ "loss": 1.5573,
+ "step": 142
+ },
+ {
+ "epoch": 0.8171428571428572,
+ "grad_norm": 56.26758575439453,
+ "learning_rate": 8.171428571428571e-06,
+ "loss": 1.612,
+ "step": 143
+ },
+ {
+ "epoch": 0.8228571428571428,
+ "grad_norm": 52.04902648925781,
+ "learning_rate": 8.22857142857143e-06,
+ "loss": 1.5511,
+ "step": 144
+ },
+ {
+ "epoch": 0.8285714285714286,
+ "grad_norm": 41.186100006103516,
+ "learning_rate": 8.285714285714285e-06,
+ "loss": 1.5916,
+ "step": 145
+ },
+ {
+ "epoch": 0.8342857142857143,
+ "grad_norm": 67.39685821533203,
+ "learning_rate": 8.342857142857143e-06,
+ "loss": 1.4581,
+ "step": 146
+ },
+ {
+ "epoch": 0.84,
+ "grad_norm": 31.806520462036133,
+ "learning_rate": 8.400000000000001e-06,
+ "loss": 1.4401,
+ "step": 147
+ },
+ {
+ "epoch": 0.8457142857142858,
+ "grad_norm": 60.504905700683594,
+ "learning_rate": 8.457142857142859e-06,
+ "loss": 1.4034,
+ "step": 148
+ },
+ {
+ "epoch": 0.8514285714285714,
+ "grad_norm": 70.5359115600586,
+ "learning_rate": 8.514285714285715e-06,
+ "loss": 1.3912,
+ "step": 149
+ },
+ {
+ "epoch": 0.8571428571428571,
+ "grad_norm": 91.03093719482422,
+ "learning_rate": 8.571428571428573e-06,
+ "loss": 1.5493,
+ "step": 150
+ },
+ {
+ "epoch": 0.8628571428571429,
+ "grad_norm": 56.56180953979492,
+ "learning_rate": 8.628571428571429e-06,
+ "loss": 1.2104,
+ "step": 151
+ },
+ {
+ "epoch": 0.8685714285714285,
+ "grad_norm": 38.104461669921875,
+ "learning_rate": 8.685714285714287e-06,
+ "loss": 1.2611,
+ "step": 152
+ },
+ {
+ "epoch": 0.8742857142857143,
+ "grad_norm": 45.80816650390625,
+ "learning_rate": 8.742857142857143e-06,
+ "loss": 1.3031,
+ "step": 153
+ },
+ {
+ "epoch": 0.88,
+ "grad_norm": 69.43998718261719,
+ "learning_rate": 8.8e-06,
+ "loss": 1.4006,
+ "step": 154
+ },
+ {
+ "epoch": 0.8857142857142857,
+ "grad_norm": 50.02825927734375,
+ "learning_rate": 8.857142857142857e-06,
+ "loss": 1.3128,
+ "step": 155
+ },
+ {
+ "epoch": 0.8914285714285715,
+ "grad_norm": 49.2396125793457,
+ "learning_rate": 8.914285714285716e-06,
+ "loss": 1.6515,
+ "step": 156
+ },
+ {
+ "epoch": 0.8971428571428571,
+ "grad_norm": 58.82089614868164,
+ "learning_rate": 8.971428571428572e-06,
+ "loss": 1.6539,
+ "step": 157
+ },
+ {
+ "epoch": 0.9028571428571428,
+ "grad_norm": 41.613197326660156,
+ "learning_rate": 9.02857142857143e-06,
+ "loss": 1.3638,
+ "step": 158
+ },
+ {
+ "epoch": 0.9085714285714286,
+ "grad_norm": 56.6411018371582,
+ "learning_rate": 9.085714285714286e-06,
+ "loss": 1.6067,
+ "step": 159
+ },
+ {
+ "epoch": 0.9142857142857143,
+ "grad_norm": 25.510644912719727,
+ "learning_rate": 9.142857142857144e-06,
+ "loss": 1.5412,
+ "step": 160
+ },
+ {
+ "epoch": 0.92,
+ "grad_norm": 47.901302337646484,
+ "learning_rate": 9.2e-06,
+ "loss": 1.3587,
+ "step": 161
+ },
+ {
+ "epoch": 0.9257142857142857,
+ "grad_norm": 52.8482551574707,
+ "learning_rate": 9.257142857142858e-06,
+ "loss": 1.6951,
+ "step": 162
+ },
+ {
+ "epoch": 0.9314285714285714,
+ "grad_norm": 49.34379577636719,
+ "learning_rate": 9.314285714285714e-06,
+ "loss": 1.5581,
+ "step": 163
+ },
+ {
+ "epoch": 0.9371428571428572,
+ "grad_norm": 33.59023666381836,
+ "learning_rate": 9.371428571428572e-06,
+ "loss": 1.5744,
+ "step": 164
+ },
+ {
+ "epoch": 0.9428571428571428,
+ "grad_norm": 41.23167419433594,
+ "learning_rate": 9.42857142857143e-06,
+ "loss": 1.2757,
+ "step": 165
+ },
+ {
+ "epoch": 0.9485714285714286,
+ "grad_norm": 94.31331634521484,
+ "learning_rate": 9.485714285714287e-06,
+ "loss": 1.1797,
+ "step": 166
+ },
+ {
+ "epoch": 0.9542857142857143,
+ "grad_norm": 107.66983795166016,
+ "learning_rate": 9.542857142857143e-06,
+ "loss": 1.8364,
+ "step": 167
+ },
+ {
+ "epoch": 0.96,
+ "grad_norm": 119.86046600341797,
+ "learning_rate": 9.600000000000001e-06,
+ "loss": 1.2014,
+ "step": 168
+ },
+ {
+ "epoch": 0.9657142857142857,
+ "grad_norm": 59.50279235839844,
+ "learning_rate": 9.657142857142857e-06,
+ "loss": 1.3769,
+ "step": 169
+ },
+ {
+ "epoch": 0.9714285714285714,
+ "grad_norm": 52.51278305053711,
+ "learning_rate": 9.714285714285715e-06,
+ "loss": 1.5001,
+ "step": 170
+ },
+ {
+ "epoch": 0.9771428571428571,
+ "grad_norm": 66.36750030517578,
+ "learning_rate": 9.771428571428571e-06,
+ "loss": 1.3174,
+ "step": 171
+ },
+ {
+ "epoch": 0.9828571428571429,
+ "grad_norm": 148.5225067138672,
+ "learning_rate": 9.828571428571429e-06,
+ "loss": 1.2301,
+ "step": 172
+ },
+ {
+ "epoch": 0.9885714285714285,
+ "grad_norm": 57.223838806152344,
+ "learning_rate": 9.885714285714285e-06,
+ "loss": 1.3186,
+ "step": 173
+ },
+ {
+ "epoch": 0.9942857142857143,
+ "grad_norm": 45.917572021484375,
+ "learning_rate": 9.942857142857143e-06,
+ "loss": 1.4557,
+ "step": 174
+ },
+ {
+ "epoch": 1.0,
+ "grad_norm": 52.964290618896484,
+ "learning_rate": 1e-05,
+ "loss": 1.2294,
+ "step": 175
+ },
+ {
+ "epoch": 1.0,
+ "eval_classes": 0,
+ "eval_loss": 1.1637780666351318,
+ "eval_map": 0.1074,
+ "eval_map_50": 0.1346,
+ "eval_map_75": 0.1134,
+ "eval_map_large": 0.1146,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.1074,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.5098,
+ "eval_mar_10": 0.8324,
+ "eval_mar_100": 0.9521,
+ "eval_mar_100_per_class": 0.9521,
+ "eval_mar_large": 0.9521,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 17.5957,
+ "eval_samples_per_second": 16.709,
+ "eval_steps_per_second": 2.103,
+ "step": 175
+ },
+ {
+ "epoch": 1.0057142857142858,
+ "grad_norm": 25.71605682373047,
+ "learning_rate": 1.0057142857142859e-05,
+ "loss": 1.3411,
+ "step": 176
+ },
+ {
+ "epoch": 1.0114285714285713,
+ "grad_norm": 59.9210090637207,
+ "learning_rate": 1.0114285714285715e-05,
+ "loss": 1.2487,
+ "step": 177
+ },
+ {
+ "epoch": 1.0171428571428571,
+ "grad_norm": 64.52934265136719,
+ "learning_rate": 1.0171428571428573e-05,
+ "loss": 1.266,
+ "step": 178
+ },
+ {
+ "epoch": 1.022857142857143,
+ "grad_norm": 41.21025085449219,
+ "learning_rate": 1.0228571428571429e-05,
+ "loss": 1.2221,
+ "step": 179
+ },
+ {
+ "epoch": 1.0285714285714285,
+ "grad_norm": 56.34316635131836,
+ "learning_rate": 1.0285714285714286e-05,
+ "loss": 1.2948,
+ "step": 180
+ },
+ {
+ "epoch": 1.0342857142857143,
+ "grad_norm": 52.73511505126953,
+ "learning_rate": 1.0342857142857143e-05,
+ "loss": 1.1771,
+ "step": 181
+ },
+ {
+ "epoch": 1.04,
+ "grad_norm": 76.45502471923828,
+ "learning_rate": 1.04e-05,
+ "loss": 1.2123,
+ "step": 182
+ },
+ {
+ "epoch": 1.0457142857142858,
+ "grad_norm": 83.90774536132812,
+ "learning_rate": 1.0457142857142856e-05,
+ "loss": 1.1788,
+ "step": 183
+ },
+ {
+ "epoch": 1.0514285714285714,
+ "grad_norm": 45.44466781616211,
+ "learning_rate": 1.0514285714285716e-05,
+ "loss": 1.3411,
+ "step": 184
+ },
+ {
+ "epoch": 1.0571428571428572,
+ "grad_norm": 73.0859146118164,
+ "learning_rate": 1.0571428571428572e-05,
+ "loss": 1.3006,
+ "step": 185
+ },
+ {
+ "epoch": 1.062857142857143,
+ "grad_norm": 39.06739807128906,
+ "learning_rate": 1.062857142857143e-05,
+ "loss": 1.1095,
+ "step": 186
+ },
+ {
+ "epoch": 1.0685714285714285,
+ "grad_norm": 84.93657684326172,
+ "learning_rate": 1.0685714285714286e-05,
+ "loss": 1.4484,
+ "step": 187
+ },
+ {
+ "epoch": 1.0742857142857143,
+ "grad_norm": 48.28471755981445,
+ "learning_rate": 1.0742857142857144e-05,
+ "loss": 1.4836,
+ "step": 188
+ },
+ {
+ "epoch": 1.08,
+ "grad_norm": 76.1847152709961,
+ "learning_rate": 1.08e-05,
+ "loss": 1.1036,
+ "step": 189
+ },
+ {
+ "epoch": 1.0857142857142856,
+ "grad_norm": 74.36713409423828,
+ "learning_rate": 1.0857142857142858e-05,
+ "loss": 1.1358,
+ "step": 190
+ },
+ {
+ "epoch": 1.0914285714285714,
+ "grad_norm": 40.03973388671875,
+ "learning_rate": 1.0914285714285714e-05,
+ "loss": 1.4495,
+ "step": 191
+ },
+ {
+ "epoch": 1.0971428571428572,
+ "grad_norm": 32.62919616699219,
+ "learning_rate": 1.0971428571428572e-05,
+ "loss": 1.1748,
+ "step": 192
+ },
+ {
+ "epoch": 1.1028571428571428,
+ "grad_norm": 116.62271118164062,
+ "learning_rate": 1.102857142857143e-05,
+ "loss": 1.3018,
+ "step": 193
+ },
+ {
+ "epoch": 1.1085714285714285,
+ "grad_norm": 49.567359924316406,
+ "learning_rate": 1.1085714285714287e-05,
+ "loss": 1.2993,
+ "step": 194
+ },
+ {
+ "epoch": 1.1142857142857143,
+ "grad_norm": 89.49858093261719,
+ "learning_rate": 1.1142857142857143e-05,
+ "loss": 1.1842,
+ "step": 195
+ },
+ {
+ "epoch": 1.12,
+ "grad_norm": 86.89800262451172,
+ "learning_rate": 1.1200000000000001e-05,
+ "loss": 1.547,
+ "step": 196
+ },
+ {
+ "epoch": 1.1257142857142857,
+ "grad_norm": 52.09599304199219,
+ "learning_rate": 1.1257142857142857e-05,
+ "loss": 1.0948,
+ "step": 197
+ },
+ {
+ "epoch": 1.1314285714285715,
+ "grad_norm": 55.586875915527344,
+ "learning_rate": 1.1314285714285715e-05,
+ "loss": 1.1103,
+ "step": 198
+ },
+ {
+ "epoch": 1.1371428571428572,
+ "grad_norm": 32.5805549621582,
+ "learning_rate": 1.1371428571428571e-05,
+ "loss": 1.1465,
+ "step": 199
+ },
+ {
+ "epoch": 1.1428571428571428,
+ "grad_norm": 31.688655853271484,
+ "learning_rate": 1.1428571428571429e-05,
+ "loss": 1.3233,
+ "step": 200
+ },
+ {
+ "epoch": 1.1485714285714286,
+ "grad_norm": 37.952674865722656,
+ "learning_rate": 1.1485714285714285e-05,
+ "loss": 1.1838,
+ "step": 201
+ },
+ {
+ "epoch": 1.1542857142857144,
+ "grad_norm": 74.23160552978516,
+ "learning_rate": 1.1542857142857143e-05,
+ "loss": 1.3673,
+ "step": 202
+ },
+ {
+ "epoch": 1.16,
+ "grad_norm": 106.44403076171875,
+ "learning_rate": 1.16e-05,
+ "loss": 1.3237,
+ "step": 203
+ },
+ {
+ "epoch": 1.1657142857142857,
+ "grad_norm": 39.78948211669922,
+ "learning_rate": 1.1657142857142859e-05,
+ "loss": 0.9842,
+ "step": 204
+ },
+ {
+ "epoch": 1.1714285714285715,
+ "grad_norm": 35.71757507324219,
+ "learning_rate": 1.1714285714285715e-05,
+ "loss": 1.315,
+ "step": 205
+ },
+ {
+ "epoch": 1.177142857142857,
+ "grad_norm": 72.02671813964844,
+ "learning_rate": 1.1771428571428572e-05,
+ "loss": 1.0231,
+ "step": 206
+ },
+ {
+ "epoch": 1.1828571428571428,
+ "grad_norm": 47.552490234375,
+ "learning_rate": 1.1828571428571429e-05,
+ "loss": 1.5534,
+ "step": 207
+ },
+ {
+ "epoch": 1.1885714285714286,
+ "grad_norm": 71.50056457519531,
+ "learning_rate": 1.1885714285714286e-05,
+ "loss": 1.2347,
+ "step": 208
+ },
+ {
+ "epoch": 1.1942857142857144,
+ "grad_norm": 31.451601028442383,
+ "learning_rate": 1.1942857142857142e-05,
+ "loss": 0.9793,
+ "step": 209
+ },
+ {
+ "epoch": 1.2,
+ "grad_norm": 61.02602005004883,
+ "learning_rate": 1.2e-05,
+ "loss": 1.1618,
+ "step": 210
+ },
+ {
+ "epoch": 1.2057142857142857,
+ "grad_norm": 54.90080642700195,
+ "learning_rate": 1.2057142857142856e-05,
+ "loss": 1.3242,
+ "step": 211
+ },
+ {
+ "epoch": 1.2114285714285715,
+ "grad_norm": 29.097959518432617,
+ "learning_rate": 1.2114285714285716e-05,
+ "loss": 1.0527,
+ "step": 212
+ },
+ {
+ "epoch": 1.217142857142857,
+ "grad_norm": 34.94865036010742,
+ "learning_rate": 1.2171428571428572e-05,
+ "loss": 1.5236,
+ "step": 213
+ },
+ {
+ "epoch": 1.2228571428571429,
+ "grad_norm": 60.06378173828125,
+ "learning_rate": 1.222857142857143e-05,
+ "loss": 1.6229,
+ "step": 214
+ },
+ {
+ "epoch": 1.2285714285714286,
+ "grad_norm": 55.50552749633789,
+ "learning_rate": 1.2285714285714286e-05,
+ "loss": 1.1756,
+ "step": 215
+ },
+ {
+ "epoch": 1.2342857142857142,
+ "grad_norm": 96.09048461914062,
+ "learning_rate": 1.2342857142857144e-05,
+ "loss": 1.3275,
+ "step": 216
+ },
+ {
+ "epoch": 1.24,
+ "grad_norm": 28.802642822265625,
+ "learning_rate": 1.24e-05,
+ "loss": 1.2808,
+ "step": 217
+ },
+ {
+ "epoch": 1.2457142857142858,
+ "grad_norm": 43.80913543701172,
+ "learning_rate": 1.2457142857142858e-05,
+ "loss": 1.176,
+ "step": 218
+ },
+ {
+ "epoch": 1.2514285714285713,
+ "grad_norm": 78.38436126708984,
+ "learning_rate": 1.2514285714285714e-05,
+ "loss": 1.2733,
+ "step": 219
+ },
+ {
+ "epoch": 1.2571428571428571,
+ "grad_norm": 77.23725891113281,
+ "learning_rate": 1.2571428571428573e-05,
+ "loss": 1.0301,
+ "step": 220
+ },
+ {
+ "epoch": 1.262857142857143,
+ "grad_norm": 29.865652084350586,
+ "learning_rate": 1.2628571428571428e-05,
+ "loss": 1.156,
+ "step": 221
+ },
+ {
+ "epoch": 1.2685714285714287,
+ "grad_norm": 49.52701950073242,
+ "learning_rate": 1.2685714285714287e-05,
+ "loss": 1.4922,
+ "step": 222
+ },
+ {
+ "epoch": 1.2742857142857142,
+ "grad_norm": 71.39379119873047,
+ "learning_rate": 1.2742857142857143e-05,
+ "loss": 1.4941,
+ "step": 223
+ },
+ {
+ "epoch": 1.28,
+ "grad_norm": 73.40324401855469,
+ "learning_rate": 1.2800000000000001e-05,
+ "loss": 1.4157,
+ "step": 224
+ },
+ {
+ "epoch": 1.2857142857142856,
+ "grad_norm": 51.31359100341797,
+ "learning_rate": 1.2857142857142857e-05,
+ "loss": 1.3963,
+ "step": 225
+ },
+ {
+ "epoch": 1.2914285714285714,
+ "grad_norm": 335.7010803222656,
+ "learning_rate": 1.2914285714285715e-05,
+ "loss": 1.4926,
+ "step": 226
+ },
+ {
+ "epoch": 1.2971428571428572,
+ "grad_norm": 42.918251037597656,
+ "learning_rate": 1.2971428571428573e-05,
+ "loss": 1.2107,
+ "step": 227
+ },
+ {
+ "epoch": 1.302857142857143,
+ "grad_norm": 62.98908233642578,
+ "learning_rate": 1.3028571428571429e-05,
+ "loss": 1.5262,
+ "step": 228
+ },
+ {
+ "epoch": 1.3085714285714285,
+ "grad_norm": 42.34180450439453,
+ "learning_rate": 1.3085714285714287e-05,
+ "loss": 1.3537,
+ "step": 229
+ },
+ {
+ "epoch": 1.3142857142857143,
+ "grad_norm": 32.696128845214844,
+ "learning_rate": 1.3142857142857143e-05,
+ "loss": 1.2539,
+ "step": 230
+ },
+ {
+ "epoch": 1.32,
+ "grad_norm": 64.67933654785156,
+ "learning_rate": 1.32e-05,
+ "loss": 1.435,
+ "step": 231
+ },
+ {
+ "epoch": 1.3257142857142856,
+ "grad_norm": 53.249977111816406,
+ "learning_rate": 1.3257142857142857e-05,
+ "loss": 1.2543,
+ "step": 232
+ },
+ {
+ "epoch": 1.3314285714285714,
+ "grad_norm": 45.25177764892578,
+ "learning_rate": 1.3314285714285715e-05,
+ "loss": 1.6551,
+ "step": 233
+ },
+ {
+ "epoch": 1.3371428571428572,
+ "grad_norm": 39.65488815307617,
+ "learning_rate": 1.337142857142857e-05,
+ "loss": 1.3281,
+ "step": 234
+ },
+ {
+ "epoch": 1.342857142857143,
+ "grad_norm": 48.21430206298828,
+ "learning_rate": 1.3428571428571429e-05,
+ "loss": 1.1968,
+ "step": 235
+ },
+ {
+ "epoch": 1.3485714285714285,
+ "grad_norm": 72.81331634521484,
+ "learning_rate": 1.3485714285714288e-05,
+ "loss": 1.1244,
+ "step": 236
+ },
+ {
+ "epoch": 1.3542857142857143,
+ "grad_norm": 66.70149230957031,
+ "learning_rate": 1.3542857142857142e-05,
+ "loss": 1.0649,
+ "step": 237
+ },
+ {
+ "epoch": 1.3599999999999999,
+ "grad_norm": 40.40673828125,
+ "learning_rate": 1.3600000000000002e-05,
+ "loss": 1.2182,
+ "step": 238
+ },
+ {
+ "epoch": 1.3657142857142857,
+ "grad_norm": 50.67107391357422,
+ "learning_rate": 1.3657142857142858e-05,
+ "loss": 1.2684,
+ "step": 239
+ },
+ {
+ "epoch": 1.3714285714285714,
+ "grad_norm": 56.261566162109375,
+ "learning_rate": 1.3714285714285716e-05,
+ "loss": 1.1621,
+ "step": 240
+ },
+ {
+ "epoch": 1.3771428571428572,
+ "grad_norm": 43.4561882019043,
+ "learning_rate": 1.3771428571428572e-05,
+ "loss": 1.236,
+ "step": 241
+ },
+ {
+ "epoch": 1.3828571428571428,
+ "grad_norm": 58.3783073425293,
+ "learning_rate": 1.382857142857143e-05,
+ "loss": 1.1642,
+ "step": 242
+ },
+ {
+ "epoch": 1.3885714285714286,
+ "grad_norm": 86.39718627929688,
+ "learning_rate": 1.3885714285714286e-05,
+ "loss": 1.2724,
+ "step": 243
+ },
+ {
+ "epoch": 1.3942857142857144,
+ "grad_norm": 44.4029541015625,
+ "learning_rate": 1.3942857142857144e-05,
+ "loss": 1.323,
+ "step": 244
+ },
+ {
+ "epoch": 1.4,
+ "grad_norm": 68.77522277832031,
+ "learning_rate": 1.4000000000000001e-05,
+ "loss": 0.8696,
+ "step": 245
+ },
+ {
+ "epoch": 1.4057142857142857,
+ "grad_norm": 40.915870666503906,
+ "learning_rate": 1.4057142857142858e-05,
+ "loss": 1.1746,
+ "step": 246
+ },
+ {
+ "epoch": 1.4114285714285715,
+ "grad_norm": 41.75663375854492,
+ "learning_rate": 1.4114285714285715e-05,
+ "loss": 1.1253,
+ "step": 247
+ },
+ {
+ "epoch": 1.4171428571428573,
+ "grad_norm": 41.473060607910156,
+ "learning_rate": 1.4171428571428572e-05,
+ "loss": 1.4069,
+ "step": 248
+ },
+ {
+ "epoch": 1.4228571428571428,
+ "grad_norm": 33.94078063964844,
+ "learning_rate": 1.422857142857143e-05,
+ "loss": 1.2038,
+ "step": 249
+ },
+ {
+ "epoch": 1.4285714285714286,
+ "grad_norm": 25.82473373413086,
+ "learning_rate": 1.4285714285714285e-05,
+ "loss": 0.9384,
+ "step": 250
+ },
+ {
+ "epoch": 1.4342857142857142,
+ "grad_norm": 49.748172760009766,
+ "learning_rate": 1.4342857142857143e-05,
+ "loss": 1.1221,
+ "step": 251
+ },
+ {
+ "epoch": 1.44,
+ "grad_norm": 50.44265365600586,
+ "learning_rate": 1.44e-05,
+ "loss": 1.0359,
+ "step": 252
+ },
+ {
+ "epoch": 1.4457142857142857,
+ "grad_norm": 77.93119812011719,
+ "learning_rate": 1.4457142857142857e-05,
+ "loss": 1.1011,
+ "step": 253
+ },
+ {
+ "epoch": 1.4514285714285715,
+ "grad_norm": 103.56857299804688,
+ "learning_rate": 1.4514285714285713e-05,
+ "loss": 1.1609,
+ "step": 254
+ },
+ {
+ "epoch": 1.457142857142857,
+ "grad_norm": 57.082916259765625,
+ "learning_rate": 1.4571428571428573e-05,
+ "loss": 1.153,
+ "step": 255
+ },
+ {
+ "epoch": 1.4628571428571429,
+ "grad_norm": 29.88153839111328,
+ "learning_rate": 1.462857142857143e-05,
+ "loss": 1.0068,
+ "step": 256
+ },
+ {
+ "epoch": 1.4685714285714286,
+ "grad_norm": 48.73033142089844,
+ "learning_rate": 1.4685714285714287e-05,
+ "loss": 1.0846,
+ "step": 257
+ },
+ {
+ "epoch": 1.4742857142857142,
+ "grad_norm": 50.5568733215332,
+ "learning_rate": 1.4742857142857144e-05,
+ "loss": 1.1191,
+ "step": 258
+ },
+ {
+ "epoch": 1.48,
+ "grad_norm": 36.18242645263672,
+ "learning_rate": 1.48e-05,
+ "loss": 0.8898,
+ "step": 259
+ },
+ {
+ "epoch": 1.4857142857142858,
+ "grad_norm": 38.34716033935547,
+ "learning_rate": 1.4857142857142858e-05,
+ "loss": 1.1931,
+ "step": 260
+ },
+ {
+ "epoch": 1.4914285714285715,
+ "grad_norm": 34.09955596923828,
+ "learning_rate": 1.4914285714285715e-05,
+ "loss": 1.2114,
+ "step": 261
+ },
+ {
+ "epoch": 1.497142857142857,
+ "grad_norm": 90.42156982421875,
+ "learning_rate": 1.4971428571428572e-05,
+ "loss": 1.0864,
+ "step": 262
+ },
+ {
+ "epoch": 1.502857142857143,
+ "grad_norm": 67.16676330566406,
+ "learning_rate": 1.5028571428571428e-05,
+ "loss": 0.845,
+ "step": 263
+ },
+ {
+ "epoch": 1.5085714285714285,
+ "grad_norm": 64.06490325927734,
+ "learning_rate": 1.5085714285714286e-05,
+ "loss": 1.164,
+ "step": 264
+ },
+ {
+ "epoch": 1.5142857142857142,
+ "grad_norm": 60.20040512084961,
+ "learning_rate": 1.5142857142857144e-05,
+ "loss": 0.8942,
+ "step": 265
+ },
+ {
+ "epoch": 1.52,
+ "grad_norm": 41.62589645385742,
+ "learning_rate": 1.52e-05,
+ "loss": 1.2033,
+ "step": 266
+ },
+ {
+ "epoch": 1.5257142857142858,
+ "grad_norm": 56.95344543457031,
+ "learning_rate": 1.5257142857142858e-05,
+ "loss": 0.8938,
+ "step": 267
+ },
+ {
+ "epoch": 1.5314285714285716,
+ "grad_norm": 37.76027297973633,
+ "learning_rate": 1.5314285714285716e-05,
+ "loss": 1.2119,
+ "step": 268
+ },
+ {
+ "epoch": 1.5371428571428571,
+ "grad_norm": 43.52880096435547,
+ "learning_rate": 1.5371428571428572e-05,
+ "loss": 1.2131,
+ "step": 269
+ },
+ {
+ "epoch": 1.5428571428571427,
+ "grad_norm": 65.82935333251953,
+ "learning_rate": 1.5428571428571428e-05,
+ "loss": 1.2843,
+ "step": 270
+ },
+ {
+ "epoch": 1.5485714285714285,
+ "grad_norm": 49.85045623779297,
+ "learning_rate": 1.5485714285714287e-05,
+ "loss": 1.2297,
+ "step": 271
+ },
+ {
+ "epoch": 1.5542857142857143,
+ "grad_norm": 43.79518508911133,
+ "learning_rate": 1.5542857142857144e-05,
+ "loss": 0.9763,
+ "step": 272
+ },
+ {
+ "epoch": 1.56,
+ "grad_norm": 55.906646728515625,
+ "learning_rate": 1.56e-05,
+ "loss": 1.4565,
+ "step": 273
+ },
+ {
+ "epoch": 1.5657142857142858,
+ "grad_norm": 43.23767852783203,
+ "learning_rate": 1.565714285714286e-05,
+ "loss": 1.0054,
+ "step": 274
+ },
+ {
+ "epoch": 1.5714285714285714,
+ "grad_norm": 29.514699935913086,
+ "learning_rate": 1.5714285714285715e-05,
+ "loss": 1.0612,
+ "step": 275
+ },
+ {
+ "epoch": 1.5771428571428572,
+ "grad_norm": 47.55738067626953,
+ "learning_rate": 1.577142857142857e-05,
+ "loss": 1.1649,
+ "step": 276
+ },
+ {
+ "epoch": 1.5828571428571427,
+ "grad_norm": 60.16688537597656,
+ "learning_rate": 1.5828571428571428e-05,
+ "loss": 1.1763,
+ "step": 277
+ },
+ {
+ "epoch": 1.5885714285714285,
+ "grad_norm": 31.548669815063477,
+ "learning_rate": 1.5885714285714287e-05,
+ "loss": 1.0998,
+ "step": 278
+ },
+ {
+ "epoch": 1.5942857142857143,
+ "grad_norm": 84.46231842041016,
+ "learning_rate": 1.5942857142857143e-05,
+ "loss": 1.1437,
+ "step": 279
+ },
+ {
+ "epoch": 1.6,
+ "grad_norm": 24.36756706237793,
+ "learning_rate": 1.6000000000000003e-05,
+ "loss": 1.1319,
+ "step": 280
+ },
+ {
+ "epoch": 1.6057142857142859,
+ "grad_norm": 41.27078628540039,
+ "learning_rate": 1.6057142857142855e-05,
+ "loss": 1.2245,
+ "step": 281
+ },
+ {
+ "epoch": 1.6114285714285714,
+ "grad_norm": 47.624202728271484,
+ "learning_rate": 1.6114285714285715e-05,
+ "loss": 1.0325,
+ "step": 282
+ },
+ {
+ "epoch": 1.617142857142857,
+ "grad_norm": 34.220977783203125,
+ "learning_rate": 1.6171428571428574e-05,
+ "loss": 1.1388,
+ "step": 283
+ },
+ {
+ "epoch": 1.6228571428571428,
+ "grad_norm": 80.00936889648438,
+ "learning_rate": 1.622857142857143e-05,
+ "loss": 1.4803,
+ "step": 284
+ },
+ {
+ "epoch": 1.6285714285714286,
+ "grad_norm": 55.12397384643555,
+ "learning_rate": 1.6285714285714287e-05,
+ "loss": 1.0528,
+ "step": 285
+ },
+ {
+ "epoch": 1.6342857142857143,
+ "grad_norm": 58.02750015258789,
+ "learning_rate": 1.6342857142857143e-05,
+ "loss": 0.9903,
+ "step": 286
+ },
+ {
+ "epoch": 1.6400000000000001,
+ "grad_norm": 56.112327575683594,
+ "learning_rate": 1.6400000000000002e-05,
+ "loss": 0.8939,
+ "step": 287
+ },
+ {
+ "epoch": 1.6457142857142857,
+ "grad_norm": 32.75306701660156,
+ "learning_rate": 1.645714285714286e-05,
+ "loss": 1.0878,
+ "step": 288
+ },
+ {
+ "epoch": 1.6514285714285715,
+ "grad_norm": 32.49684524536133,
+ "learning_rate": 1.6514285714285714e-05,
+ "loss": 1.1555,
+ "step": 289
+ },
+ {
+ "epoch": 1.657142857142857,
+ "grad_norm": 28.5819034576416,
+ "learning_rate": 1.657142857142857e-05,
+ "loss": 1.4024,
+ "step": 290
+ },
+ {
+ "epoch": 1.6628571428571428,
+ "grad_norm": 77.95793151855469,
+ "learning_rate": 1.662857142857143e-05,
+ "loss": 1.4714,
+ "step": 291
+ },
+ {
+ "epoch": 1.6685714285714286,
+ "grad_norm": 81.19375610351562,
+ "learning_rate": 1.6685714285714286e-05,
+ "loss": 1.686,
+ "step": 292
+ },
+ {
+ "epoch": 1.6742857142857144,
+ "grad_norm": 42.35756301879883,
+ "learning_rate": 1.6742857142857142e-05,
+ "loss": 1.4978,
+ "step": 293
+ },
+ {
+ "epoch": 1.6800000000000002,
+ "grad_norm": 37.717281341552734,
+ "learning_rate": 1.6800000000000002e-05,
+ "loss": 1.2754,
+ "step": 294
+ },
+ {
+ "epoch": 1.6857142857142857,
+ "grad_norm": 37.30710983276367,
+ "learning_rate": 1.6857142857142858e-05,
+ "loss": 1.0861,
+ "step": 295
+ },
+ {
+ "epoch": 1.6914285714285713,
+ "grad_norm": 47.3027458190918,
+ "learning_rate": 1.6914285714285717e-05,
+ "loss": 1.171,
+ "step": 296
+ },
+ {
+ "epoch": 1.697142857142857,
+ "grad_norm": 55.43088912963867,
+ "learning_rate": 1.697142857142857e-05,
+ "loss": 0.9904,
+ "step": 297
+ },
+ {
+ "epoch": 1.7028571428571428,
+ "grad_norm": 51.3643684387207,
+ "learning_rate": 1.702857142857143e-05,
+ "loss": 1.1707,
+ "step": 298
+ },
+ {
+ "epoch": 1.7085714285714286,
+ "grad_norm": 44.561859130859375,
+ "learning_rate": 1.7085714285714286e-05,
+ "loss": 1.096,
+ "step": 299
+ },
+ {
+ "epoch": 1.7142857142857144,
+ "grad_norm": 45.064903259277344,
+ "learning_rate": 1.7142857142857145e-05,
+ "loss": 1.259,
+ "step": 300
+ },
+ {
+ "epoch": 1.72,
+ "grad_norm": 54.00167465209961,
+ "learning_rate": 1.7199999999999998e-05,
+ "loss": 1.1983,
+ "step": 301
+ },
+ {
+ "epoch": 1.7257142857142858,
+ "grad_norm": 61.48951721191406,
+ "learning_rate": 1.7257142857142857e-05,
+ "loss": 1.0426,
+ "step": 302
+ },
+ {
+ "epoch": 1.7314285714285713,
+ "grad_norm": 49.40690612792969,
+ "learning_rate": 1.7314285714285717e-05,
+ "loss": 0.9505,
+ "step": 303
+ },
+ {
+ "epoch": 1.737142857142857,
+ "grad_norm": 41.197330474853516,
+ "learning_rate": 1.7371428571428573e-05,
+ "loss": 1.4243,
+ "step": 304
+ },
+ {
+ "epoch": 1.7428571428571429,
+ "grad_norm": 49.21907424926758,
+ "learning_rate": 1.742857142857143e-05,
+ "loss": 1.2452,
+ "step": 305
+ },
+ {
+ "epoch": 1.7485714285714287,
+ "grad_norm": 48.3714599609375,
+ "learning_rate": 1.7485714285714285e-05,
+ "loss": 0.9797,
+ "step": 306
+ },
+ {
+ "epoch": 1.7542857142857144,
+ "grad_norm": 59.02680969238281,
+ "learning_rate": 1.7542857142857145e-05,
+ "loss": 1.0165,
+ "step": 307
+ },
+ {
+ "epoch": 1.76,
+ "grad_norm": 43.11337661743164,
+ "learning_rate": 1.76e-05,
+ "loss": 1.0995,
+ "step": 308
+ },
+ {
+ "epoch": 1.7657142857142856,
+ "grad_norm": 55.11835479736328,
+ "learning_rate": 1.7657142857142857e-05,
+ "loss": 1.1308,
+ "step": 309
+ },
+ {
+ "epoch": 1.7714285714285714,
+ "grad_norm": 41.109554290771484,
+ "learning_rate": 1.7714285714285713e-05,
+ "loss": 1.2723,
+ "step": 310
+ },
+ {
+ "epoch": 1.7771428571428571,
+ "grad_norm": 79.188720703125,
+ "learning_rate": 1.7771428571428573e-05,
+ "loss": 1.4638,
+ "step": 311
+ },
+ {
+ "epoch": 1.782857142857143,
+ "grad_norm": 57.13640213012695,
+ "learning_rate": 1.7828571428571432e-05,
+ "loss": 1.3124,
+ "step": 312
+ },
+ {
+ "epoch": 1.7885714285714287,
+ "grad_norm": 36.40741729736328,
+ "learning_rate": 1.7885714285714285e-05,
+ "loss": 1.0631,
+ "step": 313
+ },
+ {
+ "epoch": 1.7942857142857143,
+ "grad_norm": 48.303768157958984,
+ "learning_rate": 1.7942857142857144e-05,
+ "loss": 0.8791,
+ "step": 314
+ },
+ {
+ "epoch": 1.8,
+ "grad_norm": 29.768587112426758,
+ "learning_rate": 1.8e-05,
+ "loss": 1.0898,
+ "step": 315
+ },
+ {
+ "epoch": 1.8057142857142856,
+ "grad_norm": 39.185028076171875,
+ "learning_rate": 1.805714285714286e-05,
+ "loss": 1.1098,
+ "step": 316
+ },
+ {
+ "epoch": 1.8114285714285714,
+ "grad_norm": 32.381385803222656,
+ "learning_rate": 1.8114285714285713e-05,
+ "loss": 1.0213,
+ "step": 317
+ },
+ {
+ "epoch": 1.8171428571428572,
+ "grad_norm": 52.5543098449707,
+ "learning_rate": 1.8171428571428572e-05,
+ "loss": 0.9562,
+ "step": 318
+ },
+ {
+ "epoch": 1.822857142857143,
+ "grad_norm": 35.95527648925781,
+ "learning_rate": 1.8228571428571428e-05,
+ "loss": 0.9972,
+ "step": 319
+ },
+ {
+ "epoch": 1.8285714285714287,
+ "grad_norm": 42.66740036010742,
+ "learning_rate": 1.8285714285714288e-05,
+ "loss": 0.9987,
+ "step": 320
+ },
+ {
+ "epoch": 1.8342857142857143,
+ "grad_norm": 76.55131530761719,
+ "learning_rate": 1.8342857142857144e-05,
+ "loss": 1.0302,
+ "step": 321
+ },
+ {
+ "epoch": 1.8399999999999999,
+ "grad_norm": 45.564605712890625,
+ "learning_rate": 1.84e-05,
+ "loss": 1.0857,
+ "step": 322
+ },
+ {
+ "epoch": 1.8457142857142856,
+ "grad_norm": 28.313241958618164,
+ "learning_rate": 1.845714285714286e-05,
+ "loss": 1.0884,
+ "step": 323
+ },
+ {
+ "epoch": 1.8514285714285714,
+ "grad_norm": 38.595279693603516,
+ "learning_rate": 1.8514285714285716e-05,
+ "loss": 1.1742,
+ "step": 324
+ },
+ {
+ "epoch": 1.8571428571428572,
+ "grad_norm": 32.34958267211914,
+ "learning_rate": 1.8571428571428572e-05,
+ "loss": 0.9849,
+ "step": 325
+ },
+ {
+ "epoch": 1.862857142857143,
+ "grad_norm": 51.1898307800293,
+ "learning_rate": 1.8628571428571428e-05,
+ "loss": 1.5177,
+ "step": 326
+ },
+ {
+ "epoch": 1.8685714285714285,
+ "grad_norm": 80.85442352294922,
+ "learning_rate": 1.8685714285714287e-05,
+ "loss": 0.8764,
+ "step": 327
+ },
+ {
+ "epoch": 1.8742857142857143,
+ "grad_norm": 43.80781173706055,
+ "learning_rate": 1.8742857142857143e-05,
+ "loss": 1.2376,
+ "step": 328
+ },
+ {
+ "epoch": 1.88,
+ "grad_norm": 83.95499420166016,
+ "learning_rate": 1.88e-05,
+ "loss": 0.981,
+ "step": 329
+ },
+ {
+ "epoch": 1.8857142857142857,
+ "grad_norm": 42.65687942504883,
+ "learning_rate": 1.885714285714286e-05,
+ "loss": 1.0849,
+ "step": 330
+ },
+ {
+ "epoch": 1.8914285714285715,
+ "grad_norm": 46.72631072998047,
+ "learning_rate": 1.8914285714285715e-05,
+ "loss": 1.2969,
+ "step": 331
+ },
+ {
+ "epoch": 1.8971428571428572,
+ "grad_norm": 76.61483001708984,
+ "learning_rate": 1.8971428571428575e-05,
+ "loss": 1.0192,
+ "step": 332
+ },
+ {
+ "epoch": 1.9028571428571428,
+ "grad_norm": 34.24771499633789,
+ "learning_rate": 1.9028571428571427e-05,
+ "loss": 1.0717,
+ "step": 333
+ },
+ {
+ "epoch": 1.9085714285714286,
+ "grad_norm": 124.33210754394531,
+ "learning_rate": 1.9085714285714287e-05,
+ "loss": 1.0499,
+ "step": 334
+ },
+ {
+ "epoch": 1.9142857142857141,
+ "grad_norm": 45.05997085571289,
+ "learning_rate": 1.9142857142857143e-05,
+ "loss": 1.1427,
+ "step": 335
+ },
+ {
+ "epoch": 1.92,
+ "grad_norm": 43.917236328125,
+ "learning_rate": 1.9200000000000003e-05,
+ "loss": 1.0491,
+ "step": 336
+ },
+ {
+ "epoch": 1.9257142857142857,
+ "grad_norm": 74.866455078125,
+ "learning_rate": 1.9257142857142855e-05,
+ "loss": 0.9418,
+ "step": 337
+ },
+ {
+ "epoch": 1.9314285714285715,
+ "grad_norm": 46.80175018310547,
+ "learning_rate": 1.9314285714285715e-05,
+ "loss": 1.3776,
+ "step": 338
+ },
+ {
+ "epoch": 1.9371428571428573,
+ "grad_norm": 69.39533233642578,
+ "learning_rate": 1.9371428571428574e-05,
+ "loss": 1.035,
+ "step": 339
+ },
+ {
+ "epoch": 1.9428571428571428,
+ "grad_norm": 79.26314544677734,
+ "learning_rate": 1.942857142857143e-05,
+ "loss": 1.0309,
+ "step": 340
+ },
+ {
+ "epoch": 1.9485714285714286,
+ "grad_norm": 91.3028793334961,
+ "learning_rate": 1.9485714285714286e-05,
+ "loss": 1.2112,
+ "step": 341
+ },
+ {
+ "epoch": 1.9542857142857142,
+ "grad_norm": 67.27446746826172,
+ "learning_rate": 1.9542857142857143e-05,
+ "loss": 1.0539,
+ "step": 342
+ },
+ {
+ "epoch": 1.96,
+ "grad_norm": 117.01478576660156,
+ "learning_rate": 1.9600000000000002e-05,
+ "loss": 0.8803,
+ "step": 343
+ },
+ {
+ "epoch": 1.9657142857142857,
+ "grad_norm": 49.12966537475586,
+ "learning_rate": 1.9657142857142858e-05,
+ "loss": 1.305,
+ "step": 344
+ },
+ {
+ "epoch": 1.9714285714285715,
+ "grad_norm": 36.83738708496094,
+ "learning_rate": 1.9714285714285714e-05,
+ "loss": 1.0059,
+ "step": 345
+ },
+ {
+ "epoch": 1.977142857142857,
+ "grad_norm": 55.849609375,
+ "learning_rate": 1.977142857142857e-05,
+ "loss": 1.0178,
+ "step": 346
+ },
+ {
+ "epoch": 1.9828571428571429,
+ "grad_norm": 47.24936294555664,
+ "learning_rate": 1.982857142857143e-05,
+ "loss": 1.1549,
+ "step": 347
+ },
+ {
+ "epoch": 1.9885714285714284,
+ "grad_norm": 38.20663070678711,
+ "learning_rate": 1.9885714285714286e-05,
+ "loss": 0.8695,
+ "step": 348
+ },
+ {
+ "epoch": 1.9942857142857142,
+ "grad_norm": 37.89916229248047,
+ "learning_rate": 1.9942857142857142e-05,
+ "loss": 1.0985,
+ "step": 349
+ },
+ {
+ "epoch": 2.0,
+ "grad_norm": 26.7008113861084,
+ "learning_rate": 2e-05,
+ "loss": 0.9978,
+ "step": 350
+ },
+ {
+ "epoch": 2.0,
+ "eval_classes": 0,
+ "eval_loss": 0.9191280007362366,
+ "eval_map": 0.287,
+ "eval_map_50": 0.3324,
+ "eval_map_75": 0.3026,
+ "eval_map_large": 0.2872,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.287,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.6952,
+ "eval_mar_10": 0.9121,
+ "eval_mar_100": 0.9737,
+ "eval_mar_100_per_class": 0.9737,
+ "eval_mar_large": 0.9737,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 18.6378,
+ "eval_samples_per_second": 15.774,
+ "eval_steps_per_second": 1.985,
+ "step": 350
+ },
+ {
+ "epoch": 2.005714285714286,
+ "grad_norm": 47.30525207519531,
+ "learning_rate": 2.0057142857142858e-05,
+ "loss": 0.9073,
+ "step": 351
+ },
+ {
+ "epoch": 2.0114285714285716,
+ "grad_norm": 54.44076919555664,
+ "learning_rate": 2.0114285714285717e-05,
+ "loss": 1.0188,
+ "step": 352
+ },
+ {
+ "epoch": 2.0171428571428573,
+ "grad_norm": 61.43099594116211,
+ "learning_rate": 2.0171428571428573e-05,
+ "loss": 0.8953,
+ "step": 353
+ },
+ {
+ "epoch": 2.0228571428571427,
+ "grad_norm": 52.021724700927734,
+ "learning_rate": 2.022857142857143e-05,
+ "loss": 1.2142,
+ "step": 354
+ },
+ {
+ "epoch": 2.0285714285714285,
+ "grad_norm": 96.9588394165039,
+ "learning_rate": 2.0285714285714286e-05,
+ "loss": 1.1424,
+ "step": 355
+ },
+ {
+ "epoch": 2.0342857142857143,
+ "grad_norm": 43.736534118652344,
+ "learning_rate": 2.0342857142857145e-05,
+ "loss": 1.0734,
+ "step": 356
+ },
+ {
+ "epoch": 2.04,
+ "grad_norm": 26.348413467407227,
+ "learning_rate": 2.04e-05,
+ "loss": 1.2237,
+ "step": 357
+ },
+ {
+ "epoch": 2.045714285714286,
+ "grad_norm": 92.93049621582031,
+ "learning_rate": 2.0457142857142857e-05,
+ "loss": 1.0244,
+ "step": 358
+ },
+ {
+ "epoch": 2.0514285714285716,
+ "grad_norm": 43.89330291748047,
+ "learning_rate": 2.0514285714285717e-05,
+ "loss": 1.1088,
+ "step": 359
+ },
+ {
+ "epoch": 2.057142857142857,
+ "grad_norm": 31.635217666625977,
+ "learning_rate": 2.0571428571428573e-05,
+ "loss": 0.9592,
+ "step": 360
+ },
+ {
+ "epoch": 2.0628571428571427,
+ "grad_norm": 47.123844146728516,
+ "learning_rate": 2.062857142857143e-05,
+ "loss": 0.9273,
+ "step": 361
+ },
+ {
+ "epoch": 2.0685714285714285,
+ "grad_norm": 70.71419525146484,
+ "learning_rate": 2.0685714285714285e-05,
+ "loss": 1.1692,
+ "step": 362
+ },
+ {
+ "epoch": 2.0742857142857143,
+ "grad_norm": 54.28295135498047,
+ "learning_rate": 2.0742857142857145e-05,
+ "loss": 0.9669,
+ "step": 363
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 41.889286041259766,
+ "learning_rate": 2.08e-05,
+ "loss": 0.9807,
+ "step": 364
+ },
+ {
+ "epoch": 2.085714285714286,
+ "grad_norm": 88.92764282226562,
+ "learning_rate": 2.0857142857142857e-05,
+ "loss": 1.18,
+ "step": 365
+ },
+ {
+ "epoch": 2.0914285714285716,
+ "grad_norm": 43.853431701660156,
+ "learning_rate": 2.0914285714285713e-05,
+ "loss": 1.0387,
+ "step": 366
+ },
+ {
+ "epoch": 2.097142857142857,
+ "grad_norm": 42.877838134765625,
+ "learning_rate": 2.0971428571428572e-05,
+ "loss": 1.043,
+ "step": 367
+ },
+ {
+ "epoch": 2.1028571428571428,
+ "grad_norm": 44.86766052246094,
+ "learning_rate": 2.1028571428571432e-05,
+ "loss": 0.9026,
+ "step": 368
+ },
+ {
+ "epoch": 2.1085714285714285,
+ "grad_norm": 36.03720474243164,
+ "learning_rate": 2.1085714285714288e-05,
+ "loss": 0.9818,
+ "step": 369
+ },
+ {
+ "epoch": 2.1142857142857143,
+ "grad_norm": 60.320594787597656,
+ "learning_rate": 2.1142857142857144e-05,
+ "loss": 1.2608,
+ "step": 370
+ },
+ {
+ "epoch": 2.12,
+ "grad_norm": 81.33612060546875,
+ "learning_rate": 2.12e-05,
+ "loss": 1.1488,
+ "step": 371
+ },
+ {
+ "epoch": 2.125714285714286,
+ "grad_norm": 66.61579895019531,
+ "learning_rate": 2.125714285714286e-05,
+ "loss": 1.0263,
+ "step": 372
+ },
+ {
+ "epoch": 2.1314285714285712,
+ "grad_norm": 48.11174392700195,
+ "learning_rate": 2.1314285714285716e-05,
+ "loss": 0.8535,
+ "step": 373
+ },
+ {
+ "epoch": 2.137142857142857,
+ "grad_norm": 61.046600341796875,
+ "learning_rate": 2.1371428571428572e-05,
+ "loss": 1.0907,
+ "step": 374
+ },
+ {
+ "epoch": 2.142857142857143,
+ "grad_norm": 30.193220138549805,
+ "learning_rate": 2.1428571428571428e-05,
+ "loss": 1.1635,
+ "step": 375
+ },
+ {
+ "epoch": 2.1485714285714286,
+ "grad_norm": 48.01976776123047,
+ "learning_rate": 2.1485714285714288e-05,
+ "loss": 1.0505,
+ "step": 376
+ },
+ {
+ "epoch": 2.1542857142857144,
+ "grad_norm": 70.48137664794922,
+ "learning_rate": 2.1542857142857144e-05,
+ "loss": 0.9531,
+ "step": 377
+ },
+ {
+ "epoch": 2.16,
+ "grad_norm": 45.929988861083984,
+ "learning_rate": 2.16e-05,
+ "loss": 0.9404,
+ "step": 378
+ },
+ {
+ "epoch": 2.1657142857142855,
+ "grad_norm": 98.45175170898438,
+ "learning_rate": 2.165714285714286e-05,
+ "loss": 1.0931,
+ "step": 379
+ },
+ {
+ "epoch": 2.1714285714285713,
+ "grad_norm": 50.332740783691406,
+ "learning_rate": 2.1714285714285715e-05,
+ "loss": 1.4033,
+ "step": 380
+ },
+ {
+ "epoch": 2.177142857142857,
+ "grad_norm": 78.17147064208984,
+ "learning_rate": 2.177142857142857e-05,
+ "loss": 1.0196,
+ "step": 381
+ },
+ {
+ "epoch": 2.182857142857143,
+ "grad_norm": 45.67463684082031,
+ "learning_rate": 2.1828571428571428e-05,
+ "loss": 1.133,
+ "step": 382
+ },
+ {
+ "epoch": 2.1885714285714286,
+ "grad_norm": 57.02043533325195,
+ "learning_rate": 2.1885714285714287e-05,
+ "loss": 1.1489,
+ "step": 383
+ },
+ {
+ "epoch": 2.1942857142857144,
+ "grad_norm": 72.92861938476562,
+ "learning_rate": 2.1942857142857143e-05,
+ "loss": 1.1011,
+ "step": 384
+ },
+ {
+ "epoch": 2.2,
+ "grad_norm": 46.322547912597656,
+ "learning_rate": 2.2000000000000003e-05,
+ "loss": 1.2764,
+ "step": 385
+ },
+ {
+ "epoch": 2.2057142857142855,
+ "grad_norm": 50.237022399902344,
+ "learning_rate": 2.205714285714286e-05,
+ "loss": 1.1354,
+ "step": 386
+ },
+ {
+ "epoch": 2.2114285714285713,
+ "grad_norm": 143.770751953125,
+ "learning_rate": 2.2114285714285715e-05,
+ "loss": 1.147,
+ "step": 387
+ },
+ {
+ "epoch": 2.217142857142857,
+ "grad_norm": 58.19064712524414,
+ "learning_rate": 2.2171428571428575e-05,
+ "loss": 1.2218,
+ "step": 388
+ },
+ {
+ "epoch": 2.222857142857143,
+ "grad_norm": 133.52745056152344,
+ "learning_rate": 2.222857142857143e-05,
+ "loss": 1.3217,
+ "step": 389
+ },
+ {
+ "epoch": 2.2285714285714286,
+ "grad_norm": 55.836387634277344,
+ "learning_rate": 2.2285714285714287e-05,
+ "loss": 1.2342,
+ "step": 390
+ },
+ {
+ "epoch": 2.2342857142857144,
+ "grad_norm": 46.55270004272461,
+ "learning_rate": 2.2342857142857143e-05,
+ "loss": 1.1928,
+ "step": 391
+ },
+ {
+ "epoch": 2.24,
+ "grad_norm": 69.13031768798828,
+ "learning_rate": 2.2400000000000002e-05,
+ "loss": 1.3276,
+ "step": 392
+ },
+ {
+ "epoch": 2.2457142857142856,
+ "grad_norm": 94.66584777832031,
+ "learning_rate": 2.245714285714286e-05,
+ "loss": 1.3648,
+ "step": 393
+ },
+ {
+ "epoch": 2.2514285714285713,
+ "grad_norm": 132.82162475585938,
+ "learning_rate": 2.2514285714285715e-05,
+ "loss": 1.0906,
+ "step": 394
+ },
+ {
+ "epoch": 2.257142857142857,
+ "grad_norm": 166.76165771484375,
+ "learning_rate": 2.257142857142857e-05,
+ "loss": 1.1635,
+ "step": 395
+ },
+ {
+ "epoch": 2.262857142857143,
+ "grad_norm": 49.48722457885742,
+ "learning_rate": 2.262857142857143e-05,
+ "loss": 1.457,
+ "step": 396
+ },
+ {
+ "epoch": 2.2685714285714287,
+ "grad_norm": 51.98416519165039,
+ "learning_rate": 2.2685714285714286e-05,
+ "loss": 1.397,
+ "step": 397
+ },
+ {
+ "epoch": 2.2742857142857145,
+ "grad_norm": 268.32562255859375,
+ "learning_rate": 2.2742857142857142e-05,
+ "loss": 1.1811,
+ "step": 398
+ },
+ {
+ "epoch": 2.2800000000000002,
+ "grad_norm": 113.3601303100586,
+ "learning_rate": 2.2800000000000002e-05,
+ "loss": 1.6457,
+ "step": 399
+ },
+ {
+ "epoch": 2.2857142857142856,
+ "grad_norm": 42.322349548339844,
+ "learning_rate": 2.2857142857142858e-05,
+ "loss": 1.529,
+ "step": 400
+ },
+ {
+ "epoch": 2.2914285714285714,
+ "grad_norm": 37.9848518371582,
+ "learning_rate": 2.2914285714285718e-05,
+ "loss": 1.1678,
+ "step": 401
+ },
+ {
+ "epoch": 2.297142857142857,
+ "grad_norm": 58.746028900146484,
+ "learning_rate": 2.297142857142857e-05,
+ "loss": 1.0461,
+ "step": 402
+ },
+ {
+ "epoch": 2.302857142857143,
+ "grad_norm": 33.83110427856445,
+ "learning_rate": 2.302857142857143e-05,
+ "loss": 1.1699,
+ "step": 403
+ },
+ {
+ "epoch": 2.3085714285714287,
+ "grad_norm": 37.950626373291016,
+ "learning_rate": 2.3085714285714286e-05,
+ "loss": 0.8945,
+ "step": 404
+ },
+ {
+ "epoch": 2.314285714285714,
+ "grad_norm": 36.12480926513672,
+ "learning_rate": 2.3142857142857145e-05,
+ "loss": 0.9123,
+ "step": 405
+ },
+ {
+ "epoch": 2.32,
+ "grad_norm": 44.77686309814453,
+ "learning_rate": 2.32e-05,
+ "loss": 1.0462,
+ "step": 406
+ },
+ {
+ "epoch": 2.3257142857142856,
+ "grad_norm": 91.76225280761719,
+ "learning_rate": 2.3257142857142858e-05,
+ "loss": 1.1602,
+ "step": 407
+ },
+ {
+ "epoch": 2.3314285714285714,
+ "grad_norm": 46.81283950805664,
+ "learning_rate": 2.3314285714285717e-05,
+ "loss": 1.0097,
+ "step": 408
+ },
+ {
+ "epoch": 2.337142857142857,
+ "grad_norm": 78.25968170166016,
+ "learning_rate": 2.3371428571428573e-05,
+ "loss": 0.9829,
+ "step": 409
+ },
+ {
+ "epoch": 2.342857142857143,
+ "grad_norm": 38.0760498046875,
+ "learning_rate": 2.342857142857143e-05,
+ "loss": 1.1976,
+ "step": 410
+ },
+ {
+ "epoch": 2.3485714285714288,
+ "grad_norm": 118.48062896728516,
+ "learning_rate": 2.3485714285714285e-05,
+ "loss": 1.0831,
+ "step": 411
+ },
+ {
+ "epoch": 2.354285714285714,
+ "grad_norm": 63.731868743896484,
+ "learning_rate": 2.3542857142857145e-05,
+ "loss": 1.0006,
+ "step": 412
+ },
+ {
+ "epoch": 2.36,
+ "grad_norm": 57.7193603515625,
+ "learning_rate": 2.36e-05,
+ "loss": 0.8601,
+ "step": 413
+ },
+ {
+ "epoch": 2.3657142857142857,
+ "grad_norm": 65.94424438476562,
+ "learning_rate": 2.3657142857142857e-05,
+ "loss": 1.0073,
+ "step": 414
+ },
+ {
+ "epoch": 2.3714285714285714,
+ "grad_norm": 38.24591064453125,
+ "learning_rate": 2.3714285714285717e-05,
+ "loss": 1.0621,
+ "step": 415
+ },
+ {
+ "epoch": 2.3771428571428572,
+ "grad_norm": 53.88074493408203,
+ "learning_rate": 2.3771428571428573e-05,
+ "loss": 0.8198,
+ "step": 416
+ },
+ {
+ "epoch": 2.382857142857143,
+ "grad_norm": 45.4550895690918,
+ "learning_rate": 2.3828571428571432e-05,
+ "loss": 1.3931,
+ "step": 417
+ },
+ {
+ "epoch": 2.388571428571429,
+ "grad_norm": 42.56758117675781,
+ "learning_rate": 2.3885714285714285e-05,
+ "loss": 1.0437,
+ "step": 418
+ },
+ {
+ "epoch": 2.394285714285714,
+ "grad_norm": 54.02827072143555,
+ "learning_rate": 2.3942857142857144e-05,
+ "loss": 1.1002,
+ "step": 419
+ },
+ {
+ "epoch": 2.4,
+ "grad_norm": 65.14449310302734,
+ "learning_rate": 2.4e-05,
+ "loss": 1.2548,
+ "step": 420
+ },
+ {
+ "epoch": 2.4057142857142857,
+ "grad_norm": 60.62049102783203,
+ "learning_rate": 2.405714285714286e-05,
+ "loss": 1.0646,
+ "step": 421
+ },
+ {
+ "epoch": 2.4114285714285715,
+ "grad_norm": 47.858726501464844,
+ "learning_rate": 2.4114285714285713e-05,
+ "loss": 1.0466,
+ "step": 422
+ },
+ {
+ "epoch": 2.4171428571428573,
+ "grad_norm": 50.54081726074219,
+ "learning_rate": 2.4171428571428572e-05,
+ "loss": 1.0147,
+ "step": 423
+ },
+ {
+ "epoch": 2.422857142857143,
+ "grad_norm": 40.64912033081055,
+ "learning_rate": 2.4228571428571432e-05,
+ "loss": 0.9584,
+ "step": 424
+ },
+ {
+ "epoch": 2.4285714285714284,
+ "grad_norm": 26.813034057617188,
+ "learning_rate": 2.4285714285714288e-05,
+ "loss": 0.8747,
+ "step": 425
+ },
+ {
+ "epoch": 2.434285714285714,
+ "grad_norm": 55.92356491088867,
+ "learning_rate": 2.4342857142857144e-05,
+ "loss": 1.3426,
+ "step": 426
+ },
+ {
+ "epoch": 2.44,
+ "grad_norm": 102.78366088867188,
+ "learning_rate": 2.44e-05,
+ "loss": 0.8812,
+ "step": 427
+ },
+ {
+ "epoch": 2.4457142857142857,
+ "grad_norm": 34.32600021362305,
+ "learning_rate": 2.445714285714286e-05,
+ "loss": 1.0611,
+ "step": 428
+ },
+ {
+ "epoch": 2.4514285714285715,
+ "grad_norm": 58.62373733520508,
+ "learning_rate": 2.4514285714285716e-05,
+ "loss": 1.2773,
+ "step": 429
+ },
+ {
+ "epoch": 2.4571428571428573,
+ "grad_norm": 44.461082458496094,
+ "learning_rate": 2.4571428571428572e-05,
+ "loss": 1.0687,
+ "step": 430
+ },
+ {
+ "epoch": 2.4628571428571426,
+ "grad_norm": 34.10036087036133,
+ "learning_rate": 2.4628571428571428e-05,
+ "loss": 1.1176,
+ "step": 431
+ },
+ {
+ "epoch": 2.4685714285714284,
+ "grad_norm": 72.3791732788086,
+ "learning_rate": 2.4685714285714288e-05,
+ "loss": 0.8924,
+ "step": 432
+ },
+ {
+ "epoch": 2.474285714285714,
+ "grad_norm": 46.446556091308594,
+ "learning_rate": 2.4742857142857147e-05,
+ "loss": 1.203,
+ "step": 433
+ },
+ {
+ "epoch": 2.48,
+ "grad_norm": 32.066680908203125,
+ "learning_rate": 2.48e-05,
+ "loss": 1.2535,
+ "step": 434
+ },
+ {
+ "epoch": 2.4857142857142858,
+ "grad_norm": 46.31652069091797,
+ "learning_rate": 2.485714285714286e-05,
+ "loss": 1.5904,
+ "step": 435
+ },
+ {
+ "epoch": 2.4914285714285715,
+ "grad_norm": 42.79632568359375,
+ "learning_rate": 2.4914285714285715e-05,
+ "loss": 1.2958,
+ "step": 436
+ },
+ {
+ "epoch": 2.4971428571428573,
+ "grad_norm": 30.193653106689453,
+ "learning_rate": 2.4971428571428575e-05,
+ "loss": 1.0666,
+ "step": 437
+ },
+ {
+ "epoch": 2.5028571428571427,
+ "grad_norm": 78.6703872680664,
+ "learning_rate": 2.5028571428571428e-05,
+ "loss": 1.0003,
+ "step": 438
+ },
+ {
+ "epoch": 2.5085714285714285,
+ "grad_norm": 49.08283615112305,
+ "learning_rate": 2.5085714285714284e-05,
+ "loss": 0.9589,
+ "step": 439
+ },
+ {
+ "epoch": 2.5142857142857142,
+ "grad_norm": 27.450288772583008,
+ "learning_rate": 2.5142857142857147e-05,
+ "loss": 1.1667,
+ "step": 440
+ },
+ {
+ "epoch": 2.52,
+ "grad_norm": 120.01811981201172,
+ "learning_rate": 2.5200000000000003e-05,
+ "loss": 1.3021,
+ "step": 441
+ },
+ {
+ "epoch": 2.525714285714286,
+ "grad_norm": 54.62894058227539,
+ "learning_rate": 2.5257142857142855e-05,
+ "loss": 1.0864,
+ "step": 442
+ },
+ {
+ "epoch": 2.5314285714285716,
+ "grad_norm": 135.79263305664062,
+ "learning_rate": 2.5314285714285718e-05,
+ "loss": 1.1455,
+ "step": 443
+ },
+ {
+ "epoch": 2.5371428571428574,
+ "grad_norm": 32.32933807373047,
+ "learning_rate": 2.5371428571428574e-05,
+ "loss": 1.2702,
+ "step": 444
+ },
+ {
+ "epoch": 2.5428571428571427,
+ "grad_norm": 67.88202667236328,
+ "learning_rate": 2.542857142857143e-05,
+ "loss": 1.12,
+ "step": 445
+ },
+ {
+ "epoch": 2.5485714285714285,
+ "grad_norm": 54.43214416503906,
+ "learning_rate": 2.5485714285714287e-05,
+ "loss": 0.9457,
+ "step": 446
+ },
+ {
+ "epoch": 2.5542857142857143,
+ "grad_norm": 57.02019500732422,
+ "learning_rate": 2.5542857142857146e-05,
+ "loss": 1.0577,
+ "step": 447
+ },
+ {
+ "epoch": 2.56,
+ "grad_norm": 91.19580078125,
+ "learning_rate": 2.5600000000000002e-05,
+ "loss": 0.9295,
+ "step": 448
+ },
+ {
+ "epoch": 2.565714285714286,
+ "grad_norm": 70.9061279296875,
+ "learning_rate": 2.565714285714286e-05,
+ "loss": 1.1014,
+ "step": 449
+ },
+ {
+ "epoch": 2.571428571428571,
+ "grad_norm": 62.63069152832031,
+ "learning_rate": 2.5714285714285714e-05,
+ "loss": 0.9374,
+ "step": 450
+ },
+ {
+ "epoch": 2.5771428571428574,
+ "grad_norm": 54.828643798828125,
+ "learning_rate": 2.5771428571428574e-05,
+ "loss": 0.9032,
+ "step": 451
+ },
+ {
+ "epoch": 2.5828571428571427,
+ "grad_norm": 64.81256866455078,
+ "learning_rate": 2.582857142857143e-05,
+ "loss": 1.3023,
+ "step": 452
+ },
+ {
+ "epoch": 2.5885714285714285,
+ "grad_norm": 79.58660125732422,
+ "learning_rate": 2.5885714285714286e-05,
+ "loss": 0.849,
+ "step": 453
+ },
+ {
+ "epoch": 2.5942857142857143,
+ "grad_norm": 39.68455505371094,
+ "learning_rate": 2.5942857142857146e-05,
+ "loss": 1.2345,
+ "step": 454
+ },
+ {
+ "epoch": 2.6,
+ "grad_norm": 39.04762649536133,
+ "learning_rate": 2.6000000000000002e-05,
+ "loss": 1.0372,
+ "step": 455
+ },
+ {
+ "epoch": 2.605714285714286,
+ "grad_norm": 25.01093864440918,
+ "learning_rate": 2.6057142857142858e-05,
+ "loss": 0.9171,
+ "step": 456
+ },
+ {
+ "epoch": 2.611428571428571,
+ "grad_norm": 35.115135192871094,
+ "learning_rate": 2.6114285714285714e-05,
+ "loss": 1.1131,
+ "step": 457
+ },
+ {
+ "epoch": 2.617142857142857,
+ "grad_norm": 73.82764434814453,
+ "learning_rate": 2.6171428571428574e-05,
+ "loss": 1.02,
+ "step": 458
+ },
+ {
+ "epoch": 2.6228571428571428,
+ "grad_norm": 34.32424545288086,
+ "learning_rate": 2.622857142857143e-05,
+ "loss": 0.8844,
+ "step": 459
+ },
+ {
+ "epoch": 2.6285714285714286,
+ "grad_norm": 28.010997772216797,
+ "learning_rate": 2.6285714285714286e-05,
+ "loss": 1.1304,
+ "step": 460
+ },
+ {
+ "epoch": 2.6342857142857143,
+ "grad_norm": 49.68282699584961,
+ "learning_rate": 2.6342857142857142e-05,
+ "loss": 0.8364,
+ "step": 461
+ },
+ {
+ "epoch": 2.64,
+ "grad_norm": 30.49330711364746,
+ "learning_rate": 2.64e-05,
+ "loss": 1.0332,
+ "step": 462
+ },
+ {
+ "epoch": 2.645714285714286,
+ "grad_norm": 54.5880012512207,
+ "learning_rate": 2.6457142857142857e-05,
+ "loss": 1.0662,
+ "step": 463
+ },
+ {
+ "epoch": 2.6514285714285712,
+ "grad_norm": 33.37671661376953,
+ "learning_rate": 2.6514285714285714e-05,
+ "loss": 0.9994,
+ "step": 464
+ },
+ {
+ "epoch": 2.657142857142857,
+ "grad_norm": 83.88538360595703,
+ "learning_rate": 2.6571428571428576e-05,
+ "loss": 1.0517,
+ "step": 465
+ },
+ {
+ "epoch": 2.662857142857143,
+ "grad_norm": 43.34347152709961,
+ "learning_rate": 2.662857142857143e-05,
+ "loss": 1.0674,
+ "step": 466
+ },
+ {
+ "epoch": 2.6685714285714286,
+ "grad_norm": 60.542274475097656,
+ "learning_rate": 2.6685714285714285e-05,
+ "loss": 1.036,
+ "step": 467
+ },
+ {
+ "epoch": 2.6742857142857144,
+ "grad_norm": 25.915237426757812,
+ "learning_rate": 2.674285714285714e-05,
+ "loss": 1.033,
+ "step": 468
+ },
+ {
+ "epoch": 2.68,
+ "grad_norm": 25.848262786865234,
+ "learning_rate": 2.6800000000000004e-05,
+ "loss": 0.7942,
+ "step": 469
+ },
+ {
+ "epoch": 2.685714285714286,
+ "grad_norm": 36.423561096191406,
+ "learning_rate": 2.6857142857142857e-05,
+ "loss": 1.1182,
+ "step": 470
+ },
+ {
+ "epoch": 2.6914285714285713,
+ "grad_norm": 35.71501922607422,
+ "learning_rate": 2.6914285714285713e-05,
+ "loss": 0.9274,
+ "step": 471
+ },
+ {
+ "epoch": 2.697142857142857,
+ "grad_norm": 39.506500244140625,
+ "learning_rate": 2.6971428571428576e-05,
+ "loss": 0.7635,
+ "step": 472
+ },
+ {
+ "epoch": 2.702857142857143,
+ "grad_norm": 56.73497772216797,
+ "learning_rate": 2.7028571428571432e-05,
+ "loss": 1.04,
+ "step": 473
+ },
+ {
+ "epoch": 2.7085714285714286,
+ "grad_norm": 37.4494743347168,
+ "learning_rate": 2.7085714285714285e-05,
+ "loss": 0.956,
+ "step": 474
+ },
+ {
+ "epoch": 2.7142857142857144,
+ "grad_norm": 42.21010971069336,
+ "learning_rate": 2.714285714285714e-05,
+ "loss": 0.9411,
+ "step": 475
+ },
+ {
+ "epoch": 2.7199999999999998,
+ "grad_norm": 34.31499099731445,
+ "learning_rate": 2.7200000000000004e-05,
+ "loss": 1.1114,
+ "step": 476
+ },
+ {
+ "epoch": 2.725714285714286,
+ "grad_norm": 53.104976654052734,
+ "learning_rate": 2.725714285714286e-05,
+ "loss": 1.0545,
+ "step": 477
+ },
+ {
+ "epoch": 2.7314285714285713,
+ "grad_norm": 40.905887603759766,
+ "learning_rate": 2.7314285714285716e-05,
+ "loss": 1.1411,
+ "step": 478
+ },
+ {
+ "epoch": 2.737142857142857,
+ "grad_norm": 29.45627212524414,
+ "learning_rate": 2.737142857142857e-05,
+ "loss": 1.0638,
+ "step": 479
+ },
+ {
+ "epoch": 2.742857142857143,
+ "grad_norm": 41.70409393310547,
+ "learning_rate": 2.742857142857143e-05,
+ "loss": 0.9043,
+ "step": 480
+ },
+ {
+ "epoch": 2.7485714285714287,
+ "grad_norm": 46.97590637207031,
+ "learning_rate": 2.7485714285714288e-05,
+ "loss": 0.9821,
+ "step": 481
+ },
+ {
+ "epoch": 2.7542857142857144,
+ "grad_norm": 64.2392807006836,
+ "learning_rate": 2.7542857142857144e-05,
+ "loss": 0.9294,
+ "step": 482
+ },
+ {
+ "epoch": 2.76,
+ "grad_norm": 30.05347442626953,
+ "learning_rate": 2.7600000000000003e-05,
+ "loss": 1.1204,
+ "step": 483
+ },
+ {
+ "epoch": 2.7657142857142856,
+ "grad_norm": 55.71738815307617,
+ "learning_rate": 2.765714285714286e-05,
+ "loss": 0.9528,
+ "step": 484
+ },
+ {
+ "epoch": 2.7714285714285714,
+ "grad_norm": 73.06256866455078,
+ "learning_rate": 2.7714285714285716e-05,
+ "loss": 0.9858,
+ "step": 485
+ },
+ {
+ "epoch": 2.777142857142857,
+ "grad_norm": 91.91522979736328,
+ "learning_rate": 2.7771428571428572e-05,
+ "loss": 1.0217,
+ "step": 486
+ },
+ {
+ "epoch": 2.782857142857143,
+ "grad_norm": 38.642330169677734,
+ "learning_rate": 2.782857142857143e-05,
+ "loss": 0.7838,
+ "step": 487
+ },
+ {
+ "epoch": 2.7885714285714287,
+ "grad_norm": 44.415470123291016,
+ "learning_rate": 2.7885714285714287e-05,
+ "loss": 0.8804,
+ "step": 488
+ },
+ {
+ "epoch": 2.7942857142857145,
+ "grad_norm": 45.02664566040039,
+ "learning_rate": 2.7942857142857143e-05,
+ "loss": 1.4952,
+ "step": 489
+ },
+ {
+ "epoch": 2.8,
+ "grad_norm": 66.58822631835938,
+ "learning_rate": 2.8000000000000003e-05,
+ "loss": 0.8409,
+ "step": 490
+ },
+ {
+ "epoch": 2.8057142857142856,
+ "grad_norm": 35.23710632324219,
+ "learning_rate": 2.805714285714286e-05,
+ "loss": 1.3371,
+ "step": 491
+ },
+ {
+ "epoch": 2.8114285714285714,
+ "grad_norm": 41.119258880615234,
+ "learning_rate": 2.8114285714285715e-05,
+ "loss": 0.9477,
+ "step": 492
+ },
+ {
+ "epoch": 2.817142857142857,
+ "grad_norm": 223.302734375,
+ "learning_rate": 2.817142857142857e-05,
+ "loss": 0.8615,
+ "step": 493
+ },
+ {
+ "epoch": 2.822857142857143,
+ "grad_norm": 59.39470672607422,
+ "learning_rate": 2.822857142857143e-05,
+ "loss": 1.1558,
+ "step": 494
+ },
+ {
+ "epoch": 2.8285714285714287,
+ "grad_norm": 45.34219741821289,
+ "learning_rate": 2.8285714285714287e-05,
+ "loss": 0.7759,
+ "step": 495
+ },
+ {
+ "epoch": 2.8342857142857145,
+ "grad_norm": 36.19575119018555,
+ "learning_rate": 2.8342857142857143e-05,
+ "loss": 0.9873,
+ "step": 496
+ },
+ {
+ "epoch": 2.84,
+ "grad_norm": 77.39373779296875,
+ "learning_rate": 2.84e-05,
+ "loss": 0.8189,
+ "step": 497
+ },
+ {
+ "epoch": 2.8457142857142856,
+ "grad_norm": 60.195552825927734,
+ "learning_rate": 2.845714285714286e-05,
+ "loss": 0.9145,
+ "step": 498
+ },
+ {
+ "epoch": 2.8514285714285714,
+ "grad_norm": 64.12262725830078,
+ "learning_rate": 2.8514285714285715e-05,
+ "loss": 0.8064,
+ "step": 499
+ },
+ {
+ "epoch": 2.857142857142857,
+ "grad_norm": 33.90835952758789,
+ "learning_rate": 2.857142857142857e-05,
+ "loss": 0.9596,
+ "step": 500
+ },
+ {
+ "epoch": 2.862857142857143,
+ "grad_norm": 317.7658996582031,
+ "learning_rate": 2.8628571428571434e-05,
+ "loss": 1.1806,
+ "step": 501
+ },
+ {
+ "epoch": 2.8685714285714283,
+ "grad_norm": 51.08092498779297,
+ "learning_rate": 2.8685714285714286e-05,
+ "loss": 1.0974,
+ "step": 502
+ },
+ {
+ "epoch": 2.8742857142857146,
+ "grad_norm": 59.075836181640625,
+ "learning_rate": 2.8742857142857143e-05,
+ "loss": 1.0331,
+ "step": 503
+ },
+ {
+ "epoch": 2.88,
+ "grad_norm": 52.50455093383789,
+ "learning_rate": 2.88e-05,
+ "loss": 0.9406,
+ "step": 504
+ },
+ {
+ "epoch": 2.8857142857142857,
+ "grad_norm": 60.88606643676758,
+ "learning_rate": 2.885714285714286e-05,
+ "loss": 0.9156,
+ "step": 505
+ },
+ {
+ "epoch": 2.8914285714285715,
+ "grad_norm": 87.92547607421875,
+ "learning_rate": 2.8914285714285714e-05,
+ "loss": 1.2719,
+ "step": 506
+ },
+ {
+ "epoch": 2.8971428571428572,
+ "grad_norm": 39.06029510498047,
+ "learning_rate": 2.897142857142857e-05,
+ "loss": 1.0777,
+ "step": 507
+ },
+ {
+ "epoch": 2.902857142857143,
+ "grad_norm": 194.26075744628906,
+ "learning_rate": 2.9028571428571427e-05,
+ "loss": 0.9401,
+ "step": 508
+ },
+ {
+ "epoch": 2.9085714285714284,
+ "grad_norm": 56.89970016479492,
+ "learning_rate": 2.908571428571429e-05,
+ "loss": 0.9576,
+ "step": 509
+ },
+ {
+ "epoch": 2.914285714285714,
+ "grad_norm": 38.246788024902344,
+ "learning_rate": 2.9142857142857146e-05,
+ "loss": 1.0757,
+ "step": 510
+ },
+ {
+ "epoch": 2.92,
+ "grad_norm": 39.16490936279297,
+ "learning_rate": 2.9199999999999998e-05,
+ "loss": 1.2789,
+ "step": 511
+ },
+ {
+ "epoch": 2.9257142857142857,
+ "grad_norm": 68.37799835205078,
+ "learning_rate": 2.925714285714286e-05,
+ "loss": 0.7949,
+ "step": 512
+ },
+ {
+ "epoch": 2.9314285714285715,
+ "grad_norm": 46.202537536621094,
+ "learning_rate": 2.9314285714285717e-05,
+ "loss": 0.9752,
+ "step": 513
+ },
+ {
+ "epoch": 2.9371428571428573,
+ "grad_norm": 25.075580596923828,
+ "learning_rate": 2.9371428571428573e-05,
+ "loss": 0.9919,
+ "step": 514
+ },
+ {
+ "epoch": 2.942857142857143,
+ "grad_norm": 45.28491973876953,
+ "learning_rate": 2.9428571428571426e-05,
+ "loss": 1.0573,
+ "step": 515
+ },
+ {
+ "epoch": 2.9485714285714284,
+ "grad_norm": 69.61450958251953,
+ "learning_rate": 2.948571428571429e-05,
+ "loss": 1.1779,
+ "step": 516
+ },
+ {
+ "epoch": 2.954285714285714,
+ "grad_norm": 32.18259048461914,
+ "learning_rate": 2.9542857142857145e-05,
+ "loss": 0.8433,
+ "step": 517
+ },
+ {
+ "epoch": 2.96,
+ "grad_norm": 46.77888488769531,
+ "learning_rate": 2.96e-05,
+ "loss": 1.1483,
+ "step": 518
+ },
+ {
+ "epoch": 2.9657142857142857,
+ "grad_norm": 42.754432678222656,
+ "learning_rate": 2.965714285714286e-05,
+ "loss": 0.9295,
+ "step": 519
+ },
+ {
+ "epoch": 2.9714285714285715,
+ "grad_norm": 48.782291412353516,
+ "learning_rate": 2.9714285714285717e-05,
+ "loss": 1.3811,
+ "step": 520
+ },
+ {
+ "epoch": 2.977142857142857,
+ "grad_norm": 76.11039733886719,
+ "learning_rate": 2.9771428571428573e-05,
+ "loss": 1.3558,
+ "step": 521
+ },
+ {
+ "epoch": 2.982857142857143,
+ "grad_norm": 63.38977813720703,
+ "learning_rate": 2.982857142857143e-05,
+ "loss": 1.1242,
+ "step": 522
+ },
+ {
+ "epoch": 2.9885714285714284,
+ "grad_norm": 37.63509750366211,
+ "learning_rate": 2.988571428571429e-05,
+ "loss": 1.2458,
+ "step": 523
+ },
+ {
+ "epoch": 2.994285714285714,
+ "grad_norm": 68.86089324951172,
+ "learning_rate": 2.9942857142857145e-05,
+ "loss": 0.8439,
+ "step": 524
+ },
+ {
+ "epoch": 3.0,
+ "grad_norm": 50.35411834716797,
+ "learning_rate": 3e-05,
+ "loss": 0.8578,
+ "step": 525
+ },
+ {
+ "epoch": 3.0,
+ "eval_classes": 0,
+ "eval_loss": 0.8842275142669678,
+ "eval_map": 0.5188,
+ "eval_map_50": 0.5846,
+ "eval_map_75": 0.5479,
+ "eval_map_large": 0.5189,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.5188,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.6949,
+ "eval_mar_10": 0.9317,
+ "eval_mar_100": 0.9768,
+ "eval_mar_100_per_class": 0.9768,
+ "eval_mar_large": 0.9768,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 19.1252,
+ "eval_samples_per_second": 15.372,
+ "eval_steps_per_second": 1.935,
+ "step": 525
+ },
+ {
+ "epoch": 3.005714285714286,
+ "grad_norm": 47.68229293823242,
+ "learning_rate": 3.0057142857142857e-05,
+ "loss": 0.7792,
+ "step": 526
+ },
+ {
+ "epoch": 3.0114285714285716,
+ "grad_norm": 59.04511260986328,
+ "learning_rate": 3.0114285714285716e-05,
+ "loss": 0.85,
+ "step": 527
+ },
+ {
+ "epoch": 3.0171428571428573,
+ "grad_norm": 61.92051315307617,
+ "learning_rate": 3.0171428571428572e-05,
+ "loss": 0.9446,
+ "step": 528
+ },
+ {
+ "epoch": 3.0228571428571427,
+ "grad_norm": 30.297792434692383,
+ "learning_rate": 3.022857142857143e-05,
+ "loss": 1.1596,
+ "step": 529
+ },
+ {
+ "epoch": 3.0285714285714285,
+ "grad_norm": 54.8390007019043,
+ "learning_rate": 3.0285714285714288e-05,
+ "loss": 0.852,
+ "step": 530
+ },
+ {
+ "epoch": 3.0342857142857143,
+ "grad_norm": 45.49747848510742,
+ "learning_rate": 3.0342857142857144e-05,
+ "loss": 0.8763,
+ "step": 531
+ },
+ {
+ "epoch": 3.04,
+ "grad_norm": 52.076026916503906,
+ "learning_rate": 3.04e-05,
+ "loss": 1.164,
+ "step": 532
+ },
+ {
+ "epoch": 3.045714285714286,
+ "grad_norm": 28.280960083007812,
+ "learning_rate": 3.0457142857142856e-05,
+ "loss": 0.8567,
+ "step": 533
+ },
+ {
+ "epoch": 3.0514285714285716,
+ "grad_norm": 43.7817268371582,
+ "learning_rate": 3.0514285714285716e-05,
+ "loss": 1.1253,
+ "step": 534
+ },
+ {
+ "epoch": 3.057142857142857,
+ "grad_norm": 24.932104110717773,
+ "learning_rate": 3.057142857142857e-05,
+ "loss": 1.0088,
+ "step": 535
+ },
+ {
+ "epoch": 3.0628571428571427,
+ "grad_norm": 74.39136505126953,
+ "learning_rate": 3.062857142857143e-05,
+ "loss": 0.7345,
+ "step": 536
+ },
+ {
+ "epoch": 3.0685714285714285,
+ "grad_norm": 30.405521392822266,
+ "learning_rate": 3.068571428571429e-05,
+ "loss": 0.7931,
+ "step": 537
+ },
+ {
+ "epoch": 3.0742857142857143,
+ "grad_norm": 65.0182876586914,
+ "learning_rate": 3.0742857142857144e-05,
+ "loss": 0.7928,
+ "step": 538
+ },
+ {
+ "epoch": 3.08,
+ "grad_norm": 98.8231201171875,
+ "learning_rate": 3.08e-05,
+ "loss": 0.9738,
+ "step": 539
+ },
+ {
+ "epoch": 3.085714285714286,
+ "grad_norm": 54.99164581298828,
+ "learning_rate": 3.0857142857142856e-05,
+ "loss": 1.0894,
+ "step": 540
+ },
+ {
+ "epoch": 3.0914285714285716,
+ "grad_norm": 44.222469329833984,
+ "learning_rate": 3.0914285714285715e-05,
+ "loss": 0.867,
+ "step": 541
+ },
+ {
+ "epoch": 3.097142857142857,
+ "grad_norm": 73.86441040039062,
+ "learning_rate": 3.0971428571428575e-05,
+ "loss": 0.9246,
+ "step": 542
+ },
+ {
+ "epoch": 3.1028571428571428,
+ "grad_norm": 28.863567352294922,
+ "learning_rate": 3.102857142857143e-05,
+ "loss": 0.9498,
+ "step": 543
+ },
+ {
+ "epoch": 3.1085714285714285,
+ "grad_norm": 66.83555603027344,
+ "learning_rate": 3.108571428571429e-05,
+ "loss": 1.0356,
+ "step": 544
+ },
+ {
+ "epoch": 3.1142857142857143,
+ "grad_norm": 41.72322463989258,
+ "learning_rate": 3.114285714285715e-05,
+ "loss": 0.9656,
+ "step": 545
+ },
+ {
+ "epoch": 3.12,
+ "grad_norm": 49.59086608886719,
+ "learning_rate": 3.12e-05,
+ "loss": 0.8414,
+ "step": 546
+ },
+ {
+ "epoch": 3.125714285714286,
+ "grad_norm": 45.67478942871094,
+ "learning_rate": 3.125714285714286e-05,
+ "loss": 1.1142,
+ "step": 547
+ },
+ {
+ "epoch": 3.1314285714285712,
+ "grad_norm": 57.42824172973633,
+ "learning_rate": 3.131428571428572e-05,
+ "loss": 1.0558,
+ "step": 548
+ },
+ {
+ "epoch": 3.137142857142857,
+ "grad_norm": 51.68975830078125,
+ "learning_rate": 3.137142857142857e-05,
+ "loss": 0.7937,
+ "step": 549
+ },
+ {
+ "epoch": 3.142857142857143,
+ "grad_norm": 46.31576919555664,
+ "learning_rate": 3.142857142857143e-05,
+ "loss": 0.9352,
+ "step": 550
+ },
+ {
+ "epoch": 3.1485714285714286,
+ "grad_norm": 38.15770721435547,
+ "learning_rate": 3.148571428571428e-05,
+ "loss": 0.8738,
+ "step": 551
+ },
+ {
+ "epoch": 3.1542857142857144,
+ "grad_norm": 74.9398193359375,
+ "learning_rate": 3.154285714285714e-05,
+ "loss": 1.0197,
+ "step": 552
+ },
+ {
+ "epoch": 3.16,
+ "grad_norm": 92.85323333740234,
+ "learning_rate": 3.16e-05,
+ "loss": 0.857,
+ "step": 553
+ },
+ {
+ "epoch": 3.1657142857142855,
+ "grad_norm": 55.174171447753906,
+ "learning_rate": 3.1657142857142855e-05,
+ "loss": 0.8328,
+ "step": 554
+ },
+ {
+ "epoch": 3.1714285714285713,
+ "grad_norm": 41.23771286010742,
+ "learning_rate": 3.1714285714285715e-05,
+ "loss": 1.0083,
+ "step": 555
+ },
+ {
+ "epoch": 3.177142857142857,
+ "grad_norm": 37.808570861816406,
+ "learning_rate": 3.1771428571428574e-05,
+ "loss": 0.9961,
+ "step": 556
+ },
+ {
+ "epoch": 3.182857142857143,
+ "grad_norm": 119.2252426147461,
+ "learning_rate": 3.182857142857143e-05,
+ "loss": 0.9984,
+ "step": 557
+ },
+ {
+ "epoch": 3.1885714285714286,
+ "grad_norm": 52.03147888183594,
+ "learning_rate": 3.1885714285714286e-05,
+ "loss": 1.1348,
+ "step": 558
+ },
+ {
+ "epoch": 3.1942857142857144,
+ "grad_norm": 43.718875885009766,
+ "learning_rate": 3.1942857142857146e-05,
+ "loss": 0.9218,
+ "step": 559
+ },
+ {
+ "epoch": 3.2,
+ "grad_norm": 33.92399978637695,
+ "learning_rate": 3.2000000000000005e-05,
+ "loss": 1.4163,
+ "step": 560
+ },
+ {
+ "epoch": 3.2057142857142855,
+ "grad_norm": 63.245704650878906,
+ "learning_rate": 3.205714285714286e-05,
+ "loss": 0.9589,
+ "step": 561
+ },
+ {
+ "epoch": 3.2114285714285713,
+ "grad_norm": 26.542739868164062,
+ "learning_rate": 3.211428571428571e-05,
+ "loss": 0.9147,
+ "step": 562
+ },
+ {
+ "epoch": 3.217142857142857,
+ "grad_norm": 116.9144287109375,
+ "learning_rate": 3.217142857142858e-05,
+ "loss": 1.251,
+ "step": 563
+ },
+ {
+ "epoch": 3.222857142857143,
+ "grad_norm": 45.26572036743164,
+ "learning_rate": 3.222857142857143e-05,
+ "loss": 0.7846,
+ "step": 564
+ },
+ {
+ "epoch": 3.2285714285714286,
+ "grad_norm": 40.648685455322266,
+ "learning_rate": 3.228571428571428e-05,
+ "loss": 0.778,
+ "step": 565
+ },
+ {
+ "epoch": 3.2342857142857144,
+ "grad_norm": 79.68229675292969,
+ "learning_rate": 3.234285714285715e-05,
+ "loss": 1.1479,
+ "step": 566
+ },
+ {
+ "epoch": 3.24,
+ "grad_norm": 62.468353271484375,
+ "learning_rate": 3.24e-05,
+ "loss": 0.873,
+ "step": 567
+ },
+ {
+ "epoch": 3.2457142857142856,
+ "grad_norm": 46.446678161621094,
+ "learning_rate": 3.245714285714286e-05,
+ "loss": 0.7665,
+ "step": 568
+ },
+ {
+ "epoch": 3.2514285714285713,
+ "grad_norm": 30.566055297851562,
+ "learning_rate": 3.2514285714285714e-05,
+ "loss": 0.9141,
+ "step": 569
+ },
+ {
+ "epoch": 3.257142857142857,
+ "grad_norm": 43.58727264404297,
+ "learning_rate": 3.257142857142857e-05,
+ "loss": 0.8457,
+ "step": 570
+ },
+ {
+ "epoch": 3.262857142857143,
+ "grad_norm": 38.891563415527344,
+ "learning_rate": 3.262857142857143e-05,
+ "loss": 0.7681,
+ "step": 571
+ },
+ {
+ "epoch": 3.2685714285714287,
+ "grad_norm": 73.75027465820312,
+ "learning_rate": 3.2685714285714285e-05,
+ "loss": 1.3875,
+ "step": 572
+ },
+ {
+ "epoch": 3.2742857142857145,
+ "grad_norm": 21.43327522277832,
+ "learning_rate": 3.2742857142857145e-05,
+ "loss": 0.948,
+ "step": 573
+ },
+ {
+ "epoch": 3.2800000000000002,
+ "grad_norm": 47.069461822509766,
+ "learning_rate": 3.2800000000000004e-05,
+ "loss": 0.8676,
+ "step": 574
+ },
+ {
+ "epoch": 3.2857142857142856,
+ "grad_norm": 36.93059158325195,
+ "learning_rate": 3.285714285714286e-05,
+ "loss": 1.0315,
+ "step": 575
+ },
+ {
+ "epoch": 3.2914285714285714,
+ "grad_norm": 38.3972282409668,
+ "learning_rate": 3.291428571428572e-05,
+ "loss": 0.92,
+ "step": 576
+ },
+ {
+ "epoch": 3.297142857142857,
+ "grad_norm": 32.330604553222656,
+ "learning_rate": 3.2971428571428576e-05,
+ "loss": 1.0177,
+ "step": 577
+ },
+ {
+ "epoch": 3.302857142857143,
+ "grad_norm": 60.15842819213867,
+ "learning_rate": 3.302857142857143e-05,
+ "loss": 0.856,
+ "step": 578
+ },
+ {
+ "epoch": 3.3085714285714287,
+ "grad_norm": 29.508712768554688,
+ "learning_rate": 3.308571428571429e-05,
+ "loss": 0.9265,
+ "step": 579
+ },
+ {
+ "epoch": 3.314285714285714,
+ "grad_norm": 26.6241512298584,
+ "learning_rate": 3.314285714285714e-05,
+ "loss": 1.0629,
+ "step": 580
+ },
+ {
+ "epoch": 3.32,
+ "grad_norm": 55.71906280517578,
+ "learning_rate": 3.32e-05,
+ "loss": 0.8574,
+ "step": 581
+ },
+ {
+ "epoch": 3.3257142857142856,
+ "grad_norm": 32.77925109863281,
+ "learning_rate": 3.325714285714286e-05,
+ "loss": 0.913,
+ "step": 582
+ },
+ {
+ "epoch": 3.3314285714285714,
+ "grad_norm": 391.8411560058594,
+ "learning_rate": 3.331428571428571e-05,
+ "loss": 0.8752,
+ "step": 583
+ },
+ {
+ "epoch": 3.337142857142857,
+ "grad_norm": 88.31011199951172,
+ "learning_rate": 3.337142857142857e-05,
+ "loss": 0.9255,
+ "step": 584
+ },
+ {
+ "epoch": 3.342857142857143,
+ "grad_norm": 45.067344665527344,
+ "learning_rate": 3.342857142857143e-05,
+ "loss": 0.989,
+ "step": 585
+ },
+ {
+ "epoch": 3.3485714285714288,
+ "grad_norm": 26.812660217285156,
+ "learning_rate": 3.3485714285714285e-05,
+ "loss": 0.7538,
+ "step": 586
+ },
+ {
+ "epoch": 3.354285714285714,
+ "grad_norm": 62.75189208984375,
+ "learning_rate": 3.3542857142857144e-05,
+ "loss": 0.9422,
+ "step": 587
+ },
+ {
+ "epoch": 3.36,
+ "grad_norm": 47.160892486572266,
+ "learning_rate": 3.3600000000000004e-05,
+ "loss": 1.0516,
+ "step": 588
+ },
+ {
+ "epoch": 3.3657142857142857,
+ "grad_norm": 45.98322677612305,
+ "learning_rate": 3.3657142857142856e-05,
+ "loss": 1.0043,
+ "step": 589
+ },
+ {
+ "epoch": 3.3714285714285714,
+ "grad_norm": 49.24179458618164,
+ "learning_rate": 3.3714285714285716e-05,
+ "loss": 0.8443,
+ "step": 590
+ },
+ {
+ "epoch": 3.3771428571428572,
+ "grad_norm": 28.958084106445312,
+ "learning_rate": 3.377142857142857e-05,
+ "loss": 1.1926,
+ "step": 591
+ },
+ {
+ "epoch": 3.382857142857143,
+ "grad_norm": 47.162452697753906,
+ "learning_rate": 3.3828571428571435e-05,
+ "loss": 0.796,
+ "step": 592
+ },
+ {
+ "epoch": 3.388571428571429,
+ "grad_norm": 98.75343322753906,
+ "learning_rate": 3.388571428571429e-05,
+ "loss": 0.8616,
+ "step": 593
+ },
+ {
+ "epoch": 3.394285714285714,
+ "grad_norm": 40.074527740478516,
+ "learning_rate": 3.394285714285714e-05,
+ "loss": 0.9336,
+ "step": 594
+ },
+ {
+ "epoch": 3.4,
+ "grad_norm": 37.29640197753906,
+ "learning_rate": 3.4000000000000007e-05,
+ "loss": 1.0572,
+ "step": 595
+ },
+ {
+ "epoch": 3.4057142857142857,
+ "grad_norm": 27.140880584716797,
+ "learning_rate": 3.405714285714286e-05,
+ "loss": 1.0782,
+ "step": 596
+ },
+ {
+ "epoch": 3.4114285714285715,
+ "grad_norm": 40.091896057128906,
+ "learning_rate": 3.411428571428571e-05,
+ "loss": 1.0635,
+ "step": 597
+ },
+ {
+ "epoch": 3.4171428571428573,
+ "grad_norm": 24.463085174560547,
+ "learning_rate": 3.417142857142857e-05,
+ "loss": 0.9266,
+ "step": 598
+ },
+ {
+ "epoch": 3.422857142857143,
+ "grad_norm": 30.32758903503418,
+ "learning_rate": 3.422857142857143e-05,
+ "loss": 0.942,
+ "step": 599
+ },
+ {
+ "epoch": 3.4285714285714284,
+ "grad_norm": 39.49159622192383,
+ "learning_rate": 3.428571428571429e-05,
+ "loss": 0.8498,
+ "step": 600
+ },
+ {
+ "epoch": 3.434285714285714,
+ "grad_norm": 39.12627410888672,
+ "learning_rate": 3.434285714285714e-05,
+ "loss": 0.6839,
+ "step": 601
+ },
+ {
+ "epoch": 3.44,
+ "grad_norm": 38.3946647644043,
+ "learning_rate": 3.4399999999999996e-05,
+ "loss": 0.971,
+ "step": 602
+ },
+ {
+ "epoch": 3.4457142857142857,
+ "grad_norm": 73.4950180053711,
+ "learning_rate": 3.445714285714286e-05,
+ "loss": 1.1149,
+ "step": 603
+ },
+ {
+ "epoch": 3.4514285714285715,
+ "grad_norm": 84.41697692871094,
+ "learning_rate": 3.4514285714285715e-05,
+ "loss": 0.8855,
+ "step": 604
+ },
+ {
+ "epoch": 3.4571428571428573,
+ "grad_norm": 32.518768310546875,
+ "learning_rate": 3.4571428571428574e-05,
+ "loss": 1.0337,
+ "step": 605
+ },
+ {
+ "epoch": 3.4628571428571426,
+ "grad_norm": 45.78841781616211,
+ "learning_rate": 3.4628571428571434e-05,
+ "loss": 0.9791,
+ "step": 606
+ },
+ {
+ "epoch": 3.4685714285714284,
+ "grad_norm": 45.05651092529297,
+ "learning_rate": 3.468571428571429e-05,
+ "loss": 1.1856,
+ "step": 607
+ },
+ {
+ "epoch": 3.474285714285714,
+ "grad_norm": 129.0909881591797,
+ "learning_rate": 3.4742857142857146e-05,
+ "loss": 0.809,
+ "step": 608
+ },
+ {
+ "epoch": 3.48,
+ "grad_norm": 49.62913131713867,
+ "learning_rate": 3.48e-05,
+ "loss": 0.9248,
+ "step": 609
+ },
+ {
+ "epoch": 3.4857142857142858,
+ "grad_norm": 96.46662139892578,
+ "learning_rate": 3.485714285714286e-05,
+ "loss": 0.9165,
+ "step": 610
+ },
+ {
+ "epoch": 3.4914285714285715,
+ "grad_norm": 66.09587097167969,
+ "learning_rate": 3.491428571428572e-05,
+ "loss": 0.7754,
+ "step": 611
+ },
+ {
+ "epoch": 3.4971428571428573,
+ "grad_norm": 54.851680755615234,
+ "learning_rate": 3.497142857142857e-05,
+ "loss": 0.9728,
+ "step": 612
+ },
+ {
+ "epoch": 3.5028571428571427,
+ "grad_norm": 44.88764953613281,
+ "learning_rate": 3.502857142857143e-05,
+ "loss": 1.0519,
+ "step": 613
+ },
+ {
+ "epoch": 3.5085714285714285,
+ "grad_norm": 40.02257537841797,
+ "learning_rate": 3.508571428571429e-05,
+ "loss": 0.9248,
+ "step": 614
+ },
+ {
+ "epoch": 3.5142857142857142,
+ "grad_norm": 78.39165496826172,
+ "learning_rate": 3.514285714285714e-05,
+ "loss": 1.0941,
+ "step": 615
+ },
+ {
+ "epoch": 3.52,
+ "grad_norm": 41.34626770019531,
+ "learning_rate": 3.52e-05,
+ "loss": 0.9319,
+ "step": 616
+ },
+ {
+ "epoch": 3.525714285714286,
+ "grad_norm": 41.48112869262695,
+ "learning_rate": 3.525714285714286e-05,
+ "loss": 0.9119,
+ "step": 617
+ },
+ {
+ "epoch": 3.5314285714285716,
+ "grad_norm": 117.76349639892578,
+ "learning_rate": 3.5314285714285714e-05,
+ "loss": 0.6957,
+ "step": 618
+ },
+ {
+ "epoch": 3.5371428571428574,
+ "grad_norm": 55.89582061767578,
+ "learning_rate": 3.5371428571428574e-05,
+ "loss": 1.2014,
+ "step": 619
+ },
+ {
+ "epoch": 3.5428571428571427,
+ "grad_norm": 47.29049301147461,
+ "learning_rate": 3.5428571428571426e-05,
+ "loss": 0.9072,
+ "step": 620
+ },
+ {
+ "epoch": 3.5485714285714285,
+ "grad_norm": 52.945316314697266,
+ "learning_rate": 3.5485714285714286e-05,
+ "loss": 1.0195,
+ "step": 621
+ },
+ {
+ "epoch": 3.5542857142857143,
+ "grad_norm": 65.0621109008789,
+ "learning_rate": 3.5542857142857145e-05,
+ "loss": 0.8933,
+ "step": 622
+ },
+ {
+ "epoch": 3.56,
+ "grad_norm": 47.37184524536133,
+ "learning_rate": 3.56e-05,
+ "loss": 0.7727,
+ "step": 623
+ },
+ {
+ "epoch": 3.565714285714286,
+ "grad_norm": 46.98302459716797,
+ "learning_rate": 3.5657142857142864e-05,
+ "loss": 0.9441,
+ "step": 624
+ },
+ {
+ "epoch": 3.571428571428571,
+ "grad_norm": 41.932762145996094,
+ "learning_rate": 3.571428571428572e-05,
+ "loss": 1.0913,
+ "step": 625
+ },
+ {
+ "epoch": 3.5771428571428574,
+ "grad_norm": 80.59725189208984,
+ "learning_rate": 3.577142857142857e-05,
+ "loss": 0.7176,
+ "step": 626
+ },
+ {
+ "epoch": 3.5828571428571427,
+ "grad_norm": 33.14458465576172,
+ "learning_rate": 3.582857142857143e-05,
+ "loss": 1.0436,
+ "step": 627
+ },
+ {
+ "epoch": 3.5885714285714285,
+ "grad_norm": 52.75685119628906,
+ "learning_rate": 3.588571428571429e-05,
+ "loss": 0.9163,
+ "step": 628
+ },
+ {
+ "epoch": 3.5942857142857143,
+ "grad_norm": 31.633190155029297,
+ "learning_rate": 3.594285714285714e-05,
+ "loss": 1.1692,
+ "step": 629
+ },
+ {
+ "epoch": 3.6,
+ "grad_norm": 29.593358993530273,
+ "learning_rate": 3.6e-05,
+ "loss": 0.8321,
+ "step": 630
+ },
+ {
+ "epoch": 3.605714285714286,
+ "grad_norm": 88.72626495361328,
+ "learning_rate": 3.605714285714286e-05,
+ "loss": 1.0763,
+ "step": 631
+ },
+ {
+ "epoch": 3.611428571428571,
+ "grad_norm": 45.46866989135742,
+ "learning_rate": 3.611428571428572e-05,
+ "loss": 0.9867,
+ "step": 632
+ },
+ {
+ "epoch": 3.617142857142857,
+ "grad_norm": 39.636932373046875,
+ "learning_rate": 3.617142857142857e-05,
+ "loss": 0.8277,
+ "step": 633
+ },
+ {
+ "epoch": 3.6228571428571428,
+ "grad_norm": 48.63605880737305,
+ "learning_rate": 3.6228571428571425e-05,
+ "loss": 0.9193,
+ "step": 634
+ },
+ {
+ "epoch": 3.6285714285714286,
+ "grad_norm": 36.28878402709961,
+ "learning_rate": 3.628571428571429e-05,
+ "loss": 1.3838,
+ "step": 635
+ },
+ {
+ "epoch": 3.6342857142857143,
+ "grad_norm": 25.930580139160156,
+ "learning_rate": 3.6342857142857144e-05,
+ "loss": 0.814,
+ "step": 636
+ },
+ {
+ "epoch": 3.64,
+ "grad_norm": 56.88616943359375,
+ "learning_rate": 3.6400000000000004e-05,
+ "loss": 1.0445,
+ "step": 637
+ },
+ {
+ "epoch": 3.645714285714286,
+ "grad_norm": 38.71080017089844,
+ "learning_rate": 3.6457142857142857e-05,
+ "loss": 0.9755,
+ "step": 638
+ },
+ {
+ "epoch": 3.6514285714285712,
+ "grad_norm": 47.88688659667969,
+ "learning_rate": 3.6514285714285716e-05,
+ "loss": 1.0582,
+ "step": 639
+ },
+ {
+ "epoch": 3.657142857142857,
+ "grad_norm": 29.28702735900879,
+ "learning_rate": 3.6571428571428576e-05,
+ "loss": 0.845,
+ "step": 640
+ },
+ {
+ "epoch": 3.662857142857143,
+ "grad_norm": 19.560808181762695,
+ "learning_rate": 3.662857142857143e-05,
+ "loss": 0.9432,
+ "step": 641
+ },
+ {
+ "epoch": 3.6685714285714286,
+ "grad_norm": 28.74838638305664,
+ "learning_rate": 3.668571428571429e-05,
+ "loss": 1.2524,
+ "step": 642
+ },
+ {
+ "epoch": 3.6742857142857144,
+ "grad_norm": 100.5932388305664,
+ "learning_rate": 3.674285714285715e-05,
+ "loss": 0.9939,
+ "step": 643
+ },
+ {
+ "epoch": 3.68,
+ "grad_norm": 48.24791717529297,
+ "learning_rate": 3.68e-05,
+ "loss": 0.8666,
+ "step": 644
+ },
+ {
+ "epoch": 3.685714285714286,
+ "grad_norm": 38.138423919677734,
+ "learning_rate": 3.685714285714286e-05,
+ "loss": 0.9091,
+ "step": 645
+ },
+ {
+ "epoch": 3.6914285714285713,
+ "grad_norm": 149.0504913330078,
+ "learning_rate": 3.691428571428572e-05,
+ "loss": 1.1256,
+ "step": 646
+ },
+ {
+ "epoch": 3.697142857142857,
+ "grad_norm": 34.19496154785156,
+ "learning_rate": 3.697142857142857e-05,
+ "loss": 0.9972,
+ "step": 647
+ },
+ {
+ "epoch": 3.702857142857143,
+ "grad_norm": 33.34417724609375,
+ "learning_rate": 3.702857142857143e-05,
+ "loss": 0.7458,
+ "step": 648
+ },
+ {
+ "epoch": 3.7085714285714286,
+ "grad_norm": 26.71389389038086,
+ "learning_rate": 3.7085714285714284e-05,
+ "loss": 1.0144,
+ "step": 649
+ },
+ {
+ "epoch": 3.7142857142857144,
+ "grad_norm": 49.832374572753906,
+ "learning_rate": 3.7142857142857143e-05,
+ "loss": 0.8261,
+ "step": 650
+ },
+ {
+ "epoch": 3.7199999999999998,
+ "grad_norm": 50.95949172973633,
+ "learning_rate": 3.72e-05,
+ "loss": 0.7317,
+ "step": 651
+ },
+ {
+ "epoch": 3.725714285714286,
+ "grad_norm": 38.240806579589844,
+ "learning_rate": 3.7257142857142856e-05,
+ "loss": 0.9429,
+ "step": 652
+ },
+ {
+ "epoch": 3.7314285714285713,
+ "grad_norm": 102.51292419433594,
+ "learning_rate": 3.7314285714285715e-05,
+ "loss": 0.8528,
+ "step": 653
+ },
+ {
+ "epoch": 3.737142857142857,
+ "grad_norm": 66.62725830078125,
+ "learning_rate": 3.7371428571428575e-05,
+ "loss": 0.8035,
+ "step": 654
+ },
+ {
+ "epoch": 3.742857142857143,
+ "grad_norm": 39.304439544677734,
+ "learning_rate": 3.742857142857143e-05,
+ "loss": 0.8531,
+ "step": 655
+ },
+ {
+ "epoch": 3.7485714285714287,
+ "grad_norm": 41.29141616821289,
+ "learning_rate": 3.748571428571429e-05,
+ "loss": 0.9209,
+ "step": 656
+ },
+ {
+ "epoch": 3.7542857142857144,
+ "grad_norm": 42.41242599487305,
+ "learning_rate": 3.7542857142857146e-05,
+ "loss": 1.2478,
+ "step": 657
+ },
+ {
+ "epoch": 3.76,
+ "grad_norm": 31.831212997436523,
+ "learning_rate": 3.76e-05,
+ "loss": 1.1195,
+ "step": 658
+ },
+ {
+ "epoch": 3.7657142857142856,
+ "grad_norm": 74.06922149658203,
+ "learning_rate": 3.765714285714286e-05,
+ "loss": 1.0407,
+ "step": 659
+ },
+ {
+ "epoch": 3.7714285714285714,
+ "grad_norm": 54.0971794128418,
+ "learning_rate": 3.771428571428572e-05,
+ "loss": 0.9377,
+ "step": 660
+ },
+ {
+ "epoch": 3.777142857142857,
+ "grad_norm": 29.178192138671875,
+ "learning_rate": 3.777142857142858e-05,
+ "loss": 0.7888,
+ "step": 661
+ },
+ {
+ "epoch": 3.782857142857143,
+ "grad_norm": 52.03077697753906,
+ "learning_rate": 3.782857142857143e-05,
+ "loss": 0.9519,
+ "step": 662
+ },
+ {
+ "epoch": 3.7885714285714287,
+ "grad_norm": 31.274682998657227,
+ "learning_rate": 3.788571428571428e-05,
+ "loss": 0.7655,
+ "step": 663
+ },
+ {
+ "epoch": 3.7942857142857145,
+ "grad_norm": 78.56085968017578,
+ "learning_rate": 3.794285714285715e-05,
+ "loss": 0.8495,
+ "step": 664
+ },
+ {
+ "epoch": 3.8,
+ "grad_norm": 40.677398681640625,
+ "learning_rate": 3.8e-05,
+ "loss": 0.7555,
+ "step": 665
+ },
+ {
+ "epoch": 3.8057142857142856,
+ "grad_norm": 25.844785690307617,
+ "learning_rate": 3.8057142857142855e-05,
+ "loss": 0.9978,
+ "step": 666
+ },
+ {
+ "epoch": 3.8114285714285714,
+ "grad_norm": 38.2746467590332,
+ "learning_rate": 3.8114285714285714e-05,
+ "loss": 0.8147,
+ "step": 667
+ },
+ {
+ "epoch": 3.817142857142857,
+ "grad_norm": 85.11207580566406,
+ "learning_rate": 3.8171428571428574e-05,
+ "loss": 0.8933,
+ "step": 668
+ },
+ {
+ "epoch": 3.822857142857143,
+ "grad_norm": 79.5720443725586,
+ "learning_rate": 3.822857142857143e-05,
+ "loss": 0.8698,
+ "step": 669
+ },
+ {
+ "epoch": 3.8285714285714287,
+ "grad_norm": 66.55155181884766,
+ "learning_rate": 3.8285714285714286e-05,
+ "loss": 1.2136,
+ "step": 670
+ },
+ {
+ "epoch": 3.8342857142857145,
+ "grad_norm": 50.9844970703125,
+ "learning_rate": 3.8342857142857146e-05,
+ "loss": 1.1532,
+ "step": 671
+ },
+ {
+ "epoch": 3.84,
+ "grad_norm": 40.85847854614258,
+ "learning_rate": 3.8400000000000005e-05,
+ "loss": 1.304,
+ "step": 672
+ },
+ {
+ "epoch": 3.8457142857142856,
+ "grad_norm": 49.67957305908203,
+ "learning_rate": 3.845714285714286e-05,
+ "loss": 1.1971,
+ "step": 673
+ },
+ {
+ "epoch": 3.8514285714285714,
+ "grad_norm": 68.415771484375,
+ "learning_rate": 3.851428571428571e-05,
+ "loss": 1.0518,
+ "step": 674
+ },
+ {
+ "epoch": 3.857142857142857,
+ "grad_norm": 45.53511428833008,
+ "learning_rate": 3.857142857142858e-05,
+ "loss": 1.0611,
+ "step": 675
+ },
+ {
+ "epoch": 3.862857142857143,
+ "grad_norm": 42.6379280090332,
+ "learning_rate": 3.862857142857143e-05,
+ "loss": 1.096,
+ "step": 676
+ },
+ {
+ "epoch": 3.8685714285714283,
+ "grad_norm": 39.18658447265625,
+ "learning_rate": 3.868571428571429e-05,
+ "loss": 1.2532,
+ "step": 677
+ },
+ {
+ "epoch": 3.8742857142857146,
+ "grad_norm": 68.9410629272461,
+ "learning_rate": 3.874285714285715e-05,
+ "loss": 0.8739,
+ "step": 678
+ },
+ {
+ "epoch": 3.88,
+ "grad_norm": 53.28384780883789,
+ "learning_rate": 3.88e-05,
+ "loss": 0.9341,
+ "step": 679
+ },
+ {
+ "epoch": 3.8857142857142857,
+ "grad_norm": 30.44455909729004,
+ "learning_rate": 3.885714285714286e-05,
+ "loss": 0.8476,
+ "step": 680
+ },
+ {
+ "epoch": 3.8914285714285715,
+ "grad_norm": 60.77157974243164,
+ "learning_rate": 3.8914285714285713e-05,
+ "loss": 0.8708,
+ "step": 681
+ },
+ {
+ "epoch": 3.8971428571428572,
+ "grad_norm": 90.17772674560547,
+ "learning_rate": 3.897142857142857e-05,
+ "loss": 0.8231,
+ "step": 682
+ },
+ {
+ "epoch": 3.902857142857143,
+ "grad_norm": 133.7808837890625,
+ "learning_rate": 3.902857142857143e-05,
+ "loss": 1.1568,
+ "step": 683
+ },
+ {
+ "epoch": 3.9085714285714284,
+ "grad_norm": 64.70571899414062,
+ "learning_rate": 3.9085714285714285e-05,
+ "loss": 1.0223,
+ "step": 684
+ },
+ {
+ "epoch": 3.914285714285714,
+ "grad_norm": 29.478635787963867,
+ "learning_rate": 3.9142857142857145e-05,
+ "loss": 0.9871,
+ "step": 685
+ },
+ {
+ "epoch": 3.92,
+ "grad_norm": 56.8112678527832,
+ "learning_rate": 3.9200000000000004e-05,
+ "loss": 0.8216,
+ "step": 686
+ },
+ {
+ "epoch": 3.9257142857142857,
+ "grad_norm": 240.74072265625,
+ "learning_rate": 3.925714285714286e-05,
+ "loss": 1.1837,
+ "step": 687
+ },
+ {
+ "epoch": 3.9314285714285715,
+ "grad_norm": 26.388124465942383,
+ "learning_rate": 3.9314285714285716e-05,
+ "loss": 0.9203,
+ "step": 688
+ },
+ {
+ "epoch": 3.9371428571428573,
+ "grad_norm": 74.51782989501953,
+ "learning_rate": 3.9371428571428576e-05,
+ "loss": 0.7446,
+ "step": 689
+ },
+ {
+ "epoch": 3.942857142857143,
+ "grad_norm": 24.310230255126953,
+ "learning_rate": 3.942857142857143e-05,
+ "loss": 1.0372,
+ "step": 690
+ },
+ {
+ "epoch": 3.9485714285714284,
+ "grad_norm": 36.657264709472656,
+ "learning_rate": 3.948571428571429e-05,
+ "loss": 1.037,
+ "step": 691
+ },
+ {
+ "epoch": 3.954285714285714,
+ "grad_norm": 42.5634880065918,
+ "learning_rate": 3.954285714285714e-05,
+ "loss": 0.871,
+ "step": 692
+ },
+ {
+ "epoch": 3.96,
+ "grad_norm": 119.99665832519531,
+ "learning_rate": 3.960000000000001e-05,
+ "loss": 1.2884,
+ "step": 693
+ },
+ {
+ "epoch": 3.9657142857142857,
+ "grad_norm": 107.50180053710938,
+ "learning_rate": 3.965714285714286e-05,
+ "loss": 0.7411,
+ "step": 694
+ },
+ {
+ "epoch": 3.9714285714285715,
+ "grad_norm": 40.23571014404297,
+ "learning_rate": 3.971428571428571e-05,
+ "loss": 0.877,
+ "step": 695
+ },
+ {
+ "epoch": 3.977142857142857,
+ "grad_norm": 251.15298461914062,
+ "learning_rate": 3.977142857142857e-05,
+ "loss": 1.0882,
+ "step": 696
+ },
+ {
+ "epoch": 3.982857142857143,
+ "grad_norm": 24.243986129760742,
+ "learning_rate": 3.982857142857143e-05,
+ "loss": 0.8619,
+ "step": 697
+ },
+ {
+ "epoch": 3.9885714285714284,
+ "grad_norm": 38.35419464111328,
+ "learning_rate": 3.9885714285714284e-05,
+ "loss": 1.1251,
+ "step": 698
+ },
+ {
+ "epoch": 3.994285714285714,
+ "grad_norm": 27.93589973449707,
+ "learning_rate": 3.9942857142857144e-05,
+ "loss": 0.8057,
+ "step": 699
+ },
+ {
+ "epoch": 4.0,
+ "grad_norm": 27.46137046813965,
+ "learning_rate": 4e-05,
+ "loss": 0.8113,
+ "step": 700
+ },
+ {
+ "epoch": 4.0,
+ "eval_classes": 0,
+ "eval_loss": 0.9107489585876465,
+ "eval_map": 0.7488,
+ "eval_map_50": 0.8322,
+ "eval_map_75": 0.8001,
+ "eval_map_large": 0.7489,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.7488,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.6825,
+ "eval_mar_10": 0.934,
+ "eval_mar_100": 0.9676,
+ "eval_mar_100_per_class": 0.9676,
+ "eval_mar_large": 0.9676,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 17.1506,
+ "eval_samples_per_second": 17.142,
+ "eval_steps_per_second": 2.157,
+ "step": 700
+ },
+ {
+ "epoch": 4.005714285714285,
+ "grad_norm": 43.37449645996094,
+ "learning_rate": 4.005714285714286e-05,
+ "loss": 0.8611,
+ "step": 701
+ },
+ {
+ "epoch": 4.011428571428572,
+ "grad_norm": 67.10334014892578,
+ "learning_rate": 4.0114285714285715e-05,
+ "loss": 1.0242,
+ "step": 702
+ },
+ {
+ "epoch": 4.017142857142857,
+ "grad_norm": 31.47416877746582,
+ "learning_rate": 4.017142857142857e-05,
+ "loss": 1.1063,
+ "step": 703
+ },
+ {
+ "epoch": 4.022857142857143,
+ "grad_norm": 50.016170501708984,
+ "learning_rate": 4.0228571428571434e-05,
+ "loss": 0.8446,
+ "step": 704
+ },
+ {
+ "epoch": 4.0285714285714285,
+ "grad_norm": 75.51326751708984,
+ "learning_rate": 4.028571428571429e-05,
+ "loss": 0.9451,
+ "step": 705
+ },
+ {
+ "epoch": 4.034285714285715,
+ "grad_norm": 40.627681732177734,
+ "learning_rate": 4.034285714285715e-05,
+ "loss": 0.9035,
+ "step": 706
+ },
+ {
+ "epoch": 4.04,
+ "grad_norm": 31.454418182373047,
+ "learning_rate": 4.0400000000000006e-05,
+ "loss": 0.9115,
+ "step": 707
+ },
+ {
+ "epoch": 4.045714285714285,
+ "grad_norm": 43.80349349975586,
+ "learning_rate": 4.045714285714286e-05,
+ "loss": 0.9127,
+ "step": 708
+ },
+ {
+ "epoch": 4.051428571428572,
+ "grad_norm": 53.53621292114258,
+ "learning_rate": 4.051428571428572e-05,
+ "loss": 0.75,
+ "step": 709
+ },
+ {
+ "epoch": 4.057142857142857,
+ "grad_norm": 34.03127670288086,
+ "learning_rate": 4.057142857142857e-05,
+ "loss": 0.8359,
+ "step": 710
+ },
+ {
+ "epoch": 4.062857142857143,
+ "grad_norm": 38.53433609008789,
+ "learning_rate": 4.062857142857143e-05,
+ "loss": 0.7569,
+ "step": 711
+ },
+ {
+ "epoch": 4.0685714285714285,
+ "grad_norm": 23.258892059326172,
+ "learning_rate": 4.068571428571429e-05,
+ "loss": 0.8482,
+ "step": 712
+ },
+ {
+ "epoch": 4.074285714285715,
+ "grad_norm": 48.58507537841797,
+ "learning_rate": 4.074285714285714e-05,
+ "loss": 0.8351,
+ "step": 713
+ },
+ {
+ "epoch": 4.08,
+ "grad_norm": 92.51113891601562,
+ "learning_rate": 4.08e-05,
+ "loss": 0.7758,
+ "step": 714
+ },
+ {
+ "epoch": 4.085714285714285,
+ "grad_norm": 40.11365509033203,
+ "learning_rate": 4.085714285714286e-05,
+ "loss": 0.607,
+ "step": 715
+ },
+ {
+ "epoch": 4.091428571428572,
+ "grad_norm": 54.20933532714844,
+ "learning_rate": 4.0914285714285715e-05,
+ "loss": 0.8703,
+ "step": 716
+ },
+ {
+ "epoch": 4.097142857142857,
+ "grad_norm": 30.776321411132812,
+ "learning_rate": 4.0971428571428574e-05,
+ "loss": 0.8529,
+ "step": 717
+ },
+ {
+ "epoch": 4.102857142857143,
+ "grad_norm": 61.20046615600586,
+ "learning_rate": 4.1028571428571434e-05,
+ "loss": 0.8714,
+ "step": 718
+ },
+ {
+ "epoch": 4.1085714285714285,
+ "grad_norm": 49.445125579833984,
+ "learning_rate": 4.1085714285714286e-05,
+ "loss": 0.9068,
+ "step": 719
+ },
+ {
+ "epoch": 4.114285714285714,
+ "grad_norm": 48.27702331542969,
+ "learning_rate": 4.1142857142857146e-05,
+ "loss": 0.8664,
+ "step": 720
+ },
+ {
+ "epoch": 4.12,
+ "grad_norm": 75.87247467041016,
+ "learning_rate": 4.12e-05,
+ "loss": 1.0875,
+ "step": 721
+ },
+ {
+ "epoch": 4.1257142857142854,
+ "grad_norm": 29.371505737304688,
+ "learning_rate": 4.125714285714286e-05,
+ "loss": 0.679,
+ "step": 722
+ },
+ {
+ "epoch": 4.131428571428572,
+ "grad_norm": 110.02105712890625,
+ "learning_rate": 4.131428571428572e-05,
+ "loss": 1.1682,
+ "step": 723
+ },
+ {
+ "epoch": 4.137142857142857,
+ "grad_norm": 36.369163513183594,
+ "learning_rate": 4.137142857142857e-05,
+ "loss": 0.839,
+ "step": 724
+ },
+ {
+ "epoch": 4.142857142857143,
+ "grad_norm": 50.09689712524414,
+ "learning_rate": 4.1428571428571437e-05,
+ "loss": 0.8611,
+ "step": 725
+ },
+ {
+ "epoch": 4.148571428571429,
+ "grad_norm": 40.9254264831543,
+ "learning_rate": 4.148571428571429e-05,
+ "loss": 0.9962,
+ "step": 726
+ },
+ {
+ "epoch": 4.154285714285714,
+ "grad_norm": 50.97928237915039,
+ "learning_rate": 4.154285714285714e-05,
+ "loss": 1.0002,
+ "step": 727
+ },
+ {
+ "epoch": 4.16,
+ "grad_norm": 67.4588394165039,
+ "learning_rate": 4.16e-05,
+ "loss": 0.9571,
+ "step": 728
+ },
+ {
+ "epoch": 4.1657142857142855,
+ "grad_norm": 63.54456329345703,
+ "learning_rate": 4.165714285714286e-05,
+ "loss": 0.9622,
+ "step": 729
+ },
+ {
+ "epoch": 4.171428571428572,
+ "grad_norm": 30.054279327392578,
+ "learning_rate": 4.1714285714285714e-05,
+ "loss": 1.0214,
+ "step": 730
+ },
+ {
+ "epoch": 4.177142857142857,
+ "grad_norm": 50.66481018066406,
+ "learning_rate": 4.177142857142857e-05,
+ "loss": 0.8283,
+ "step": 731
+ },
+ {
+ "epoch": 4.182857142857143,
+ "grad_norm": 55.17572784423828,
+ "learning_rate": 4.1828571428571426e-05,
+ "loss": 0.9337,
+ "step": 732
+ },
+ {
+ "epoch": 4.188571428571429,
+ "grad_norm": 56.75128936767578,
+ "learning_rate": 4.188571428571429e-05,
+ "loss": 0.6922,
+ "step": 733
+ },
+ {
+ "epoch": 4.194285714285714,
+ "grad_norm": 46.1561279296875,
+ "learning_rate": 4.1942857142857145e-05,
+ "loss": 1.0934,
+ "step": 734
+ },
+ {
+ "epoch": 4.2,
+ "grad_norm": 56.81748962402344,
+ "learning_rate": 4.2e-05,
+ "loss": 0.7729,
+ "step": 735
+ },
+ {
+ "epoch": 4.2057142857142855,
+ "grad_norm": 51.30953598022461,
+ "learning_rate": 4.2057142857142864e-05,
+ "loss": 0.7968,
+ "step": 736
+ },
+ {
+ "epoch": 4.211428571428572,
+ "grad_norm": 36.7042236328125,
+ "learning_rate": 4.211428571428572e-05,
+ "loss": 0.8805,
+ "step": 737
+ },
+ {
+ "epoch": 4.217142857142857,
+ "grad_norm": 37.41236114501953,
+ "learning_rate": 4.2171428571428576e-05,
+ "loss": 1.0618,
+ "step": 738
+ },
+ {
+ "epoch": 4.222857142857142,
+ "grad_norm": 45.798583984375,
+ "learning_rate": 4.222857142857143e-05,
+ "loss": 0.9918,
+ "step": 739
+ },
+ {
+ "epoch": 4.228571428571429,
+ "grad_norm": 48.581058502197266,
+ "learning_rate": 4.228571428571429e-05,
+ "loss": 1.1005,
+ "step": 740
+ },
+ {
+ "epoch": 4.234285714285714,
+ "grad_norm": 70.04324340820312,
+ "learning_rate": 4.234285714285715e-05,
+ "loss": 0.7554,
+ "step": 741
+ },
+ {
+ "epoch": 4.24,
+ "grad_norm": 35.53608322143555,
+ "learning_rate": 4.24e-05,
+ "loss": 0.8303,
+ "step": 742
+ },
+ {
+ "epoch": 4.2457142857142856,
+ "grad_norm": 23.287988662719727,
+ "learning_rate": 4.245714285714285e-05,
+ "loss": 0.6577,
+ "step": 743
+ },
+ {
+ "epoch": 4.251428571428572,
+ "grad_norm": 31.08119010925293,
+ "learning_rate": 4.251428571428572e-05,
+ "loss": 0.8763,
+ "step": 744
+ },
+ {
+ "epoch": 4.257142857142857,
+ "grad_norm": 35.31334686279297,
+ "learning_rate": 4.257142857142857e-05,
+ "loss": 0.9691,
+ "step": 745
+ },
+ {
+ "epoch": 4.2628571428571425,
+ "grad_norm": 87.34281921386719,
+ "learning_rate": 4.262857142857143e-05,
+ "loss": 0.8698,
+ "step": 746
+ },
+ {
+ "epoch": 4.268571428571429,
+ "grad_norm": 48.2530517578125,
+ "learning_rate": 4.268571428571429e-05,
+ "loss": 0.8988,
+ "step": 747
+ },
+ {
+ "epoch": 4.274285714285714,
+ "grad_norm": 84.64009857177734,
+ "learning_rate": 4.2742857142857144e-05,
+ "loss": 1.1475,
+ "step": 748
+ },
+ {
+ "epoch": 4.28,
+ "grad_norm": 40.60107421875,
+ "learning_rate": 4.2800000000000004e-05,
+ "loss": 0.9865,
+ "step": 749
+ },
+ {
+ "epoch": 4.285714285714286,
+ "grad_norm": 52.748878479003906,
+ "learning_rate": 4.2857142857142856e-05,
+ "loss": 0.9031,
+ "step": 750
+ },
+ {
+ "epoch": 4.291428571428572,
+ "grad_norm": 101.19278717041016,
+ "learning_rate": 4.2914285714285716e-05,
+ "loss": 0.8664,
+ "step": 751
+ },
+ {
+ "epoch": 4.297142857142857,
+ "grad_norm": 73.33796691894531,
+ "learning_rate": 4.2971428571428575e-05,
+ "loss": 1.039,
+ "step": 752
+ },
+ {
+ "epoch": 4.3028571428571425,
+ "grad_norm": 33.10040283203125,
+ "learning_rate": 4.302857142857143e-05,
+ "loss": 0.9188,
+ "step": 753
+ },
+ {
+ "epoch": 4.308571428571429,
+ "grad_norm": 48.70182800292969,
+ "learning_rate": 4.308571428571429e-05,
+ "loss": 0.8472,
+ "step": 754
+ },
+ {
+ "epoch": 4.314285714285714,
+ "grad_norm": 44.06749725341797,
+ "learning_rate": 4.314285714285715e-05,
+ "loss": 0.8524,
+ "step": 755
+ },
+ {
+ "epoch": 4.32,
+ "grad_norm": 138.39503479003906,
+ "learning_rate": 4.32e-05,
+ "loss": 0.8525,
+ "step": 756
+ },
+ {
+ "epoch": 4.325714285714286,
+ "grad_norm": 42.73065948486328,
+ "learning_rate": 4.325714285714286e-05,
+ "loss": 0.9098,
+ "step": 757
+ },
+ {
+ "epoch": 4.331428571428571,
+ "grad_norm": 38.09630584716797,
+ "learning_rate": 4.331428571428572e-05,
+ "loss": 0.9752,
+ "step": 758
+ },
+ {
+ "epoch": 4.337142857142857,
+ "grad_norm": 70.47635650634766,
+ "learning_rate": 4.337142857142857e-05,
+ "loss": 1.0822,
+ "step": 759
+ },
+ {
+ "epoch": 4.3428571428571425,
+ "grad_norm": 55.644107818603516,
+ "learning_rate": 4.342857142857143e-05,
+ "loss": 1.0015,
+ "step": 760
+ },
+ {
+ "epoch": 4.348571428571429,
+ "grad_norm": 88.07673645019531,
+ "learning_rate": 4.3485714285714284e-05,
+ "loss": 1.0137,
+ "step": 761
+ },
+ {
+ "epoch": 4.354285714285714,
+ "grad_norm": 25.13230323791504,
+ "learning_rate": 4.354285714285714e-05,
+ "loss": 0.6897,
+ "step": 762
+ },
+ {
+ "epoch": 4.36,
+ "grad_norm": 59.5438346862793,
+ "learning_rate": 4.36e-05,
+ "loss": 1.0062,
+ "step": 763
+ },
+ {
+ "epoch": 4.365714285714286,
+ "grad_norm": 70.91270446777344,
+ "learning_rate": 4.3657142857142855e-05,
+ "loss": 0.9846,
+ "step": 764
+ },
+ {
+ "epoch": 4.371428571428572,
+ "grad_norm": 101.4989242553711,
+ "learning_rate": 4.371428571428572e-05,
+ "loss": 0.8398,
+ "step": 765
+ },
+ {
+ "epoch": 4.377142857142857,
+ "grad_norm": 38.134315490722656,
+ "learning_rate": 4.3771428571428574e-05,
+ "loss": 0.8914,
+ "step": 766
+ },
+ {
+ "epoch": 4.382857142857143,
+ "grad_norm": 45.63557434082031,
+ "learning_rate": 4.382857142857143e-05,
+ "loss": 0.8271,
+ "step": 767
+ },
+ {
+ "epoch": 4.388571428571429,
+ "grad_norm": 67.12691497802734,
+ "learning_rate": 4.388571428571429e-05,
+ "loss": 1.0859,
+ "step": 768
+ },
+ {
+ "epoch": 4.394285714285714,
+ "grad_norm": 29.074275970458984,
+ "learning_rate": 4.3942857142857146e-05,
+ "loss": 0.7501,
+ "step": 769
+ },
+ {
+ "epoch": 4.4,
+ "grad_norm": 46.619049072265625,
+ "learning_rate": 4.4000000000000006e-05,
+ "loss": 0.9039,
+ "step": 770
+ },
+ {
+ "epoch": 4.405714285714286,
+ "grad_norm": 54.82202911376953,
+ "learning_rate": 4.405714285714286e-05,
+ "loss": 0.9477,
+ "step": 771
+ },
+ {
+ "epoch": 4.411428571428571,
+ "grad_norm": 27.81842041015625,
+ "learning_rate": 4.411428571428572e-05,
+ "loss": 0.8866,
+ "step": 772
+ },
+ {
+ "epoch": 4.417142857142857,
+ "grad_norm": 43.718441009521484,
+ "learning_rate": 4.417142857142858e-05,
+ "loss": 0.7752,
+ "step": 773
+ },
+ {
+ "epoch": 4.422857142857143,
+ "grad_norm": 39.321292877197266,
+ "learning_rate": 4.422857142857143e-05,
+ "loss": 0.8403,
+ "step": 774
+ },
+ {
+ "epoch": 4.428571428571429,
+ "grad_norm": 35.265846252441406,
+ "learning_rate": 4.428571428571428e-05,
+ "loss": 0.7081,
+ "step": 775
+ },
+ {
+ "epoch": 4.434285714285714,
+ "grad_norm": 24.192285537719727,
+ "learning_rate": 4.434285714285715e-05,
+ "loss": 0.8386,
+ "step": 776
+ },
+ {
+ "epoch": 4.44,
+ "grad_norm": 52.77553176879883,
+ "learning_rate": 4.44e-05,
+ "loss": 0.7076,
+ "step": 777
+ },
+ {
+ "epoch": 4.445714285714286,
+ "grad_norm": 45.69184494018555,
+ "learning_rate": 4.445714285714286e-05,
+ "loss": 0.9516,
+ "step": 778
+ },
+ {
+ "epoch": 4.451428571428571,
+ "grad_norm": 64.34020233154297,
+ "learning_rate": 4.4514285714285714e-05,
+ "loss": 0.7131,
+ "step": 779
+ },
+ {
+ "epoch": 4.457142857142857,
+ "grad_norm": 39.51726150512695,
+ "learning_rate": 4.4571428571428574e-05,
+ "loss": 0.8413,
+ "step": 780
+ },
+ {
+ "epoch": 4.462857142857143,
+ "grad_norm": 31.498125076293945,
+ "learning_rate": 4.462857142857143e-05,
+ "loss": 0.7674,
+ "step": 781
+ },
+ {
+ "epoch": 4.468571428571429,
+ "grad_norm": 39.82917785644531,
+ "learning_rate": 4.4685714285714286e-05,
+ "loss": 0.8519,
+ "step": 782
+ },
+ {
+ "epoch": 4.474285714285714,
+ "grad_norm": 103.14328002929688,
+ "learning_rate": 4.4742857142857145e-05,
+ "loss": 0.8795,
+ "step": 783
+ },
+ {
+ "epoch": 4.48,
+ "grad_norm": 50.75999069213867,
+ "learning_rate": 4.4800000000000005e-05,
+ "loss": 0.8551,
+ "step": 784
+ },
+ {
+ "epoch": 4.485714285714286,
+ "grad_norm": 35.23714828491211,
+ "learning_rate": 4.485714285714286e-05,
+ "loss": 0.7303,
+ "step": 785
+ },
+ {
+ "epoch": 4.491428571428571,
+ "grad_norm": 49.85736846923828,
+ "learning_rate": 4.491428571428572e-05,
+ "loss": 1.111,
+ "step": 786
+ },
+ {
+ "epoch": 4.497142857142857,
+ "grad_norm": 38.57877731323242,
+ "learning_rate": 4.4971428571428576e-05,
+ "loss": 0.8739,
+ "step": 787
+ },
+ {
+ "epoch": 4.502857142857143,
+ "grad_norm": 42.49000549316406,
+ "learning_rate": 4.502857142857143e-05,
+ "loss": 0.6994,
+ "step": 788
+ },
+ {
+ "epoch": 4.508571428571429,
+ "grad_norm": 163.63182067871094,
+ "learning_rate": 4.508571428571429e-05,
+ "loss": 0.8117,
+ "step": 789
+ },
+ {
+ "epoch": 4.514285714285714,
+ "grad_norm": 44.8123664855957,
+ "learning_rate": 4.514285714285714e-05,
+ "loss": 0.7256,
+ "step": 790
+ },
+ {
+ "epoch": 4.52,
+ "grad_norm": 44.75035858154297,
+ "learning_rate": 4.52e-05,
+ "loss": 0.7779,
+ "step": 791
+ },
+ {
+ "epoch": 4.525714285714286,
+ "grad_norm": 48.99624252319336,
+ "learning_rate": 4.525714285714286e-05,
+ "loss": 0.8817,
+ "step": 792
+ },
+ {
+ "epoch": 4.531428571428571,
+ "grad_norm": 24.009414672851562,
+ "learning_rate": 4.531428571428571e-05,
+ "loss": 0.8026,
+ "step": 793
+ },
+ {
+ "epoch": 4.537142857142857,
+ "grad_norm": 27.89925765991211,
+ "learning_rate": 4.537142857142857e-05,
+ "loss": 1.189,
+ "step": 794
+ },
+ {
+ "epoch": 4.542857142857143,
+ "grad_norm": 53.633575439453125,
+ "learning_rate": 4.542857142857143e-05,
+ "loss": 0.7647,
+ "step": 795
+ },
+ {
+ "epoch": 4.548571428571429,
+ "grad_norm": 49.11115646362305,
+ "learning_rate": 4.5485714285714285e-05,
+ "loss": 1.0091,
+ "step": 796
+ },
+ {
+ "epoch": 4.554285714285714,
+ "grad_norm": 42.562679290771484,
+ "learning_rate": 4.5542857142857144e-05,
+ "loss": 0.882,
+ "step": 797
+ },
+ {
+ "epoch": 4.5600000000000005,
+ "grad_norm": 44.13538360595703,
+ "learning_rate": 4.5600000000000004e-05,
+ "loss": 0.9015,
+ "step": 798
+ },
+ {
+ "epoch": 4.565714285714286,
+ "grad_norm": 95.10523223876953,
+ "learning_rate": 4.5657142857142857e-05,
+ "loss": 0.6015,
+ "step": 799
+ },
+ {
+ "epoch": 4.571428571428571,
+ "grad_norm": 37.34461975097656,
+ "learning_rate": 4.5714285714285716e-05,
+ "loss": 0.7921,
+ "step": 800
+ },
+ {
+ "epoch": 4.577142857142857,
+ "grad_norm": 57.865299224853516,
+ "learning_rate": 4.5771428571428576e-05,
+ "loss": 0.8411,
+ "step": 801
+ },
+ {
+ "epoch": 4.582857142857143,
+ "grad_norm": 50.93311309814453,
+ "learning_rate": 4.5828571428571435e-05,
+ "loss": 1.0067,
+ "step": 802
+ },
+ {
+ "epoch": 4.588571428571429,
+ "grad_norm": 43.336788177490234,
+ "learning_rate": 4.588571428571429e-05,
+ "loss": 1.073,
+ "step": 803
+ },
+ {
+ "epoch": 4.594285714285714,
+ "grad_norm": 33.9623908996582,
+ "learning_rate": 4.594285714285714e-05,
+ "loss": 0.6566,
+ "step": 804
+ },
+ {
+ "epoch": 4.6,
+ "grad_norm": 72.55892181396484,
+ "learning_rate": 4.600000000000001e-05,
+ "loss": 0.8664,
+ "step": 805
+ },
+ {
+ "epoch": 4.605714285714286,
+ "grad_norm": 43.72401428222656,
+ "learning_rate": 4.605714285714286e-05,
+ "loss": 0.8379,
+ "step": 806
+ },
+ {
+ "epoch": 4.611428571428571,
+ "grad_norm": 30.538040161132812,
+ "learning_rate": 4.611428571428571e-05,
+ "loss": 0.7254,
+ "step": 807
+ },
+ {
+ "epoch": 4.617142857142857,
+ "grad_norm": 35.443058013916016,
+ "learning_rate": 4.617142857142857e-05,
+ "loss": 0.9433,
+ "step": 808
+ },
+ {
+ "epoch": 4.622857142857143,
+ "grad_norm": 26.423377990722656,
+ "learning_rate": 4.622857142857143e-05,
+ "loss": 0.8263,
+ "step": 809
+ },
+ {
+ "epoch": 4.628571428571428,
+ "grad_norm": 35.029911041259766,
+ "learning_rate": 4.628571428571429e-05,
+ "loss": 0.9768,
+ "step": 810
+ },
+ {
+ "epoch": 4.634285714285714,
+ "grad_norm": 26.144128799438477,
+ "learning_rate": 4.6342857142857143e-05,
+ "loss": 0.7838,
+ "step": 811
+ },
+ {
+ "epoch": 4.64,
+ "grad_norm": 55.07661056518555,
+ "learning_rate": 4.64e-05,
+ "loss": 0.7111,
+ "step": 812
+ },
+ {
+ "epoch": 4.645714285714286,
+ "grad_norm": 33.119075775146484,
+ "learning_rate": 4.645714285714286e-05,
+ "loss": 1.148,
+ "step": 813
+ },
+ {
+ "epoch": 4.651428571428571,
+ "grad_norm": 48.644649505615234,
+ "learning_rate": 4.6514285714285715e-05,
+ "loss": 1.2616,
+ "step": 814
+ },
+ {
+ "epoch": 4.6571428571428575,
+ "grad_norm": 45.484859466552734,
+ "learning_rate": 4.6571428571428575e-05,
+ "loss": 0.9413,
+ "step": 815
+ },
+ {
+ "epoch": 4.662857142857143,
+ "grad_norm": 40.2558708190918,
+ "learning_rate": 4.6628571428571434e-05,
+ "loss": 0.8274,
+ "step": 816
+ },
+ {
+ "epoch": 4.668571428571429,
+ "grad_norm": 41.27311325073242,
+ "learning_rate": 4.668571428571429e-05,
+ "loss": 1.2366,
+ "step": 817
+ },
+ {
+ "epoch": 4.674285714285714,
+ "grad_norm": 29.65941047668457,
+ "learning_rate": 4.6742857142857146e-05,
+ "loss": 1.2603,
+ "step": 818
+ },
+ {
+ "epoch": 4.68,
+ "grad_norm": 49.044586181640625,
+ "learning_rate": 4.6800000000000006e-05,
+ "loss": 1.4969,
+ "step": 819
+ },
+ {
+ "epoch": 4.685714285714286,
+ "grad_norm": 34.03653335571289,
+ "learning_rate": 4.685714285714286e-05,
+ "loss": 0.7342,
+ "step": 820
+ },
+ {
+ "epoch": 4.691428571428571,
+ "grad_norm": 54.41427230834961,
+ "learning_rate": 4.691428571428572e-05,
+ "loss": 0.7467,
+ "step": 821
+ },
+ {
+ "epoch": 4.6971428571428575,
+ "grad_norm": 54.1391716003418,
+ "learning_rate": 4.697142857142857e-05,
+ "loss": 0.7678,
+ "step": 822
+ },
+ {
+ "epoch": 4.702857142857143,
+ "grad_norm": 81.07489013671875,
+ "learning_rate": 4.702857142857143e-05,
+ "loss": 0.5855,
+ "step": 823
+ },
+ {
+ "epoch": 4.708571428571428,
+ "grad_norm": 35.899017333984375,
+ "learning_rate": 4.708571428571429e-05,
+ "loss": 0.9845,
+ "step": 824
+ },
+ {
+ "epoch": 4.714285714285714,
+ "grad_norm": 52.27177047729492,
+ "learning_rate": 4.714285714285714e-05,
+ "loss": 0.8779,
+ "step": 825
+ },
+ {
+ "epoch": 4.72,
+ "grad_norm": 33.994163513183594,
+ "learning_rate": 4.72e-05,
+ "loss": 1.1185,
+ "step": 826
+ },
+ {
+ "epoch": 4.725714285714286,
+ "grad_norm": 42.173828125,
+ "learning_rate": 4.725714285714286e-05,
+ "loss": 1.0608,
+ "step": 827
+ },
+ {
+ "epoch": 4.731428571428571,
+ "grad_norm": 28.176475524902344,
+ "learning_rate": 4.7314285714285714e-05,
+ "loss": 1.2005,
+ "step": 828
+ },
+ {
+ "epoch": 4.737142857142857,
+ "grad_norm": 43.679264068603516,
+ "learning_rate": 4.7371428571428574e-05,
+ "loss": 0.675,
+ "step": 829
+ },
+ {
+ "epoch": 4.742857142857143,
+ "grad_norm": 26.070119857788086,
+ "learning_rate": 4.742857142857143e-05,
+ "loss": 0.6874,
+ "step": 830
+ },
+ {
+ "epoch": 4.748571428571428,
+ "grad_norm": 43.402976989746094,
+ "learning_rate": 4.7485714285714286e-05,
+ "loss": 0.7073,
+ "step": 831
+ },
+ {
+ "epoch": 4.7542857142857144,
+ "grad_norm": 42.3155403137207,
+ "learning_rate": 4.7542857142857146e-05,
+ "loss": 0.9456,
+ "step": 832
+ },
+ {
+ "epoch": 4.76,
+ "grad_norm": 31.70867156982422,
+ "learning_rate": 4.76e-05,
+ "loss": 0.8882,
+ "step": 833
+ },
+ {
+ "epoch": 4.765714285714286,
+ "grad_norm": 37.32833480834961,
+ "learning_rate": 4.7657142857142865e-05,
+ "loss": 0.7486,
+ "step": 834
+ },
+ {
+ "epoch": 4.771428571428571,
+ "grad_norm": 34.27206039428711,
+ "learning_rate": 4.771428571428572e-05,
+ "loss": 0.7769,
+ "step": 835
+ },
+ {
+ "epoch": 4.777142857142858,
+ "grad_norm": 21.571674346923828,
+ "learning_rate": 4.777142857142857e-05,
+ "loss": 0.9401,
+ "step": 836
+ },
+ {
+ "epoch": 4.782857142857143,
+ "grad_norm": 20.9748477935791,
+ "learning_rate": 4.782857142857143e-05,
+ "loss": 0.7598,
+ "step": 837
+ },
+ {
+ "epoch": 4.788571428571428,
+ "grad_norm": 27.693876266479492,
+ "learning_rate": 4.788571428571429e-05,
+ "loss": 0.5936,
+ "step": 838
+ },
+ {
+ "epoch": 4.7942857142857145,
+ "grad_norm": 38.792903900146484,
+ "learning_rate": 4.794285714285714e-05,
+ "loss": 1.0826,
+ "step": 839
+ },
+ {
+ "epoch": 4.8,
+ "grad_norm": 28.459941864013672,
+ "learning_rate": 4.8e-05,
+ "loss": 0.8539,
+ "step": 840
+ },
+ {
+ "epoch": 4.805714285714286,
+ "grad_norm": 89.08366394042969,
+ "learning_rate": 4.805714285714286e-05,
+ "loss": 0.8214,
+ "step": 841
+ },
+ {
+ "epoch": 4.811428571428571,
+ "grad_norm": 53.74074172973633,
+ "learning_rate": 4.811428571428572e-05,
+ "loss": 0.9308,
+ "step": 842
+ },
+ {
+ "epoch": 4.817142857142857,
+ "grad_norm": 16.795307159423828,
+ "learning_rate": 4.817142857142857e-05,
+ "loss": 0.9332,
+ "step": 843
+ },
+ {
+ "epoch": 4.822857142857143,
+ "grad_norm": 24.547548294067383,
+ "learning_rate": 4.8228571428571426e-05,
+ "loss": 0.6237,
+ "step": 844
+ },
+ {
+ "epoch": 4.828571428571428,
+ "grad_norm": 28.066781997680664,
+ "learning_rate": 4.828571428571429e-05,
+ "loss": 0.7644,
+ "step": 845
+ },
+ {
+ "epoch": 4.8342857142857145,
+ "grad_norm": 70.24605560302734,
+ "learning_rate": 4.8342857142857145e-05,
+ "loss": 0.7757,
+ "step": 846
+ },
+ {
+ "epoch": 4.84,
+ "grad_norm": 85.10889434814453,
+ "learning_rate": 4.8400000000000004e-05,
+ "loss": 0.9301,
+ "step": 847
+ },
+ {
+ "epoch": 4.845714285714286,
+ "grad_norm": 40.65078353881836,
+ "learning_rate": 4.8457142857142864e-05,
+ "loss": 0.8808,
+ "step": 848
+ },
+ {
+ "epoch": 4.851428571428571,
+ "grad_norm": 47.48426055908203,
+ "learning_rate": 4.8514285714285716e-05,
+ "loss": 0.6732,
+ "step": 849
+ },
+ {
+ "epoch": 4.857142857142857,
+ "grad_norm": 32.60896301269531,
+ "learning_rate": 4.8571428571428576e-05,
+ "loss": 1.0011,
+ "step": 850
+ },
+ {
+ "epoch": 4.862857142857143,
+ "grad_norm": 39.34482955932617,
+ "learning_rate": 4.862857142857143e-05,
+ "loss": 0.9106,
+ "step": 851
+ },
+ {
+ "epoch": 4.868571428571428,
+ "grad_norm": 45.15939712524414,
+ "learning_rate": 4.868571428571429e-05,
+ "loss": 0.8759,
+ "step": 852
+ },
+ {
+ "epoch": 4.8742857142857146,
+ "grad_norm": 32.01213455200195,
+ "learning_rate": 4.874285714285715e-05,
+ "loss": 0.8889,
+ "step": 853
+ },
+ {
+ "epoch": 4.88,
+ "grad_norm": 51.3536262512207,
+ "learning_rate": 4.88e-05,
+ "loss": 0.9379,
+ "step": 854
+ },
+ {
+ "epoch": 4.885714285714286,
+ "grad_norm": 37.42449951171875,
+ "learning_rate": 4.885714285714286e-05,
+ "loss": 0.8884,
+ "step": 855
+ },
+ {
+ "epoch": 4.8914285714285715,
+ "grad_norm": 60.717830657958984,
+ "learning_rate": 4.891428571428572e-05,
+ "loss": 0.9361,
+ "step": 856
+ },
+ {
+ "epoch": 4.897142857142857,
+ "grad_norm": 62.75659942626953,
+ "learning_rate": 4.897142857142857e-05,
+ "loss": 0.6528,
+ "step": 857
+ },
+ {
+ "epoch": 4.902857142857143,
+ "grad_norm": 29.475698471069336,
+ "learning_rate": 4.902857142857143e-05,
+ "loss": 0.7498,
+ "step": 858
+ },
+ {
+ "epoch": 4.908571428571428,
+ "grad_norm": 29.30791473388672,
+ "learning_rate": 4.908571428571429e-05,
+ "loss": 0.7958,
+ "step": 859
+ },
+ {
+ "epoch": 4.914285714285715,
+ "grad_norm": 40.96413040161133,
+ "learning_rate": 4.9142857142857144e-05,
+ "loss": 0.8702,
+ "step": 860
+ },
+ {
+ "epoch": 4.92,
+ "grad_norm": 25.47064971923828,
+ "learning_rate": 4.92e-05,
+ "loss": 0.9104,
+ "step": 861
+ },
+ {
+ "epoch": 4.925714285714285,
+ "grad_norm": 40.89109802246094,
+ "learning_rate": 4.9257142857142856e-05,
+ "loss": 0.6985,
+ "step": 862
+ },
+ {
+ "epoch": 4.9314285714285715,
+ "grad_norm": 45.912967681884766,
+ "learning_rate": 4.9314285714285716e-05,
+ "loss": 0.7432,
+ "step": 863
+ },
+ {
+ "epoch": 4.937142857142857,
+ "grad_norm": 54.912818908691406,
+ "learning_rate": 4.9371428571428575e-05,
+ "loss": 0.8789,
+ "step": 864
+ },
+ {
+ "epoch": 4.942857142857143,
+ "grad_norm": 36.55717849731445,
+ "learning_rate": 4.942857142857143e-05,
+ "loss": 0.7154,
+ "step": 865
+ },
+ {
+ "epoch": 4.948571428571428,
+ "grad_norm": 34.00408935546875,
+ "learning_rate": 4.9485714285714294e-05,
+ "loss": 0.9432,
+ "step": 866
+ },
+ {
+ "epoch": 4.954285714285715,
+ "grad_norm": 37.49279022216797,
+ "learning_rate": 4.954285714285715e-05,
+ "loss": 1.0998,
+ "step": 867
+ },
+ {
+ "epoch": 4.96,
+ "grad_norm": 72.15148162841797,
+ "learning_rate": 4.96e-05,
+ "loss": 0.7201,
+ "step": 868
+ },
+ {
+ "epoch": 4.965714285714286,
+ "grad_norm": 122.64563751220703,
+ "learning_rate": 4.965714285714286e-05,
+ "loss": 1.2198,
+ "step": 869
+ },
+ {
+ "epoch": 4.9714285714285715,
+ "grad_norm": 38.418392181396484,
+ "learning_rate": 4.971428571428572e-05,
+ "loss": 0.9186,
+ "step": 870
+ },
+ {
+ "epoch": 4.977142857142857,
+ "grad_norm": 86.30313110351562,
+ "learning_rate": 4.977142857142857e-05,
+ "loss": 0.7804,
+ "step": 871
+ },
+ {
+ "epoch": 4.982857142857143,
+ "grad_norm": 57.824893951416016,
+ "learning_rate": 4.982857142857143e-05,
+ "loss": 0.7648,
+ "step": 872
+ },
+ {
+ "epoch": 4.988571428571428,
+ "grad_norm": 30.781469345092773,
+ "learning_rate": 4.9885714285714283e-05,
+ "loss": 0.8022,
+ "step": 873
+ },
+ {
+ "epoch": 4.994285714285715,
+ "grad_norm": 60.66427230834961,
+ "learning_rate": 4.994285714285715e-05,
+ "loss": 1.0601,
+ "step": 874
+ },
+ {
+ "epoch": 5.0,
+ "grad_norm": 37.57851791381836,
+ "learning_rate": 5e-05,
+ "loss": 0.7755,
+ "step": 875
+ },
+ {
+ "epoch": 5.0,
+ "eval_classes": 0,
+ "eval_loss": 0.9345681071281433,
+ "eval_map": 0.827,
+ "eval_map_50": 0.914,
+ "eval_map_75": 0.8966,
+ "eval_map_large": 0.827,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.827,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7444,
+ "eval_mar_10": 0.939,
+ "eval_mar_100": 0.9556,
+ "eval_mar_100_per_class": 0.9556,
+ "eval_mar_large": 0.9556,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 17.7962,
+ "eval_samples_per_second": 16.52,
+ "eval_steps_per_second": 2.079,
+ "step": 875
+ },
+ {
+ "epoch": 5.005714285714285,
+ "grad_norm": 28.773176193237305,
+ "learning_rate": 4.9993650793650795e-05,
+ "loss": 0.786,
+ "step": 876
+ },
+ {
+ "epoch": 5.011428571428572,
+ "grad_norm": 20.412450790405273,
+ "learning_rate": 4.998730158730159e-05,
+ "loss": 1.1136,
+ "step": 877
+ },
+ {
+ "epoch": 5.017142857142857,
+ "grad_norm": 30.777389526367188,
+ "learning_rate": 4.998095238095239e-05,
+ "loss": 0.7639,
+ "step": 878
+ },
+ {
+ "epoch": 5.022857142857143,
+ "grad_norm": 26.684669494628906,
+ "learning_rate": 4.997460317460318e-05,
+ "loss": 0.7194,
+ "step": 879
+ },
+ {
+ "epoch": 5.0285714285714285,
+ "grad_norm": 32.90359115600586,
+ "learning_rate": 4.996825396825397e-05,
+ "loss": 1.1397,
+ "step": 880
+ },
+ {
+ "epoch": 5.034285714285715,
+ "grad_norm": 36.00369644165039,
+ "learning_rate": 4.9961904761904765e-05,
+ "loss": 1.0612,
+ "step": 881
+ },
+ {
+ "epoch": 5.04,
+ "grad_norm": 47.34328079223633,
+ "learning_rate": 4.995555555555556e-05,
+ "loss": 0.9564,
+ "step": 882
+ },
+ {
+ "epoch": 5.045714285714285,
+ "grad_norm": 29.983821868896484,
+ "learning_rate": 4.994920634920635e-05,
+ "loss": 1.1852,
+ "step": 883
+ },
+ {
+ "epoch": 5.051428571428572,
+ "grad_norm": 62.797935485839844,
+ "learning_rate": 4.994285714285715e-05,
+ "loss": 0.9991,
+ "step": 884
+ },
+ {
+ "epoch": 5.057142857142857,
+ "grad_norm": 34.72334671020508,
+ "learning_rate": 4.9936507936507936e-05,
+ "loss": 0.746,
+ "step": 885
+ },
+ {
+ "epoch": 5.062857142857143,
+ "grad_norm": 35.5029296875,
+ "learning_rate": 4.9930158730158735e-05,
+ "loss": 0.724,
+ "step": 886
+ },
+ {
+ "epoch": 5.0685714285714285,
+ "grad_norm": 66.18411254882812,
+ "learning_rate": 4.992380952380953e-05,
+ "loss": 0.8453,
+ "step": 887
+ },
+ {
+ "epoch": 5.074285714285715,
+ "grad_norm": 124.14593505859375,
+ "learning_rate": 4.991746031746032e-05,
+ "loss": 0.683,
+ "step": 888
+ },
+ {
+ "epoch": 5.08,
+ "grad_norm": 49.367523193359375,
+ "learning_rate": 4.991111111111111e-05,
+ "loss": 0.8275,
+ "step": 889
+ },
+ {
+ "epoch": 5.085714285714285,
+ "grad_norm": 17.534074783325195,
+ "learning_rate": 4.990476190476191e-05,
+ "loss": 0.8603,
+ "step": 890
+ },
+ {
+ "epoch": 5.091428571428572,
+ "grad_norm": 43.27067947387695,
+ "learning_rate": 4.98984126984127e-05,
+ "loss": 0.9633,
+ "step": 891
+ },
+ {
+ "epoch": 5.097142857142857,
+ "grad_norm": 28.913188934326172,
+ "learning_rate": 4.98920634920635e-05,
+ "loss": 0.7867,
+ "step": 892
+ },
+ {
+ "epoch": 5.102857142857143,
+ "grad_norm": 58.211517333984375,
+ "learning_rate": 4.9885714285714283e-05,
+ "loss": 0.8475,
+ "step": 893
+ },
+ {
+ "epoch": 5.1085714285714285,
+ "grad_norm": 36.70371627807617,
+ "learning_rate": 4.987936507936508e-05,
+ "loss": 0.8455,
+ "step": 894
+ },
+ {
+ "epoch": 5.114285714285714,
+ "grad_norm": 45.08826446533203,
+ "learning_rate": 4.9873015873015875e-05,
+ "loss": 0.7288,
+ "step": 895
+ },
+ {
+ "epoch": 5.12,
+ "grad_norm": 27.22219467163086,
+ "learning_rate": 4.986666666666667e-05,
+ "loss": 0.7087,
+ "step": 896
+ },
+ {
+ "epoch": 5.1257142857142854,
+ "grad_norm": 36.46015167236328,
+ "learning_rate": 4.986031746031746e-05,
+ "loss": 1.0503,
+ "step": 897
+ },
+ {
+ "epoch": 5.131428571428572,
+ "grad_norm": 33.393348693847656,
+ "learning_rate": 4.985396825396826e-05,
+ "loss": 0.7937,
+ "step": 898
+ },
+ {
+ "epoch": 5.137142857142857,
+ "grad_norm": 53.71676254272461,
+ "learning_rate": 4.9847619047619046e-05,
+ "loss": 0.6919,
+ "step": 899
+ },
+ {
+ "epoch": 5.142857142857143,
+ "grad_norm": 49.26475143432617,
+ "learning_rate": 4.9841269841269845e-05,
+ "loss": 0.8403,
+ "step": 900
+ },
+ {
+ "epoch": 5.148571428571429,
+ "grad_norm": 32.67655944824219,
+ "learning_rate": 4.983492063492064e-05,
+ "loss": 0.7301,
+ "step": 901
+ },
+ {
+ "epoch": 5.154285714285714,
+ "grad_norm": 67.62574005126953,
+ "learning_rate": 4.982857142857143e-05,
+ "loss": 1.0139,
+ "step": 902
+ },
+ {
+ "epoch": 5.16,
+ "grad_norm": 69.98014068603516,
+ "learning_rate": 4.982222222222222e-05,
+ "loss": 0.9043,
+ "step": 903
+ },
+ {
+ "epoch": 5.1657142857142855,
+ "grad_norm": 263.18438720703125,
+ "learning_rate": 4.9815873015873016e-05,
+ "loss": 0.6178,
+ "step": 904
+ },
+ {
+ "epoch": 5.171428571428572,
+ "grad_norm": 30.691463470458984,
+ "learning_rate": 4.980952380952381e-05,
+ "loss": 0.9376,
+ "step": 905
+ },
+ {
+ "epoch": 5.177142857142857,
+ "grad_norm": 22.068796157836914,
+ "learning_rate": 4.980317460317461e-05,
+ "loss": 0.9483,
+ "step": 906
+ },
+ {
+ "epoch": 5.182857142857143,
+ "grad_norm": 47.959068298339844,
+ "learning_rate": 4.97968253968254e-05,
+ "loss": 1.1893,
+ "step": 907
+ },
+ {
+ "epoch": 5.188571428571429,
+ "grad_norm": 24.972097396850586,
+ "learning_rate": 4.979047619047619e-05,
+ "loss": 0.8632,
+ "step": 908
+ },
+ {
+ "epoch": 5.194285714285714,
+ "grad_norm": 23.600223541259766,
+ "learning_rate": 4.9784126984126986e-05,
+ "loss": 1.0488,
+ "step": 909
+ },
+ {
+ "epoch": 5.2,
+ "grad_norm": 82.63482666015625,
+ "learning_rate": 4.977777777777778e-05,
+ "loss": 0.7555,
+ "step": 910
+ },
+ {
+ "epoch": 5.2057142857142855,
+ "grad_norm": 43.265769958496094,
+ "learning_rate": 4.977142857142857e-05,
+ "loss": 0.7966,
+ "step": 911
+ },
+ {
+ "epoch": 5.211428571428572,
+ "grad_norm": 43.13532638549805,
+ "learning_rate": 4.976507936507937e-05,
+ "loss": 0.8966,
+ "step": 912
+ },
+ {
+ "epoch": 5.217142857142857,
+ "grad_norm": 25.240903854370117,
+ "learning_rate": 4.975873015873016e-05,
+ "loss": 0.9683,
+ "step": 913
+ },
+ {
+ "epoch": 5.222857142857142,
+ "grad_norm": 19.700618743896484,
+ "learning_rate": 4.9752380952380956e-05,
+ "loss": 1.0708,
+ "step": 914
+ },
+ {
+ "epoch": 5.228571428571429,
+ "grad_norm": 39.732566833496094,
+ "learning_rate": 4.974603174603175e-05,
+ "loss": 0.5733,
+ "step": 915
+ },
+ {
+ "epoch": 5.234285714285714,
+ "grad_norm": 28.90740203857422,
+ "learning_rate": 4.973968253968254e-05,
+ "loss": 0.7555,
+ "step": 916
+ },
+ {
+ "epoch": 5.24,
+ "grad_norm": 63.37759017944336,
+ "learning_rate": 4.973333333333334e-05,
+ "loss": 0.8653,
+ "step": 917
+ },
+ {
+ "epoch": 5.2457142857142856,
+ "grad_norm": 59.77758026123047,
+ "learning_rate": 4.9726984126984126e-05,
+ "loss": 0.5666,
+ "step": 918
+ },
+ {
+ "epoch": 5.251428571428572,
+ "grad_norm": 32.72135543823242,
+ "learning_rate": 4.9720634920634926e-05,
+ "loss": 0.6469,
+ "step": 919
+ },
+ {
+ "epoch": 5.257142857142857,
+ "grad_norm": 50.85166931152344,
+ "learning_rate": 4.971428571428572e-05,
+ "loss": 1.2446,
+ "step": 920
+ },
+ {
+ "epoch": 5.2628571428571425,
+ "grad_norm": 41.593135833740234,
+ "learning_rate": 4.970793650793651e-05,
+ "loss": 1.0042,
+ "step": 921
+ },
+ {
+ "epoch": 5.268571428571429,
+ "grad_norm": 90.62390899658203,
+ "learning_rate": 4.9701587301587304e-05,
+ "loss": 1.2134,
+ "step": 922
+ },
+ {
+ "epoch": 5.274285714285714,
+ "grad_norm": 53.12033462524414,
+ "learning_rate": 4.96952380952381e-05,
+ "loss": 0.7162,
+ "step": 923
+ },
+ {
+ "epoch": 5.28,
+ "grad_norm": 24.408414840698242,
+ "learning_rate": 4.968888888888889e-05,
+ "loss": 0.7488,
+ "step": 924
+ },
+ {
+ "epoch": 5.285714285714286,
+ "grad_norm": 73.4721450805664,
+ "learning_rate": 4.968253968253969e-05,
+ "loss": 0.6289,
+ "step": 925
+ },
+ {
+ "epoch": 5.291428571428572,
+ "grad_norm": 40.17009353637695,
+ "learning_rate": 4.9676190476190474e-05,
+ "loss": 0.7478,
+ "step": 926
+ },
+ {
+ "epoch": 5.297142857142857,
+ "grad_norm": 48.18302536010742,
+ "learning_rate": 4.9669841269841274e-05,
+ "loss": 0.7197,
+ "step": 927
+ },
+ {
+ "epoch": 5.3028571428571425,
+ "grad_norm": 66.0279312133789,
+ "learning_rate": 4.9663492063492066e-05,
+ "loss": 0.9326,
+ "step": 928
+ },
+ {
+ "epoch": 5.308571428571429,
+ "grad_norm": 25.411184310913086,
+ "learning_rate": 4.965714285714286e-05,
+ "loss": 0.9918,
+ "step": 929
+ },
+ {
+ "epoch": 5.314285714285714,
+ "grad_norm": 63.230499267578125,
+ "learning_rate": 4.965079365079365e-05,
+ "loss": 0.8927,
+ "step": 930
+ },
+ {
+ "epoch": 5.32,
+ "grad_norm": 44.250484466552734,
+ "learning_rate": 4.964444444444445e-05,
+ "loss": 0.9199,
+ "step": 931
+ },
+ {
+ "epoch": 5.325714285714286,
+ "grad_norm": 51.27559280395508,
+ "learning_rate": 4.963809523809524e-05,
+ "loss": 0.8719,
+ "step": 932
+ },
+ {
+ "epoch": 5.331428571428571,
+ "grad_norm": 59.98835372924805,
+ "learning_rate": 4.9631746031746036e-05,
+ "loss": 0.8632,
+ "step": 933
+ },
+ {
+ "epoch": 5.337142857142857,
+ "grad_norm": 37.64426040649414,
+ "learning_rate": 4.962539682539683e-05,
+ "loss": 1.1554,
+ "step": 934
+ },
+ {
+ "epoch": 5.3428571428571425,
+ "grad_norm": 41.51221466064453,
+ "learning_rate": 4.961904761904762e-05,
+ "loss": 0.8295,
+ "step": 935
+ },
+ {
+ "epoch": 5.348571428571429,
+ "grad_norm": 46.496395111083984,
+ "learning_rate": 4.9612698412698414e-05,
+ "loss": 0.643,
+ "step": 936
+ },
+ {
+ "epoch": 5.354285714285714,
+ "grad_norm": 32.01964569091797,
+ "learning_rate": 4.960634920634921e-05,
+ "loss": 0.8428,
+ "step": 937
+ },
+ {
+ "epoch": 5.36,
+ "grad_norm": 24.820205688476562,
+ "learning_rate": 4.96e-05,
+ "loss": 0.5908,
+ "step": 938
+ },
+ {
+ "epoch": 5.365714285714286,
+ "grad_norm": 26.27269744873047,
+ "learning_rate": 4.95936507936508e-05,
+ "loss": 0.8872,
+ "step": 939
+ },
+ {
+ "epoch": 5.371428571428572,
+ "grad_norm": 30.581790924072266,
+ "learning_rate": 4.9587301587301585e-05,
+ "loss": 0.765,
+ "step": 940
+ },
+ {
+ "epoch": 5.377142857142857,
+ "grad_norm": 43.756492614746094,
+ "learning_rate": 4.9580952380952384e-05,
+ "loss": 1.0176,
+ "step": 941
+ },
+ {
+ "epoch": 5.382857142857143,
+ "grad_norm": 31.745285034179688,
+ "learning_rate": 4.957460317460318e-05,
+ "loss": 0.6567,
+ "step": 942
+ },
+ {
+ "epoch": 5.388571428571429,
+ "grad_norm": 29.369361877441406,
+ "learning_rate": 4.956825396825397e-05,
+ "loss": 0.9612,
+ "step": 943
+ },
+ {
+ "epoch": 5.394285714285714,
+ "grad_norm": 38.0923957824707,
+ "learning_rate": 4.956190476190476e-05,
+ "loss": 1.0614,
+ "step": 944
+ },
+ {
+ "epoch": 5.4,
+ "grad_norm": 36.38419723510742,
+ "learning_rate": 4.955555555555556e-05,
+ "loss": 0.8156,
+ "step": 945
+ },
+ {
+ "epoch": 5.405714285714286,
+ "grad_norm": 47.70192337036133,
+ "learning_rate": 4.9549206349206354e-05,
+ "loss": 0.8118,
+ "step": 946
+ },
+ {
+ "epoch": 5.411428571428571,
+ "grad_norm": 48.491085052490234,
+ "learning_rate": 4.954285714285715e-05,
+ "loss": 0.9573,
+ "step": 947
+ },
+ {
+ "epoch": 5.417142857142857,
+ "grad_norm": 59.52389907836914,
+ "learning_rate": 4.953650793650794e-05,
+ "loss": 0.813,
+ "step": 948
+ },
+ {
+ "epoch": 5.422857142857143,
+ "grad_norm": 39.79739761352539,
+ "learning_rate": 4.953015873015873e-05,
+ "loss": 0.7645,
+ "step": 949
+ },
+ {
+ "epoch": 5.428571428571429,
+ "grad_norm": 45.72932052612305,
+ "learning_rate": 4.9523809523809525e-05,
+ "loss": 0.6376,
+ "step": 950
+ },
+ {
+ "epoch": 5.434285714285714,
+ "grad_norm": 34.376991271972656,
+ "learning_rate": 4.951746031746032e-05,
+ "loss": 0.7663,
+ "step": 951
+ },
+ {
+ "epoch": 5.44,
+ "grad_norm": 22.378835678100586,
+ "learning_rate": 4.951111111111112e-05,
+ "loss": 0.7292,
+ "step": 952
+ },
+ {
+ "epoch": 5.445714285714286,
+ "grad_norm": 1413.2232666015625,
+ "learning_rate": 4.950476190476191e-05,
+ "loss": 0.6517,
+ "step": 953
+ },
+ {
+ "epoch": 5.451428571428571,
+ "grad_norm": 52.54072189331055,
+ "learning_rate": 4.94984126984127e-05,
+ "loss": 0.7968,
+ "step": 954
+ },
+ {
+ "epoch": 5.457142857142857,
+ "grad_norm": 53.125953674316406,
+ "learning_rate": 4.9492063492063495e-05,
+ "loss": 0.753,
+ "step": 955
+ },
+ {
+ "epoch": 5.462857142857143,
+ "grad_norm": 44.3760986328125,
+ "learning_rate": 4.9485714285714294e-05,
+ "loss": 0.6988,
+ "step": 956
+ },
+ {
+ "epoch": 5.468571428571429,
+ "grad_norm": 38.103187561035156,
+ "learning_rate": 4.947936507936508e-05,
+ "loss": 0.9433,
+ "step": 957
+ },
+ {
+ "epoch": 5.474285714285714,
+ "grad_norm": 57.01421356201172,
+ "learning_rate": 4.947301587301588e-05,
+ "loss": 0.6795,
+ "step": 958
+ },
+ {
+ "epoch": 5.48,
+ "grad_norm": 47.392242431640625,
+ "learning_rate": 4.9466666666666665e-05,
+ "loss": 0.7837,
+ "step": 959
+ },
+ {
+ "epoch": 5.485714285714286,
+ "grad_norm": 21.470067977905273,
+ "learning_rate": 4.9460317460317465e-05,
+ "loss": 1.0356,
+ "step": 960
+ },
+ {
+ "epoch": 5.491428571428571,
+ "grad_norm": 28.541791915893555,
+ "learning_rate": 4.945396825396826e-05,
+ "loss": 0.6212,
+ "step": 961
+ },
+ {
+ "epoch": 5.497142857142857,
+ "grad_norm": 37.67040252685547,
+ "learning_rate": 4.944761904761905e-05,
+ "loss": 0.6977,
+ "step": 962
+ },
+ {
+ "epoch": 5.502857142857143,
+ "grad_norm": 67.51469421386719,
+ "learning_rate": 4.944126984126984e-05,
+ "loss": 0.8533,
+ "step": 963
+ },
+ {
+ "epoch": 5.508571428571429,
+ "grad_norm": 78.92288208007812,
+ "learning_rate": 4.943492063492064e-05,
+ "loss": 0.7135,
+ "step": 964
+ },
+ {
+ "epoch": 5.514285714285714,
+ "grad_norm": 60.324790954589844,
+ "learning_rate": 4.942857142857143e-05,
+ "loss": 1.0517,
+ "step": 965
+ },
+ {
+ "epoch": 5.52,
+ "grad_norm": 66.66876220703125,
+ "learning_rate": 4.942222222222223e-05,
+ "loss": 0.7924,
+ "step": 966
+ },
+ {
+ "epoch": 5.525714285714286,
+ "grad_norm": 55.20885467529297,
+ "learning_rate": 4.941587301587302e-05,
+ "loss": 1.2296,
+ "step": 967
+ },
+ {
+ "epoch": 5.531428571428571,
+ "grad_norm": 40.8668212890625,
+ "learning_rate": 4.940952380952381e-05,
+ "loss": 0.8239,
+ "step": 968
+ },
+ {
+ "epoch": 5.537142857142857,
+ "grad_norm": 37.452362060546875,
+ "learning_rate": 4.9403174603174605e-05,
+ "loss": 0.665,
+ "step": 969
+ },
+ {
+ "epoch": 5.542857142857143,
+ "grad_norm": 78.19581604003906,
+ "learning_rate": 4.93968253968254e-05,
+ "loss": 0.9963,
+ "step": 970
+ },
+ {
+ "epoch": 5.548571428571429,
+ "grad_norm": 43.955406188964844,
+ "learning_rate": 4.939047619047619e-05,
+ "loss": 0.674,
+ "step": 971
+ },
+ {
+ "epoch": 5.554285714285714,
+ "grad_norm": 257.32330322265625,
+ "learning_rate": 4.938412698412699e-05,
+ "loss": 0.9603,
+ "step": 972
+ },
+ {
+ "epoch": 5.5600000000000005,
+ "grad_norm": 97.81949615478516,
+ "learning_rate": 4.9377777777777776e-05,
+ "loss": 0.824,
+ "step": 973
+ },
+ {
+ "epoch": 5.565714285714286,
+ "grad_norm": 66.90892028808594,
+ "learning_rate": 4.9371428571428575e-05,
+ "loss": 0.8135,
+ "step": 974
+ },
+ {
+ "epoch": 5.571428571428571,
+ "grad_norm": 49.98130416870117,
+ "learning_rate": 4.936507936507937e-05,
+ "loss": 1.0069,
+ "step": 975
+ },
+ {
+ "epoch": 5.577142857142857,
+ "grad_norm": 42.15127944946289,
+ "learning_rate": 4.935873015873016e-05,
+ "loss": 0.9639,
+ "step": 976
+ },
+ {
+ "epoch": 5.582857142857143,
+ "grad_norm": 18.45213508605957,
+ "learning_rate": 4.935238095238095e-05,
+ "loss": 0.7923,
+ "step": 977
+ },
+ {
+ "epoch": 5.588571428571429,
+ "grad_norm": 28.863927841186523,
+ "learning_rate": 4.934603174603175e-05,
+ "loss": 0.7692,
+ "step": 978
+ },
+ {
+ "epoch": 5.594285714285714,
+ "grad_norm": 24.05852508544922,
+ "learning_rate": 4.933968253968254e-05,
+ "loss": 0.7133,
+ "step": 979
+ },
+ {
+ "epoch": 5.6,
+ "grad_norm": 19.306772232055664,
+ "learning_rate": 4.933333333333334e-05,
+ "loss": 1.0405,
+ "step": 980
+ },
+ {
+ "epoch": 5.605714285714286,
+ "grad_norm": 41.536617279052734,
+ "learning_rate": 4.932698412698413e-05,
+ "loss": 0.829,
+ "step": 981
+ },
+ {
+ "epoch": 5.611428571428571,
+ "grad_norm": 48.1365966796875,
+ "learning_rate": 4.932063492063492e-05,
+ "loss": 0.9519,
+ "step": 982
+ },
+ {
+ "epoch": 5.617142857142857,
+ "grad_norm": 32.03211975097656,
+ "learning_rate": 4.9314285714285716e-05,
+ "loss": 0.7137,
+ "step": 983
+ },
+ {
+ "epoch": 5.622857142857143,
+ "grad_norm": 26.54397964477539,
+ "learning_rate": 4.930793650793651e-05,
+ "loss": 0.8798,
+ "step": 984
+ },
+ {
+ "epoch": 5.628571428571428,
+ "grad_norm": 66.23500061035156,
+ "learning_rate": 4.930158730158731e-05,
+ "loss": 0.7708,
+ "step": 985
+ },
+ {
+ "epoch": 5.634285714285714,
+ "grad_norm": 82.78375244140625,
+ "learning_rate": 4.92952380952381e-05,
+ "loss": 0.8031,
+ "step": 986
+ },
+ {
+ "epoch": 5.64,
+ "grad_norm": 69.4685287475586,
+ "learning_rate": 4.928888888888889e-05,
+ "loss": 0.9085,
+ "step": 987
+ },
+ {
+ "epoch": 5.645714285714286,
+ "grad_norm": 41.04307556152344,
+ "learning_rate": 4.9282539682539685e-05,
+ "loss": 0.6301,
+ "step": 988
+ },
+ {
+ "epoch": 5.651428571428571,
+ "grad_norm": 96.05631256103516,
+ "learning_rate": 4.927619047619048e-05,
+ "loss": 0.7981,
+ "step": 989
+ },
+ {
+ "epoch": 5.6571428571428575,
+ "grad_norm": 37.76736068725586,
+ "learning_rate": 4.926984126984127e-05,
+ "loss": 0.7604,
+ "step": 990
+ },
+ {
+ "epoch": 5.662857142857143,
+ "grad_norm": 64.40799713134766,
+ "learning_rate": 4.926349206349207e-05,
+ "loss": 0.8603,
+ "step": 991
+ },
+ {
+ "epoch": 5.668571428571429,
+ "grad_norm": 22.061336517333984,
+ "learning_rate": 4.9257142857142856e-05,
+ "loss": 0.8048,
+ "step": 992
+ },
+ {
+ "epoch": 5.674285714285714,
+ "grad_norm": 46.94278335571289,
+ "learning_rate": 4.9250793650793655e-05,
+ "loss": 0.7711,
+ "step": 993
+ },
+ {
+ "epoch": 5.68,
+ "grad_norm": 47.216609954833984,
+ "learning_rate": 4.924444444444445e-05,
+ "loss": 1.5314,
+ "step": 994
+ },
+ {
+ "epoch": 5.685714285714286,
+ "grad_norm": 24.833250045776367,
+ "learning_rate": 4.923809523809524e-05,
+ "loss": 0.6962,
+ "step": 995
+ },
+ {
+ "epoch": 5.691428571428571,
+ "grad_norm": 115.3731918334961,
+ "learning_rate": 4.923174603174603e-05,
+ "loss": 0.9526,
+ "step": 996
+ },
+ {
+ "epoch": 5.6971428571428575,
+ "grad_norm": 55.14940643310547,
+ "learning_rate": 4.922539682539683e-05,
+ "loss": 0.7778,
+ "step": 997
+ },
+ {
+ "epoch": 5.702857142857143,
+ "grad_norm": 25.706256866455078,
+ "learning_rate": 4.921904761904762e-05,
+ "loss": 0.6183,
+ "step": 998
+ },
+ {
+ "epoch": 5.708571428571428,
+ "grad_norm": 35.29422378540039,
+ "learning_rate": 4.921269841269842e-05,
+ "loss": 0.6405,
+ "step": 999
+ },
+ {
+ "epoch": 5.714285714285714,
+ "grad_norm": 341.161865234375,
+ "learning_rate": 4.9206349206349204e-05,
+ "loss": 0.7592,
+ "step": 1000
+ },
+ {
+ "epoch": 5.72,
+ "grad_norm": 60.03837585449219,
+ "learning_rate": 4.92e-05,
+ "loss": 0.5991,
+ "step": 1001
+ },
+ {
+ "epoch": 5.725714285714286,
+ "grad_norm": 41.89161682128906,
+ "learning_rate": 4.9193650793650796e-05,
+ "loss": 0.8142,
+ "step": 1002
+ },
+ {
+ "epoch": 5.731428571428571,
+ "grad_norm": 34.78620910644531,
+ "learning_rate": 4.918730158730159e-05,
+ "loss": 0.7668,
+ "step": 1003
+ },
+ {
+ "epoch": 5.737142857142857,
+ "grad_norm": 34.432884216308594,
+ "learning_rate": 4.918095238095238e-05,
+ "loss": 0.7478,
+ "step": 1004
+ },
+ {
+ "epoch": 5.742857142857143,
+ "grad_norm": 49.911746978759766,
+ "learning_rate": 4.917460317460318e-05,
+ "loss": 0.8738,
+ "step": 1005
+ },
+ {
+ "epoch": 5.748571428571428,
+ "grad_norm": 46.06341552734375,
+ "learning_rate": 4.9168253968253966e-05,
+ "loss": 0.6954,
+ "step": 1006
+ },
+ {
+ "epoch": 5.7542857142857144,
+ "grad_norm": 55.89427947998047,
+ "learning_rate": 4.9161904761904766e-05,
+ "loss": 0.5505,
+ "step": 1007
+ },
+ {
+ "epoch": 5.76,
+ "grad_norm": 36.81814193725586,
+ "learning_rate": 4.915555555555556e-05,
+ "loss": 0.6636,
+ "step": 1008
+ },
+ {
+ "epoch": 5.765714285714286,
+ "grad_norm": 57.308223724365234,
+ "learning_rate": 4.914920634920635e-05,
+ "loss": 0.7844,
+ "step": 1009
+ },
+ {
+ "epoch": 5.771428571428571,
+ "grad_norm": 55.2410774230957,
+ "learning_rate": 4.9142857142857144e-05,
+ "loss": 0.7025,
+ "step": 1010
+ },
+ {
+ "epoch": 5.777142857142858,
+ "grad_norm": 47.4276123046875,
+ "learning_rate": 4.9136507936507936e-05,
+ "loss": 0.8052,
+ "step": 1011
+ },
+ {
+ "epoch": 5.782857142857143,
+ "grad_norm": 44.55537033081055,
+ "learning_rate": 4.913015873015873e-05,
+ "loss": 0.759,
+ "step": 1012
+ },
+ {
+ "epoch": 5.788571428571428,
+ "grad_norm": 425.6323547363281,
+ "learning_rate": 4.912380952380953e-05,
+ "loss": 0.7912,
+ "step": 1013
+ },
+ {
+ "epoch": 5.7942857142857145,
+ "grad_norm": 22.003156661987305,
+ "learning_rate": 4.9117460317460314e-05,
+ "loss": 0.7615,
+ "step": 1014
+ },
+ {
+ "epoch": 5.8,
+ "grad_norm": 43.3438720703125,
+ "learning_rate": 4.9111111111111114e-05,
+ "loss": 0.7115,
+ "step": 1015
+ },
+ {
+ "epoch": 5.805714285714286,
+ "grad_norm": 77.6767578125,
+ "learning_rate": 4.9104761904761906e-05,
+ "loss": 0.9186,
+ "step": 1016
+ },
+ {
+ "epoch": 5.811428571428571,
+ "grad_norm": 57.89418029785156,
+ "learning_rate": 4.90984126984127e-05,
+ "loss": 0.759,
+ "step": 1017
+ },
+ {
+ "epoch": 5.817142857142857,
+ "grad_norm": 21.17589569091797,
+ "learning_rate": 4.909206349206349e-05,
+ "loss": 0.9768,
+ "step": 1018
+ },
+ {
+ "epoch": 5.822857142857143,
+ "grad_norm": 33.41554641723633,
+ "learning_rate": 4.908571428571429e-05,
+ "loss": 0.8376,
+ "step": 1019
+ },
+ {
+ "epoch": 5.828571428571428,
+ "grad_norm": 21.341121673583984,
+ "learning_rate": 4.9079365079365084e-05,
+ "loss": 0.6253,
+ "step": 1020
+ },
+ {
+ "epoch": 5.8342857142857145,
+ "grad_norm": 79.05905151367188,
+ "learning_rate": 4.9073015873015876e-05,
+ "loss": 0.8679,
+ "step": 1021
+ },
+ {
+ "epoch": 5.84,
+ "grad_norm": 34.44978332519531,
+ "learning_rate": 4.906666666666667e-05,
+ "loss": 1.0964,
+ "step": 1022
+ },
+ {
+ "epoch": 5.845714285714286,
+ "grad_norm": 27.523412704467773,
+ "learning_rate": 4.906031746031746e-05,
+ "loss": 0.5364,
+ "step": 1023
+ },
+ {
+ "epoch": 5.851428571428571,
+ "grad_norm": 47.8846435546875,
+ "learning_rate": 4.905396825396826e-05,
+ "loss": 0.7681,
+ "step": 1024
+ },
+ {
+ "epoch": 5.857142857142857,
+ "grad_norm": 60.275428771972656,
+ "learning_rate": 4.904761904761905e-05,
+ "loss": 0.7527,
+ "step": 1025
+ },
+ {
+ "epoch": 5.862857142857143,
+ "grad_norm": 31.929855346679688,
+ "learning_rate": 4.9041269841269846e-05,
+ "loss": 0.8223,
+ "step": 1026
+ },
+ {
+ "epoch": 5.868571428571428,
+ "grad_norm": 56.61876678466797,
+ "learning_rate": 4.903492063492064e-05,
+ "loss": 0.7186,
+ "step": 1027
+ },
+ {
+ "epoch": 5.8742857142857146,
+ "grad_norm": 33.72767639160156,
+ "learning_rate": 4.902857142857143e-05,
+ "loss": 0.8864,
+ "step": 1028
+ },
+ {
+ "epoch": 5.88,
+ "grad_norm": 52.361454010009766,
+ "learning_rate": 4.9022222222222224e-05,
+ "loss": 0.5077,
+ "step": 1029
+ },
+ {
+ "epoch": 5.885714285714286,
+ "grad_norm": 18.788724899291992,
+ "learning_rate": 4.9015873015873024e-05,
+ "loss": 0.6581,
+ "step": 1030
+ },
+ {
+ "epoch": 5.8914285714285715,
+ "grad_norm": 79.2744369506836,
+ "learning_rate": 4.900952380952381e-05,
+ "loss": 0.8661,
+ "step": 1031
+ },
+ {
+ "epoch": 5.897142857142857,
+ "grad_norm": 239.77687072753906,
+ "learning_rate": 4.900317460317461e-05,
+ "loss": 0.8039,
+ "step": 1032
+ },
+ {
+ "epoch": 5.902857142857143,
+ "grad_norm": 65.74775695800781,
+ "learning_rate": 4.8996825396825395e-05,
+ "loss": 0.7899,
+ "step": 1033
+ },
+ {
+ "epoch": 5.908571428571428,
+ "grad_norm": 34.65435791015625,
+ "learning_rate": 4.8990476190476194e-05,
+ "loss": 0.6254,
+ "step": 1034
+ },
+ {
+ "epoch": 5.914285714285715,
+ "grad_norm": 137.52218627929688,
+ "learning_rate": 4.898412698412699e-05,
+ "loss": 0.8915,
+ "step": 1035
+ },
+ {
+ "epoch": 5.92,
+ "grad_norm": 54.810523986816406,
+ "learning_rate": 4.897777777777778e-05,
+ "loss": 0.8686,
+ "step": 1036
+ },
+ {
+ "epoch": 5.925714285714285,
+ "grad_norm": 30.277795791625977,
+ "learning_rate": 4.897142857142857e-05,
+ "loss": 0.7904,
+ "step": 1037
+ },
+ {
+ "epoch": 5.9314285714285715,
+ "grad_norm": 42.795936584472656,
+ "learning_rate": 4.896507936507937e-05,
+ "loss": 0.6607,
+ "step": 1038
+ },
+ {
+ "epoch": 5.937142857142857,
+ "grad_norm": 53.916908264160156,
+ "learning_rate": 4.895873015873016e-05,
+ "loss": 0.6555,
+ "step": 1039
+ },
+ {
+ "epoch": 5.942857142857143,
+ "grad_norm": 45.74040985107422,
+ "learning_rate": 4.895238095238096e-05,
+ "loss": 0.6064,
+ "step": 1040
+ },
+ {
+ "epoch": 5.948571428571428,
+ "grad_norm": 40.435882568359375,
+ "learning_rate": 4.894603174603175e-05,
+ "loss": 0.7522,
+ "step": 1041
+ },
+ {
+ "epoch": 5.954285714285715,
+ "grad_norm": 61.07969665527344,
+ "learning_rate": 4.893968253968254e-05,
+ "loss": 0.9887,
+ "step": 1042
+ },
+ {
+ "epoch": 5.96,
+ "grad_norm": 46.681976318359375,
+ "learning_rate": 4.8933333333333335e-05,
+ "loss": 0.8548,
+ "step": 1043
+ },
+ {
+ "epoch": 5.965714285714286,
+ "grad_norm": 35.48418045043945,
+ "learning_rate": 4.892698412698413e-05,
+ "loss": 0.6326,
+ "step": 1044
+ },
+ {
+ "epoch": 5.9714285714285715,
+ "grad_norm": 56.91579818725586,
+ "learning_rate": 4.892063492063492e-05,
+ "loss": 0.8085,
+ "step": 1045
+ },
+ {
+ "epoch": 5.977142857142857,
+ "grad_norm": 26.985816955566406,
+ "learning_rate": 4.891428571428572e-05,
+ "loss": 0.8397,
+ "step": 1046
+ },
+ {
+ "epoch": 5.982857142857143,
+ "grad_norm": 48.0628662109375,
+ "learning_rate": 4.8907936507936505e-05,
+ "loss": 0.78,
+ "step": 1047
+ },
+ {
+ "epoch": 5.988571428571428,
+ "grad_norm": 59.83071517944336,
+ "learning_rate": 4.8901587301587305e-05,
+ "loss": 0.7693,
+ "step": 1048
+ },
+ {
+ "epoch": 5.994285714285715,
+ "grad_norm": 23.15542984008789,
+ "learning_rate": 4.88952380952381e-05,
+ "loss": 0.6467,
+ "step": 1049
+ },
+ {
+ "epoch": 6.0,
+ "grad_norm": 50.857933044433594,
+ "learning_rate": 4.888888888888889e-05,
+ "loss": 0.7164,
+ "step": 1050
+ },
+ {
+ "epoch": 6.0,
+ "eval_classes": 0,
+ "eval_loss": 0.8369960188865662,
+ "eval_map": 0.852,
+ "eval_map_50": 0.9449,
+ "eval_map_75": 0.924,
+ "eval_map_large": 0.852,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.852,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7546,
+ "eval_mar_10": 0.9413,
+ "eval_mar_100": 0.9635,
+ "eval_mar_100_per_class": 0.9635,
+ "eval_mar_large": 0.9635,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 17.0881,
+ "eval_samples_per_second": 17.205,
+ "eval_steps_per_second": 2.165,
+ "step": 1050
+ },
+ {
+ "epoch": 6.005714285714285,
+ "grad_norm": 24.954538345336914,
+ "learning_rate": 4.888253968253968e-05,
+ "loss": 0.7493,
+ "step": 1051
+ },
+ {
+ "epoch": 6.011428571428572,
+ "grad_norm": 23.112192153930664,
+ "learning_rate": 4.887619047619048e-05,
+ "loss": 0.6879,
+ "step": 1052
+ },
+ {
+ "epoch": 6.017142857142857,
+ "grad_norm": 102.9321517944336,
+ "learning_rate": 4.886984126984127e-05,
+ "loss": 0.7954,
+ "step": 1053
+ },
+ {
+ "epoch": 6.022857142857143,
+ "grad_norm": 33.70575714111328,
+ "learning_rate": 4.886349206349207e-05,
+ "loss": 0.8101,
+ "step": 1054
+ },
+ {
+ "epoch": 6.0285714285714285,
+ "grad_norm": 39.177547454833984,
+ "learning_rate": 4.885714285714286e-05,
+ "loss": 0.711,
+ "step": 1055
+ },
+ {
+ "epoch": 6.034285714285715,
+ "grad_norm": 55.89052963256836,
+ "learning_rate": 4.885079365079365e-05,
+ "loss": 0.8373,
+ "step": 1056
+ },
+ {
+ "epoch": 6.04,
+ "grad_norm": 37.87918472290039,
+ "learning_rate": 4.8844444444444445e-05,
+ "loss": 0.6406,
+ "step": 1057
+ },
+ {
+ "epoch": 6.045714285714285,
+ "grad_norm": 37.094642639160156,
+ "learning_rate": 4.883809523809524e-05,
+ "loss": 0.853,
+ "step": 1058
+ },
+ {
+ "epoch": 6.051428571428572,
+ "grad_norm": 52.7163200378418,
+ "learning_rate": 4.883174603174604e-05,
+ "loss": 0.5881,
+ "step": 1059
+ },
+ {
+ "epoch": 6.057142857142857,
+ "grad_norm": 31.145267486572266,
+ "learning_rate": 4.882539682539683e-05,
+ "loss": 0.6518,
+ "step": 1060
+ },
+ {
+ "epoch": 6.062857142857143,
+ "grad_norm": 68.63578796386719,
+ "learning_rate": 4.881904761904762e-05,
+ "loss": 0.7544,
+ "step": 1061
+ },
+ {
+ "epoch": 6.0685714285714285,
+ "grad_norm": 82.92304992675781,
+ "learning_rate": 4.8812698412698415e-05,
+ "loss": 0.59,
+ "step": 1062
+ },
+ {
+ "epoch": 6.074285714285715,
+ "grad_norm": 63.362648010253906,
+ "learning_rate": 4.8806349206349214e-05,
+ "loss": 0.6871,
+ "step": 1063
+ },
+ {
+ "epoch": 6.08,
+ "grad_norm": 52.70489501953125,
+ "learning_rate": 4.88e-05,
+ "loss": 0.649,
+ "step": 1064
+ },
+ {
+ "epoch": 6.085714285714285,
+ "grad_norm": 47.088253021240234,
+ "learning_rate": 4.87936507936508e-05,
+ "loss": 0.634,
+ "step": 1065
+ },
+ {
+ "epoch": 6.091428571428572,
+ "grad_norm": 39.26316833496094,
+ "learning_rate": 4.8787301587301586e-05,
+ "loss": 0.7083,
+ "step": 1066
+ },
+ {
+ "epoch": 6.097142857142857,
+ "grad_norm": 36.16307830810547,
+ "learning_rate": 4.8780952380952385e-05,
+ "loss": 0.776,
+ "step": 1067
+ },
+ {
+ "epoch": 6.102857142857143,
+ "grad_norm": 31.930225372314453,
+ "learning_rate": 4.877460317460318e-05,
+ "loss": 0.7513,
+ "step": 1068
+ },
+ {
+ "epoch": 6.1085714285714285,
+ "grad_norm": 77.57910919189453,
+ "learning_rate": 4.876825396825397e-05,
+ "loss": 0.5557,
+ "step": 1069
+ },
+ {
+ "epoch": 6.114285714285714,
+ "grad_norm": 57.64714431762695,
+ "learning_rate": 4.876190476190476e-05,
+ "loss": 0.644,
+ "step": 1070
+ },
+ {
+ "epoch": 6.12,
+ "grad_norm": 162.1901092529297,
+ "learning_rate": 4.875555555555556e-05,
+ "loss": 0.6925,
+ "step": 1071
+ },
+ {
+ "epoch": 6.1257142857142854,
+ "grad_norm": 19.613445281982422,
+ "learning_rate": 4.874920634920635e-05,
+ "loss": 0.6037,
+ "step": 1072
+ },
+ {
+ "epoch": 6.131428571428572,
+ "grad_norm": 41.59284973144531,
+ "learning_rate": 4.874285714285715e-05,
+ "loss": 1.1079,
+ "step": 1073
+ },
+ {
+ "epoch": 6.137142857142857,
+ "grad_norm": 79.42727661132812,
+ "learning_rate": 4.873650793650794e-05,
+ "loss": 0.4649,
+ "step": 1074
+ },
+ {
+ "epoch": 6.142857142857143,
+ "grad_norm": 26.3876953125,
+ "learning_rate": 4.873015873015873e-05,
+ "loss": 0.5414,
+ "step": 1075
+ },
+ {
+ "epoch": 6.148571428571429,
+ "grad_norm": 132.17300415039062,
+ "learning_rate": 4.8723809523809526e-05,
+ "loss": 0.9906,
+ "step": 1076
+ },
+ {
+ "epoch": 6.154285714285714,
+ "grad_norm": 35.66273880004883,
+ "learning_rate": 4.871746031746032e-05,
+ "loss": 0.8514,
+ "step": 1077
+ },
+ {
+ "epoch": 6.16,
+ "grad_norm": 70.73888397216797,
+ "learning_rate": 4.871111111111111e-05,
+ "loss": 0.6043,
+ "step": 1078
+ },
+ {
+ "epoch": 6.1657142857142855,
+ "grad_norm": 12.699053764343262,
+ "learning_rate": 4.870476190476191e-05,
+ "loss": 0.6757,
+ "step": 1079
+ },
+ {
+ "epoch": 6.171428571428572,
+ "grad_norm": 38.0023078918457,
+ "learning_rate": 4.8698412698412696e-05,
+ "loss": 0.5962,
+ "step": 1080
+ },
+ {
+ "epoch": 6.177142857142857,
+ "grad_norm": 39.17434310913086,
+ "learning_rate": 4.8692063492063495e-05,
+ "loss": 0.8336,
+ "step": 1081
+ },
+ {
+ "epoch": 6.182857142857143,
+ "grad_norm": 74.63424682617188,
+ "learning_rate": 4.868571428571429e-05,
+ "loss": 0.9345,
+ "step": 1082
+ },
+ {
+ "epoch": 6.188571428571429,
+ "grad_norm": 53.93844223022461,
+ "learning_rate": 4.867936507936508e-05,
+ "loss": 1.0408,
+ "step": 1083
+ },
+ {
+ "epoch": 6.194285714285714,
+ "grad_norm": 66.69645690917969,
+ "learning_rate": 4.867301587301587e-05,
+ "loss": 0.8289,
+ "step": 1084
+ },
+ {
+ "epoch": 6.2,
+ "grad_norm": 96.8067626953125,
+ "learning_rate": 4.866666666666667e-05,
+ "loss": 1.3159,
+ "step": 1085
+ },
+ {
+ "epoch": 6.2057142857142855,
+ "grad_norm": 59.63151168823242,
+ "learning_rate": 4.866031746031746e-05,
+ "loss": 0.8477,
+ "step": 1086
+ },
+ {
+ "epoch": 6.211428571428572,
+ "grad_norm": 45.384647369384766,
+ "learning_rate": 4.865396825396826e-05,
+ "loss": 0.9704,
+ "step": 1087
+ },
+ {
+ "epoch": 6.217142857142857,
+ "grad_norm": 54.9943733215332,
+ "learning_rate": 4.864761904761905e-05,
+ "loss": 0.8903,
+ "step": 1088
+ },
+ {
+ "epoch": 6.222857142857142,
+ "grad_norm": 56.89737319946289,
+ "learning_rate": 4.864126984126984e-05,
+ "loss": 0.6687,
+ "step": 1089
+ },
+ {
+ "epoch": 6.228571428571429,
+ "grad_norm": 32.90390396118164,
+ "learning_rate": 4.8634920634920636e-05,
+ "loss": 0.8242,
+ "step": 1090
+ },
+ {
+ "epoch": 6.234285714285714,
+ "grad_norm": 46.840389251708984,
+ "learning_rate": 4.862857142857143e-05,
+ "loss": 0.7521,
+ "step": 1091
+ },
+ {
+ "epoch": 6.24,
+ "grad_norm": 88.7275161743164,
+ "learning_rate": 4.862222222222222e-05,
+ "loss": 0.8655,
+ "step": 1092
+ },
+ {
+ "epoch": 6.2457142857142856,
+ "grad_norm": 58.718074798583984,
+ "learning_rate": 4.861587301587302e-05,
+ "loss": 0.7266,
+ "step": 1093
+ },
+ {
+ "epoch": 6.251428571428572,
+ "grad_norm": 43.9836311340332,
+ "learning_rate": 4.860952380952381e-05,
+ "loss": 0.6816,
+ "step": 1094
+ },
+ {
+ "epoch": 6.257142857142857,
+ "grad_norm": 38.8942985534668,
+ "learning_rate": 4.8603174603174606e-05,
+ "loss": 0.6908,
+ "step": 1095
+ },
+ {
+ "epoch": 6.2628571428571425,
+ "grad_norm": 93.62848663330078,
+ "learning_rate": 4.85968253968254e-05,
+ "loss": 0.6644,
+ "step": 1096
+ },
+ {
+ "epoch": 6.268571428571429,
+ "grad_norm": 58.32928466796875,
+ "learning_rate": 4.859047619047619e-05,
+ "loss": 0.6923,
+ "step": 1097
+ },
+ {
+ "epoch": 6.274285714285714,
+ "grad_norm": 46.724342346191406,
+ "learning_rate": 4.858412698412699e-05,
+ "loss": 0.6886,
+ "step": 1098
+ },
+ {
+ "epoch": 6.28,
+ "grad_norm": 61.18104934692383,
+ "learning_rate": 4.8577777777777776e-05,
+ "loss": 0.6711,
+ "step": 1099
+ },
+ {
+ "epoch": 6.285714285714286,
+ "grad_norm": 25.95333480834961,
+ "learning_rate": 4.8571428571428576e-05,
+ "loss": 0.9204,
+ "step": 1100
+ },
+ {
+ "epoch": 6.291428571428572,
+ "grad_norm": 44.279659271240234,
+ "learning_rate": 4.856507936507937e-05,
+ "loss": 0.468,
+ "step": 1101
+ },
+ {
+ "epoch": 6.297142857142857,
+ "grad_norm": 58.290977478027344,
+ "learning_rate": 4.855873015873016e-05,
+ "loss": 1.0472,
+ "step": 1102
+ },
+ {
+ "epoch": 6.3028571428571425,
+ "grad_norm": 19.661542892456055,
+ "learning_rate": 4.8552380952380954e-05,
+ "loss": 0.6991,
+ "step": 1103
+ },
+ {
+ "epoch": 6.308571428571429,
+ "grad_norm": 56.441322326660156,
+ "learning_rate": 4.854603174603175e-05,
+ "loss": 0.8712,
+ "step": 1104
+ },
+ {
+ "epoch": 6.314285714285714,
+ "grad_norm": 37.64546203613281,
+ "learning_rate": 4.853968253968254e-05,
+ "loss": 0.6974,
+ "step": 1105
+ },
+ {
+ "epoch": 6.32,
+ "grad_norm": 27.978652954101562,
+ "learning_rate": 4.853333333333334e-05,
+ "loss": 0.6804,
+ "step": 1106
+ },
+ {
+ "epoch": 6.325714285714286,
+ "grad_norm": 31.64655303955078,
+ "learning_rate": 4.852698412698413e-05,
+ "loss": 0.5039,
+ "step": 1107
+ },
+ {
+ "epoch": 6.331428571428571,
+ "grad_norm": 48.66095733642578,
+ "learning_rate": 4.8520634920634924e-05,
+ "loss": 0.4374,
+ "step": 1108
+ },
+ {
+ "epoch": 6.337142857142857,
+ "grad_norm": 35.47930908203125,
+ "learning_rate": 4.8514285714285716e-05,
+ "loss": 0.6162,
+ "step": 1109
+ },
+ {
+ "epoch": 6.3428571428571425,
+ "grad_norm": 22.606897354125977,
+ "learning_rate": 4.850793650793651e-05,
+ "loss": 0.8655,
+ "step": 1110
+ },
+ {
+ "epoch": 6.348571428571429,
+ "grad_norm": 46.42298126220703,
+ "learning_rate": 4.85015873015873e-05,
+ "loss": 0.4887,
+ "step": 1111
+ },
+ {
+ "epoch": 6.354285714285714,
+ "grad_norm": 40.30084991455078,
+ "learning_rate": 4.84952380952381e-05,
+ "loss": 0.8729,
+ "step": 1112
+ },
+ {
+ "epoch": 6.36,
+ "grad_norm": 18.799062728881836,
+ "learning_rate": 4.848888888888889e-05,
+ "loss": 0.3812,
+ "step": 1113
+ },
+ {
+ "epoch": 6.365714285714286,
+ "grad_norm": 47.63445281982422,
+ "learning_rate": 4.8482539682539686e-05,
+ "loss": 0.5685,
+ "step": 1114
+ },
+ {
+ "epoch": 6.371428571428572,
+ "grad_norm": 102.43451690673828,
+ "learning_rate": 4.847619047619048e-05,
+ "loss": 1.2137,
+ "step": 1115
+ },
+ {
+ "epoch": 6.377142857142857,
+ "grad_norm": 72.0689926147461,
+ "learning_rate": 4.846984126984127e-05,
+ "loss": 0.7643,
+ "step": 1116
+ },
+ {
+ "epoch": 6.382857142857143,
+ "grad_norm": 44.23721694946289,
+ "learning_rate": 4.8463492063492064e-05,
+ "loss": 0.6036,
+ "step": 1117
+ },
+ {
+ "epoch": 6.388571428571429,
+ "grad_norm": 25.187232971191406,
+ "learning_rate": 4.8457142857142864e-05,
+ "loss": 0.6362,
+ "step": 1118
+ },
+ {
+ "epoch": 6.394285714285714,
+ "grad_norm": 32.348670959472656,
+ "learning_rate": 4.845079365079365e-05,
+ "loss": 0.8032,
+ "step": 1119
+ },
+ {
+ "epoch": 6.4,
+ "grad_norm": 19.9083309173584,
+ "learning_rate": 4.844444444444445e-05,
+ "loss": 0.6215,
+ "step": 1120
+ },
+ {
+ "epoch": 6.405714285714286,
+ "grad_norm": 27.486642837524414,
+ "learning_rate": 4.8438095238095235e-05,
+ "loss": 0.9652,
+ "step": 1121
+ },
+ {
+ "epoch": 6.411428571428571,
+ "grad_norm": 46.83481979370117,
+ "learning_rate": 4.8431746031746034e-05,
+ "loss": 0.5544,
+ "step": 1122
+ },
+ {
+ "epoch": 6.417142857142857,
+ "grad_norm": 43.2258415222168,
+ "learning_rate": 4.842539682539683e-05,
+ "loss": 0.6362,
+ "step": 1123
+ },
+ {
+ "epoch": 6.422857142857143,
+ "grad_norm": 25.14783477783203,
+ "learning_rate": 4.841904761904762e-05,
+ "loss": 1.0612,
+ "step": 1124
+ },
+ {
+ "epoch": 6.428571428571429,
+ "grad_norm": 39.89846420288086,
+ "learning_rate": 4.841269841269841e-05,
+ "loss": 0.6556,
+ "step": 1125
+ },
+ {
+ "epoch": 6.434285714285714,
+ "grad_norm": 31.60659408569336,
+ "learning_rate": 4.840634920634921e-05,
+ "loss": 0.7325,
+ "step": 1126
+ },
+ {
+ "epoch": 6.44,
+ "grad_norm": 37.08771514892578,
+ "learning_rate": 4.8400000000000004e-05,
+ "loss": 0.783,
+ "step": 1127
+ },
+ {
+ "epoch": 6.445714285714286,
+ "grad_norm": 20.82538604736328,
+ "learning_rate": 4.83936507936508e-05,
+ "loss": 0.6206,
+ "step": 1128
+ },
+ {
+ "epoch": 6.451428571428571,
+ "grad_norm": 27.13603973388672,
+ "learning_rate": 4.838730158730159e-05,
+ "loss": 0.7035,
+ "step": 1129
+ },
+ {
+ "epoch": 6.457142857142857,
+ "grad_norm": 55.448936462402344,
+ "learning_rate": 4.838095238095238e-05,
+ "loss": 0.6635,
+ "step": 1130
+ },
+ {
+ "epoch": 6.462857142857143,
+ "grad_norm": 42.173458099365234,
+ "learning_rate": 4.8374603174603175e-05,
+ "loss": 1.2021,
+ "step": 1131
+ },
+ {
+ "epoch": 6.468571428571429,
+ "grad_norm": 288.70526123046875,
+ "learning_rate": 4.836825396825397e-05,
+ "loss": 0.7537,
+ "step": 1132
+ },
+ {
+ "epoch": 6.474285714285714,
+ "grad_norm": 64.1880874633789,
+ "learning_rate": 4.836190476190477e-05,
+ "loss": 0.7211,
+ "step": 1133
+ },
+ {
+ "epoch": 6.48,
+ "grad_norm": 31.468538284301758,
+ "learning_rate": 4.835555555555556e-05,
+ "loss": 0.9399,
+ "step": 1134
+ },
+ {
+ "epoch": 6.485714285714286,
+ "grad_norm": 32.73714065551758,
+ "learning_rate": 4.834920634920635e-05,
+ "loss": 0.6802,
+ "step": 1135
+ },
+ {
+ "epoch": 6.491428571428571,
+ "grad_norm": 45.5434455871582,
+ "learning_rate": 4.8342857142857145e-05,
+ "loss": 0.6964,
+ "step": 1136
+ },
+ {
+ "epoch": 6.497142857142857,
+ "grad_norm": 46.5738410949707,
+ "learning_rate": 4.8336507936507944e-05,
+ "loss": 0.639,
+ "step": 1137
+ },
+ {
+ "epoch": 6.502857142857143,
+ "grad_norm": 39.77674865722656,
+ "learning_rate": 4.833015873015873e-05,
+ "loss": 0.7366,
+ "step": 1138
+ },
+ {
+ "epoch": 6.508571428571429,
+ "grad_norm": 28.013656616210938,
+ "learning_rate": 4.832380952380953e-05,
+ "loss": 0.9194,
+ "step": 1139
+ },
+ {
+ "epoch": 6.514285714285714,
+ "grad_norm": 41.1294059753418,
+ "learning_rate": 4.831746031746032e-05,
+ "loss": 0.797,
+ "step": 1140
+ },
+ {
+ "epoch": 6.52,
+ "grad_norm": 50.677650451660156,
+ "learning_rate": 4.8311111111111115e-05,
+ "loss": 0.9168,
+ "step": 1141
+ },
+ {
+ "epoch": 6.525714285714286,
+ "grad_norm": 43.122467041015625,
+ "learning_rate": 4.830476190476191e-05,
+ "loss": 0.605,
+ "step": 1142
+ },
+ {
+ "epoch": 6.531428571428571,
+ "grad_norm": 52.74139404296875,
+ "learning_rate": 4.82984126984127e-05,
+ "loss": 0.6699,
+ "step": 1143
+ },
+ {
+ "epoch": 6.537142857142857,
+ "grad_norm": 39.470733642578125,
+ "learning_rate": 4.829206349206349e-05,
+ "loss": 0.5968,
+ "step": 1144
+ },
+ {
+ "epoch": 6.542857142857143,
+ "grad_norm": 23.233463287353516,
+ "learning_rate": 4.828571428571429e-05,
+ "loss": 0.7266,
+ "step": 1145
+ },
+ {
+ "epoch": 6.548571428571429,
+ "grad_norm": 43.671607971191406,
+ "learning_rate": 4.827936507936508e-05,
+ "loss": 0.4698,
+ "step": 1146
+ },
+ {
+ "epoch": 6.554285714285714,
+ "grad_norm": 75.19342041015625,
+ "learning_rate": 4.827301587301588e-05,
+ "loss": 0.892,
+ "step": 1147
+ },
+ {
+ "epoch": 6.5600000000000005,
+ "grad_norm": 21.33753776550293,
+ "learning_rate": 4.826666666666667e-05,
+ "loss": 0.5585,
+ "step": 1148
+ },
+ {
+ "epoch": 6.565714285714286,
+ "grad_norm": 56.81877517700195,
+ "learning_rate": 4.826031746031746e-05,
+ "loss": 0.7324,
+ "step": 1149
+ },
+ {
+ "epoch": 6.571428571428571,
+ "grad_norm": 21.56536102294922,
+ "learning_rate": 4.8253968253968255e-05,
+ "loss": 0.639,
+ "step": 1150
+ },
+ {
+ "epoch": 6.577142857142857,
+ "grad_norm": 59.72079086303711,
+ "learning_rate": 4.8247619047619055e-05,
+ "loss": 0.8404,
+ "step": 1151
+ },
+ {
+ "epoch": 6.582857142857143,
+ "grad_norm": 52.377994537353516,
+ "learning_rate": 4.824126984126984e-05,
+ "loss": 0.7984,
+ "step": 1152
+ },
+ {
+ "epoch": 6.588571428571429,
+ "grad_norm": 62.65300369262695,
+ "learning_rate": 4.823492063492064e-05,
+ "loss": 0.5371,
+ "step": 1153
+ },
+ {
+ "epoch": 6.594285714285714,
+ "grad_norm": 44.0941047668457,
+ "learning_rate": 4.8228571428571426e-05,
+ "loss": 0.5253,
+ "step": 1154
+ },
+ {
+ "epoch": 6.6,
+ "grad_norm": 43.50336456298828,
+ "learning_rate": 4.8222222222222225e-05,
+ "loss": 0.8101,
+ "step": 1155
+ },
+ {
+ "epoch": 6.605714285714286,
+ "grad_norm": 37.503929138183594,
+ "learning_rate": 4.821587301587302e-05,
+ "loss": 0.804,
+ "step": 1156
+ },
+ {
+ "epoch": 6.611428571428571,
+ "grad_norm": 54.677154541015625,
+ "learning_rate": 4.820952380952381e-05,
+ "loss": 0.7564,
+ "step": 1157
+ },
+ {
+ "epoch": 6.617142857142857,
+ "grad_norm": 48.551334381103516,
+ "learning_rate": 4.82031746031746e-05,
+ "loss": 0.7482,
+ "step": 1158
+ },
+ {
+ "epoch": 6.622857142857143,
+ "grad_norm": 46.53056335449219,
+ "learning_rate": 4.81968253968254e-05,
+ "loss": 0.7155,
+ "step": 1159
+ },
+ {
+ "epoch": 6.628571428571428,
+ "grad_norm": 31.326919555664062,
+ "learning_rate": 4.819047619047619e-05,
+ "loss": 0.9375,
+ "step": 1160
+ },
+ {
+ "epoch": 6.634285714285714,
+ "grad_norm": 42.24070358276367,
+ "learning_rate": 4.818412698412699e-05,
+ "loss": 0.7436,
+ "step": 1161
+ },
+ {
+ "epoch": 6.64,
+ "grad_norm": 62.546688079833984,
+ "learning_rate": 4.817777777777778e-05,
+ "loss": 0.6693,
+ "step": 1162
+ },
+ {
+ "epoch": 6.645714285714286,
+ "grad_norm": 52.980587005615234,
+ "learning_rate": 4.817142857142857e-05,
+ "loss": 0.9501,
+ "step": 1163
+ },
+ {
+ "epoch": 6.651428571428571,
+ "grad_norm": 44.53245162963867,
+ "learning_rate": 4.8165079365079366e-05,
+ "loss": 0.6875,
+ "step": 1164
+ },
+ {
+ "epoch": 6.6571428571428575,
+ "grad_norm": 37.25103759765625,
+ "learning_rate": 4.815873015873016e-05,
+ "loss": 0.65,
+ "step": 1165
+ },
+ {
+ "epoch": 6.662857142857143,
+ "grad_norm": 36.22304153442383,
+ "learning_rate": 4.815238095238096e-05,
+ "loss": 0.9507,
+ "step": 1166
+ },
+ {
+ "epoch": 6.668571428571429,
+ "grad_norm": 36.808326721191406,
+ "learning_rate": 4.814603174603175e-05,
+ "loss": 0.4853,
+ "step": 1167
+ },
+ {
+ "epoch": 6.674285714285714,
+ "grad_norm": 37.148014068603516,
+ "learning_rate": 4.813968253968254e-05,
+ "loss": 0.6508,
+ "step": 1168
+ },
+ {
+ "epoch": 6.68,
+ "grad_norm": 37.951263427734375,
+ "learning_rate": 4.8133333333333336e-05,
+ "loss": 0.6891,
+ "step": 1169
+ },
+ {
+ "epoch": 6.685714285714286,
+ "grad_norm": 31.897668838500977,
+ "learning_rate": 4.812698412698413e-05,
+ "loss": 0.8311,
+ "step": 1170
+ },
+ {
+ "epoch": 6.691428571428571,
+ "grad_norm": 50.647037506103516,
+ "learning_rate": 4.812063492063492e-05,
+ "loss": 0.6032,
+ "step": 1171
+ },
+ {
+ "epoch": 6.6971428571428575,
+ "grad_norm": 41.20783996582031,
+ "learning_rate": 4.811428571428572e-05,
+ "loss": 0.5875,
+ "step": 1172
+ },
+ {
+ "epoch": 6.702857142857143,
+ "grad_norm": 33.35331726074219,
+ "learning_rate": 4.810793650793651e-05,
+ "loss": 0.6017,
+ "step": 1173
+ },
+ {
+ "epoch": 6.708571428571428,
+ "grad_norm": 25.426799774169922,
+ "learning_rate": 4.8101587301587305e-05,
+ "loss": 0.7326,
+ "step": 1174
+ },
+ {
+ "epoch": 6.714285714285714,
+ "grad_norm": 40.838165283203125,
+ "learning_rate": 4.80952380952381e-05,
+ "loss": 0.9523,
+ "step": 1175
+ },
+ {
+ "epoch": 6.72,
+ "grad_norm": 152.543701171875,
+ "learning_rate": 4.808888888888889e-05,
+ "loss": 0.7916,
+ "step": 1176
+ },
+ {
+ "epoch": 6.725714285714286,
+ "grad_norm": 51.32667922973633,
+ "learning_rate": 4.8082539682539683e-05,
+ "loss": 1.0948,
+ "step": 1177
+ },
+ {
+ "epoch": 6.731428571428571,
+ "grad_norm": 28.265817642211914,
+ "learning_rate": 4.807619047619048e-05,
+ "loss": 0.5807,
+ "step": 1178
+ },
+ {
+ "epoch": 6.737142857142857,
+ "grad_norm": 116.4324722290039,
+ "learning_rate": 4.806984126984127e-05,
+ "loss": 0.669,
+ "step": 1179
+ },
+ {
+ "epoch": 6.742857142857143,
+ "grad_norm": 34.159423828125,
+ "learning_rate": 4.806349206349207e-05,
+ "loss": 0.8612,
+ "step": 1180
+ },
+ {
+ "epoch": 6.748571428571428,
+ "grad_norm": 39.030296325683594,
+ "learning_rate": 4.805714285714286e-05,
+ "loss": 0.873,
+ "step": 1181
+ },
+ {
+ "epoch": 6.7542857142857144,
+ "grad_norm": 37.374820709228516,
+ "learning_rate": 4.805079365079365e-05,
+ "loss": 0.8782,
+ "step": 1182
+ },
+ {
+ "epoch": 6.76,
+ "grad_norm": 119.02398681640625,
+ "learning_rate": 4.8044444444444446e-05,
+ "loss": 0.7441,
+ "step": 1183
+ },
+ {
+ "epoch": 6.765714285714286,
+ "grad_norm": 50.47015380859375,
+ "learning_rate": 4.8038095238095245e-05,
+ "loss": 0.669,
+ "step": 1184
+ },
+ {
+ "epoch": 6.771428571428571,
+ "grad_norm": 28.232337951660156,
+ "learning_rate": 4.803174603174603e-05,
+ "loss": 0.9575,
+ "step": 1185
+ },
+ {
+ "epoch": 6.777142857142858,
+ "grad_norm": 48.09858322143555,
+ "learning_rate": 4.802539682539683e-05,
+ "loss": 0.6097,
+ "step": 1186
+ },
+ {
+ "epoch": 6.782857142857143,
+ "grad_norm": 75.99871063232422,
+ "learning_rate": 4.8019047619047617e-05,
+ "loss": 0.8402,
+ "step": 1187
+ },
+ {
+ "epoch": 6.788571428571428,
+ "grad_norm": 36.24739074707031,
+ "learning_rate": 4.8012698412698416e-05,
+ "loss": 0.54,
+ "step": 1188
+ },
+ {
+ "epoch": 6.7942857142857145,
+ "grad_norm": 66.97037506103516,
+ "learning_rate": 4.800634920634921e-05,
+ "loss": 0.651,
+ "step": 1189
+ },
+ {
+ "epoch": 6.8,
+ "grad_norm": 110.5335464477539,
+ "learning_rate": 4.8e-05,
+ "loss": 0.5993,
+ "step": 1190
+ },
+ {
+ "epoch": 6.805714285714286,
+ "grad_norm": 24.29607582092285,
+ "learning_rate": 4.7993650793650794e-05,
+ "loss": 0.5497,
+ "step": 1191
+ },
+ {
+ "epoch": 6.811428571428571,
+ "grad_norm": 72.72708892822266,
+ "learning_rate": 4.798730158730159e-05,
+ "loss": 0.4804,
+ "step": 1192
+ },
+ {
+ "epoch": 6.817142857142857,
+ "grad_norm": 39.02521514892578,
+ "learning_rate": 4.798095238095238e-05,
+ "loss": 0.6332,
+ "step": 1193
+ },
+ {
+ "epoch": 6.822857142857143,
+ "grad_norm": 71.21878814697266,
+ "learning_rate": 4.797460317460318e-05,
+ "loss": 0.5008,
+ "step": 1194
+ },
+ {
+ "epoch": 6.828571428571428,
+ "grad_norm": 52.14028549194336,
+ "learning_rate": 4.7968253968253964e-05,
+ "loss": 0.848,
+ "step": 1195
+ },
+ {
+ "epoch": 6.8342857142857145,
+ "grad_norm": 64.03514099121094,
+ "learning_rate": 4.7961904761904764e-05,
+ "loss": 0.726,
+ "step": 1196
+ },
+ {
+ "epoch": 6.84,
+ "grad_norm": 64.52749633789062,
+ "learning_rate": 4.7955555555555556e-05,
+ "loss": 0.5717,
+ "step": 1197
+ },
+ {
+ "epoch": 6.845714285714286,
+ "grad_norm": 28.972579956054688,
+ "learning_rate": 4.794920634920635e-05,
+ "loss": 0.7247,
+ "step": 1198
+ },
+ {
+ "epoch": 6.851428571428571,
+ "grad_norm": 32.29021453857422,
+ "learning_rate": 4.794285714285714e-05,
+ "loss": 0.6866,
+ "step": 1199
+ },
+ {
+ "epoch": 6.857142857142857,
+ "grad_norm": 67.7391128540039,
+ "learning_rate": 4.793650793650794e-05,
+ "loss": 0.6087,
+ "step": 1200
+ },
+ {
+ "epoch": 6.862857142857143,
+ "grad_norm": 42.552764892578125,
+ "learning_rate": 4.7930158730158734e-05,
+ "loss": 0.5617,
+ "step": 1201
+ },
+ {
+ "epoch": 6.868571428571428,
+ "grad_norm": 24.80632209777832,
+ "learning_rate": 4.7923809523809526e-05,
+ "loss": 0.6602,
+ "step": 1202
+ },
+ {
+ "epoch": 6.8742857142857146,
+ "grad_norm": 59.35768508911133,
+ "learning_rate": 4.791746031746032e-05,
+ "loss": 0.7463,
+ "step": 1203
+ },
+ {
+ "epoch": 6.88,
+ "grad_norm": 98.59971618652344,
+ "learning_rate": 4.791111111111111e-05,
+ "loss": 0.4685,
+ "step": 1204
+ },
+ {
+ "epoch": 6.885714285714286,
+ "grad_norm": 67.02965545654297,
+ "learning_rate": 4.790476190476191e-05,
+ "loss": 0.7761,
+ "step": 1205
+ },
+ {
+ "epoch": 6.8914285714285715,
+ "grad_norm": 35.71647262573242,
+ "learning_rate": 4.78984126984127e-05,
+ "loss": 0.5647,
+ "step": 1206
+ },
+ {
+ "epoch": 6.897142857142857,
+ "grad_norm": 31.601408004760742,
+ "learning_rate": 4.7892063492063496e-05,
+ "loss": 0.5825,
+ "step": 1207
+ },
+ {
+ "epoch": 6.902857142857143,
+ "grad_norm": 28.59677505493164,
+ "learning_rate": 4.788571428571429e-05,
+ "loss": 0.5724,
+ "step": 1208
+ },
+ {
+ "epoch": 6.908571428571428,
+ "grad_norm": 43.35946273803711,
+ "learning_rate": 4.787936507936508e-05,
+ "loss": 0.748,
+ "step": 1209
+ },
+ {
+ "epoch": 6.914285714285715,
+ "grad_norm": 92.41816711425781,
+ "learning_rate": 4.7873015873015874e-05,
+ "loss": 0.7642,
+ "step": 1210
+ },
+ {
+ "epoch": 6.92,
+ "grad_norm": 30.898258209228516,
+ "learning_rate": 4.7866666666666674e-05,
+ "loss": 0.6595,
+ "step": 1211
+ },
+ {
+ "epoch": 6.925714285714285,
+ "grad_norm": 48.89678192138672,
+ "learning_rate": 4.786031746031746e-05,
+ "loss": 0.7994,
+ "step": 1212
+ },
+ {
+ "epoch": 6.9314285714285715,
+ "grad_norm": 51.480499267578125,
+ "learning_rate": 4.785396825396826e-05,
+ "loss": 0.6359,
+ "step": 1213
+ },
+ {
+ "epoch": 6.937142857142857,
+ "grad_norm": 53.00624465942383,
+ "learning_rate": 4.784761904761905e-05,
+ "loss": 0.6924,
+ "step": 1214
+ },
+ {
+ "epoch": 6.942857142857143,
+ "grad_norm": 23.87933349609375,
+ "learning_rate": 4.7841269841269844e-05,
+ "loss": 0.8247,
+ "step": 1215
+ },
+ {
+ "epoch": 6.948571428571428,
+ "grad_norm": 41.4123649597168,
+ "learning_rate": 4.783492063492064e-05,
+ "loss": 0.7448,
+ "step": 1216
+ },
+ {
+ "epoch": 6.954285714285715,
+ "grad_norm": 21.50604820251465,
+ "learning_rate": 4.782857142857143e-05,
+ "loss": 0.5331,
+ "step": 1217
+ },
+ {
+ "epoch": 6.96,
+ "grad_norm": 32.19643783569336,
+ "learning_rate": 4.782222222222222e-05,
+ "loss": 0.6036,
+ "step": 1218
+ },
+ {
+ "epoch": 6.965714285714286,
+ "grad_norm": 212.23252868652344,
+ "learning_rate": 4.781587301587302e-05,
+ "loss": 1.1959,
+ "step": 1219
+ },
+ {
+ "epoch": 6.9714285714285715,
+ "grad_norm": 52.73761749267578,
+ "learning_rate": 4.780952380952381e-05,
+ "loss": 0.7818,
+ "step": 1220
+ },
+ {
+ "epoch": 6.977142857142857,
+ "grad_norm": 43.433170318603516,
+ "learning_rate": 4.780317460317461e-05,
+ "loss": 0.9563,
+ "step": 1221
+ },
+ {
+ "epoch": 6.982857142857143,
+ "grad_norm": 70.16417694091797,
+ "learning_rate": 4.77968253968254e-05,
+ "loss": 0.9158,
+ "step": 1222
+ },
+ {
+ "epoch": 6.988571428571428,
+ "grad_norm": 49.823883056640625,
+ "learning_rate": 4.779047619047619e-05,
+ "loss": 0.8667,
+ "step": 1223
+ },
+ {
+ "epoch": 6.994285714285715,
+ "grad_norm": 53.50132369995117,
+ "learning_rate": 4.7784126984126985e-05,
+ "loss": 0.7879,
+ "step": 1224
+ },
+ {
+ "epoch": 7.0,
+ "grad_norm": 37.431339263916016,
+ "learning_rate": 4.7777777777777784e-05,
+ "loss": 0.6104,
+ "step": 1225
+ },
+ {
+ "epoch": 7.0,
+ "eval_classes": 0,
+ "eval_loss": 0.7867908477783203,
+ "eval_map": 0.8772,
+ "eval_map_50": 0.9382,
+ "eval_map_75": 0.9197,
+ "eval_map_large": 0.8773,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.8772,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7635,
+ "eval_mar_10": 0.9635,
+ "eval_mar_100": 0.9724,
+ "eval_mar_100_per_class": 0.9724,
+ "eval_mar_large": 0.9724,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 18.3837,
+ "eval_samples_per_second": 15.992,
+ "eval_steps_per_second": 2.013,
+ "step": 1225
+ },
+ {
+ "epoch": 7.005714285714285,
+ "grad_norm": 57.21017837524414,
+ "learning_rate": 4.777142857142857e-05,
+ "loss": 0.8237,
+ "step": 1226
+ },
+ {
+ "epoch": 7.011428571428572,
+ "grad_norm": 33.716190338134766,
+ "learning_rate": 4.776507936507937e-05,
+ "loss": 0.6962,
+ "step": 1227
+ },
+ {
+ "epoch": 7.017142857142857,
+ "grad_norm": 32.71946716308594,
+ "learning_rate": 4.7758730158730155e-05,
+ "loss": 0.5775,
+ "step": 1228
+ },
+ {
+ "epoch": 7.022857142857143,
+ "grad_norm": 19.849760055541992,
+ "learning_rate": 4.7752380952380955e-05,
+ "loss": 0.7152,
+ "step": 1229
+ },
+ {
+ "epoch": 7.0285714285714285,
+ "grad_norm": 61.321693420410156,
+ "learning_rate": 4.774603174603175e-05,
+ "loss": 0.505,
+ "step": 1230
+ },
+ {
+ "epoch": 7.034285714285715,
+ "grad_norm": 26.332571029663086,
+ "learning_rate": 4.773968253968254e-05,
+ "loss": 0.7814,
+ "step": 1231
+ },
+ {
+ "epoch": 7.04,
+ "grad_norm": 30.386167526245117,
+ "learning_rate": 4.773333333333333e-05,
+ "loss": 0.7395,
+ "step": 1232
+ },
+ {
+ "epoch": 7.045714285714285,
+ "grad_norm": 56.83769989013672,
+ "learning_rate": 4.772698412698413e-05,
+ "loss": 0.683,
+ "step": 1233
+ },
+ {
+ "epoch": 7.051428571428572,
+ "grad_norm": 24.878450393676758,
+ "learning_rate": 4.7720634920634925e-05,
+ "loss": 0.9383,
+ "step": 1234
+ },
+ {
+ "epoch": 7.057142857142857,
+ "grad_norm": 23.832763671875,
+ "learning_rate": 4.771428571428572e-05,
+ "loss": 0.6939,
+ "step": 1235
+ },
+ {
+ "epoch": 7.062857142857143,
+ "grad_norm": 35.99517822265625,
+ "learning_rate": 4.770793650793651e-05,
+ "loss": 0.6861,
+ "step": 1236
+ },
+ {
+ "epoch": 7.0685714285714285,
+ "grad_norm": 40.71198272705078,
+ "learning_rate": 4.77015873015873e-05,
+ "loss": 0.5809,
+ "step": 1237
+ },
+ {
+ "epoch": 7.074285714285715,
+ "grad_norm": 36.933433532714844,
+ "learning_rate": 4.7695238095238095e-05,
+ "loss": 0.466,
+ "step": 1238
+ },
+ {
+ "epoch": 7.08,
+ "grad_norm": 52.858970642089844,
+ "learning_rate": 4.768888888888889e-05,
+ "loss": 0.5116,
+ "step": 1239
+ },
+ {
+ "epoch": 7.085714285714285,
+ "grad_norm": 61.59458923339844,
+ "learning_rate": 4.768253968253969e-05,
+ "loss": 0.8465,
+ "step": 1240
+ },
+ {
+ "epoch": 7.091428571428572,
+ "grad_norm": 50.026336669921875,
+ "learning_rate": 4.767619047619048e-05,
+ "loss": 0.585,
+ "step": 1241
+ },
+ {
+ "epoch": 7.097142857142857,
+ "grad_norm": 98.24055480957031,
+ "learning_rate": 4.766984126984127e-05,
+ "loss": 0.5558,
+ "step": 1242
+ },
+ {
+ "epoch": 7.102857142857143,
+ "grad_norm": 52.92375564575195,
+ "learning_rate": 4.7663492063492065e-05,
+ "loss": 0.6798,
+ "step": 1243
+ },
+ {
+ "epoch": 7.1085714285714285,
+ "grad_norm": 144.95716857910156,
+ "learning_rate": 4.7657142857142865e-05,
+ "loss": 0.9123,
+ "step": 1244
+ },
+ {
+ "epoch": 7.114285714285714,
+ "grad_norm": 53.37465286254883,
+ "learning_rate": 4.765079365079365e-05,
+ "loss": 0.6079,
+ "step": 1245
+ },
+ {
+ "epoch": 7.12,
+ "grad_norm": 58.13810348510742,
+ "learning_rate": 4.764444444444445e-05,
+ "loss": 0.5448,
+ "step": 1246
+ },
+ {
+ "epoch": 7.1257142857142854,
+ "grad_norm": 51.799503326416016,
+ "learning_rate": 4.763809523809524e-05,
+ "loss": 0.4733,
+ "step": 1247
+ },
+ {
+ "epoch": 7.131428571428572,
+ "grad_norm": 41.86768341064453,
+ "learning_rate": 4.7631746031746035e-05,
+ "loss": 0.5872,
+ "step": 1248
+ },
+ {
+ "epoch": 7.137142857142857,
+ "grad_norm": 75.33160400390625,
+ "learning_rate": 4.762539682539683e-05,
+ "loss": 0.5285,
+ "step": 1249
+ },
+ {
+ "epoch": 7.142857142857143,
+ "grad_norm": 37.584312438964844,
+ "learning_rate": 4.761904761904762e-05,
+ "loss": 0.5908,
+ "step": 1250
+ },
+ {
+ "epoch": 7.148571428571429,
+ "grad_norm": 27.99728012084961,
+ "learning_rate": 4.761269841269841e-05,
+ "loss": 0.7489,
+ "step": 1251
+ },
+ {
+ "epoch": 7.154285714285714,
+ "grad_norm": 215.4183349609375,
+ "learning_rate": 4.760634920634921e-05,
+ "loss": 0.7163,
+ "step": 1252
+ },
+ {
+ "epoch": 7.16,
+ "grad_norm": 41.62477493286133,
+ "learning_rate": 4.76e-05,
+ "loss": 0.9882,
+ "step": 1253
+ },
+ {
+ "epoch": 7.1657142857142855,
+ "grad_norm": 28.22425651550293,
+ "learning_rate": 4.75936507936508e-05,
+ "loss": 0.847,
+ "step": 1254
+ },
+ {
+ "epoch": 7.171428571428572,
+ "grad_norm": 53.103084564208984,
+ "learning_rate": 4.758730158730159e-05,
+ "loss": 0.5099,
+ "step": 1255
+ },
+ {
+ "epoch": 7.177142857142857,
+ "grad_norm": 49.915714263916016,
+ "learning_rate": 4.758095238095238e-05,
+ "loss": 0.7455,
+ "step": 1256
+ },
+ {
+ "epoch": 7.182857142857143,
+ "grad_norm": 77.69715118408203,
+ "learning_rate": 4.7574603174603176e-05,
+ "loss": 0.7597,
+ "step": 1257
+ },
+ {
+ "epoch": 7.188571428571429,
+ "grad_norm": 50.848731994628906,
+ "learning_rate": 4.7568253968253975e-05,
+ "loss": 0.6128,
+ "step": 1258
+ },
+ {
+ "epoch": 7.194285714285714,
+ "grad_norm": 24.732351303100586,
+ "learning_rate": 4.756190476190476e-05,
+ "loss": 0.7537,
+ "step": 1259
+ },
+ {
+ "epoch": 7.2,
+ "grad_norm": 21.70195198059082,
+ "learning_rate": 4.755555555555556e-05,
+ "loss": 0.4549,
+ "step": 1260
+ },
+ {
+ "epoch": 7.2057142857142855,
+ "grad_norm": 25.89827537536621,
+ "learning_rate": 4.7549206349206346e-05,
+ "loss": 0.4765,
+ "step": 1261
+ },
+ {
+ "epoch": 7.211428571428572,
+ "grad_norm": 33.5347900390625,
+ "learning_rate": 4.7542857142857146e-05,
+ "loss": 0.772,
+ "step": 1262
+ },
+ {
+ "epoch": 7.217142857142857,
+ "grad_norm": 65.69261169433594,
+ "learning_rate": 4.753650793650794e-05,
+ "loss": 0.7734,
+ "step": 1263
+ },
+ {
+ "epoch": 7.222857142857142,
+ "grad_norm": 30.24980926513672,
+ "learning_rate": 4.753015873015873e-05,
+ "loss": 0.7583,
+ "step": 1264
+ },
+ {
+ "epoch": 7.228571428571429,
+ "grad_norm": 98.29232788085938,
+ "learning_rate": 4.7523809523809523e-05,
+ "loss": 0.7562,
+ "step": 1265
+ },
+ {
+ "epoch": 7.234285714285714,
+ "grad_norm": 179.77772521972656,
+ "learning_rate": 4.751746031746032e-05,
+ "loss": 0.8792,
+ "step": 1266
+ },
+ {
+ "epoch": 7.24,
+ "grad_norm": 29.68090057373047,
+ "learning_rate": 4.751111111111111e-05,
+ "loss": 0.6853,
+ "step": 1267
+ },
+ {
+ "epoch": 7.2457142857142856,
+ "grad_norm": 72.9825668334961,
+ "learning_rate": 4.750476190476191e-05,
+ "loss": 0.8319,
+ "step": 1268
+ },
+ {
+ "epoch": 7.251428571428572,
+ "grad_norm": 28.135278701782227,
+ "learning_rate": 4.74984126984127e-05,
+ "loss": 0.7565,
+ "step": 1269
+ },
+ {
+ "epoch": 7.257142857142857,
+ "grad_norm": 40.16902542114258,
+ "learning_rate": 4.7492063492063493e-05,
+ "loss": 0.5983,
+ "step": 1270
+ },
+ {
+ "epoch": 7.2628571428571425,
+ "grad_norm": 32.82265090942383,
+ "learning_rate": 4.7485714285714286e-05,
+ "loss": 0.5208,
+ "step": 1271
+ },
+ {
+ "epoch": 7.268571428571429,
+ "grad_norm": 36.688411712646484,
+ "learning_rate": 4.747936507936508e-05,
+ "loss": 0.6193,
+ "step": 1272
+ },
+ {
+ "epoch": 7.274285714285714,
+ "grad_norm": 63.6164665222168,
+ "learning_rate": 4.747301587301588e-05,
+ "loss": 0.6022,
+ "step": 1273
+ },
+ {
+ "epoch": 7.28,
+ "grad_norm": 29.837209701538086,
+ "learning_rate": 4.746666666666667e-05,
+ "loss": 0.5733,
+ "step": 1274
+ },
+ {
+ "epoch": 7.285714285714286,
+ "grad_norm": 45.36772155761719,
+ "learning_rate": 4.746031746031746e-05,
+ "loss": 0.5461,
+ "step": 1275
+ },
+ {
+ "epoch": 7.291428571428572,
+ "grad_norm": 23.052791595458984,
+ "learning_rate": 4.7453968253968256e-05,
+ "loss": 0.5568,
+ "step": 1276
+ },
+ {
+ "epoch": 7.297142857142857,
+ "grad_norm": 70.93611145019531,
+ "learning_rate": 4.744761904761905e-05,
+ "loss": 0.6384,
+ "step": 1277
+ },
+ {
+ "epoch": 7.3028571428571425,
+ "grad_norm": 28.31834602355957,
+ "learning_rate": 4.744126984126984e-05,
+ "loss": 0.6331,
+ "step": 1278
+ },
+ {
+ "epoch": 7.308571428571429,
+ "grad_norm": 60.236297607421875,
+ "learning_rate": 4.743492063492064e-05,
+ "loss": 0.6148,
+ "step": 1279
+ },
+ {
+ "epoch": 7.314285714285714,
+ "grad_norm": 371.16424560546875,
+ "learning_rate": 4.742857142857143e-05,
+ "loss": 0.4808,
+ "step": 1280
+ },
+ {
+ "epoch": 7.32,
+ "grad_norm": 75.56861877441406,
+ "learning_rate": 4.7422222222222226e-05,
+ "loss": 0.5723,
+ "step": 1281
+ },
+ {
+ "epoch": 7.325714285714286,
+ "grad_norm": 92.06929016113281,
+ "learning_rate": 4.741587301587302e-05,
+ "loss": 0.714,
+ "step": 1282
+ },
+ {
+ "epoch": 7.331428571428571,
+ "grad_norm": 58.06058120727539,
+ "learning_rate": 4.740952380952381e-05,
+ "loss": 0.6472,
+ "step": 1283
+ },
+ {
+ "epoch": 7.337142857142857,
+ "grad_norm": 49.69181823730469,
+ "learning_rate": 4.7403174603174604e-05,
+ "loss": 0.6676,
+ "step": 1284
+ },
+ {
+ "epoch": 7.3428571428571425,
+ "grad_norm": 43.00157928466797,
+ "learning_rate": 4.73968253968254e-05,
+ "loss": 0.7564,
+ "step": 1285
+ },
+ {
+ "epoch": 7.348571428571429,
+ "grad_norm": 22.465194702148438,
+ "learning_rate": 4.739047619047619e-05,
+ "loss": 0.7006,
+ "step": 1286
+ },
+ {
+ "epoch": 7.354285714285714,
+ "grad_norm": 48.402339935302734,
+ "learning_rate": 4.738412698412699e-05,
+ "loss": 0.5376,
+ "step": 1287
+ },
+ {
+ "epoch": 7.36,
+ "grad_norm": 26.851852416992188,
+ "learning_rate": 4.737777777777778e-05,
+ "loss": 0.7149,
+ "step": 1288
+ },
+ {
+ "epoch": 7.365714285714286,
+ "grad_norm": 102.73119354248047,
+ "learning_rate": 4.7371428571428574e-05,
+ "loss": 0.6729,
+ "step": 1289
+ },
+ {
+ "epoch": 7.371428571428572,
+ "grad_norm": 40.6550178527832,
+ "learning_rate": 4.7365079365079366e-05,
+ "loss": 0.6184,
+ "step": 1290
+ },
+ {
+ "epoch": 7.377142857142857,
+ "grad_norm": 42.261741638183594,
+ "learning_rate": 4.7358730158730166e-05,
+ "loss": 0.5753,
+ "step": 1291
+ },
+ {
+ "epoch": 7.382857142857143,
+ "grad_norm": 50.98457336425781,
+ "learning_rate": 4.735238095238095e-05,
+ "loss": 0.5469,
+ "step": 1292
+ },
+ {
+ "epoch": 7.388571428571429,
+ "grad_norm": 45.39360427856445,
+ "learning_rate": 4.734603174603175e-05,
+ "loss": 0.4814,
+ "step": 1293
+ },
+ {
+ "epoch": 7.394285714285714,
+ "grad_norm": 51.51216125488281,
+ "learning_rate": 4.733968253968254e-05,
+ "loss": 0.7583,
+ "step": 1294
+ },
+ {
+ "epoch": 7.4,
+ "grad_norm": 22.863304138183594,
+ "learning_rate": 4.7333333333333336e-05,
+ "loss": 0.6165,
+ "step": 1295
+ },
+ {
+ "epoch": 7.405714285714286,
+ "grad_norm": 17.304244995117188,
+ "learning_rate": 4.732698412698413e-05,
+ "loss": 0.6645,
+ "step": 1296
+ },
+ {
+ "epoch": 7.411428571428571,
+ "grad_norm": 67.12471771240234,
+ "learning_rate": 4.732063492063492e-05,
+ "loss": 0.7557,
+ "step": 1297
+ },
+ {
+ "epoch": 7.417142857142857,
+ "grad_norm": 35.99345397949219,
+ "learning_rate": 4.7314285714285714e-05,
+ "loss": 0.6616,
+ "step": 1298
+ },
+ {
+ "epoch": 7.422857142857143,
+ "grad_norm": 34.61998748779297,
+ "learning_rate": 4.7307936507936514e-05,
+ "loss": 0.5287,
+ "step": 1299
+ },
+ {
+ "epoch": 7.428571428571429,
+ "grad_norm": 31.36380386352539,
+ "learning_rate": 4.73015873015873e-05,
+ "loss": 0.5124,
+ "step": 1300
+ },
+ {
+ "epoch": 7.434285714285714,
+ "grad_norm": 52.2266845703125,
+ "learning_rate": 4.72952380952381e-05,
+ "loss": 0.7854,
+ "step": 1301
+ },
+ {
+ "epoch": 7.44,
+ "grad_norm": 30.61333465576172,
+ "learning_rate": 4.728888888888889e-05,
+ "loss": 0.6238,
+ "step": 1302
+ },
+ {
+ "epoch": 7.445714285714286,
+ "grad_norm": 48.91444778442383,
+ "learning_rate": 4.7282539682539684e-05,
+ "loss": 0.567,
+ "step": 1303
+ },
+ {
+ "epoch": 7.451428571428571,
+ "grad_norm": 18.876323699951172,
+ "learning_rate": 4.727619047619048e-05,
+ "loss": 0.7001,
+ "step": 1304
+ },
+ {
+ "epoch": 7.457142857142857,
+ "grad_norm": 75.31610107421875,
+ "learning_rate": 4.726984126984127e-05,
+ "loss": 0.6682,
+ "step": 1305
+ },
+ {
+ "epoch": 7.462857142857143,
+ "grad_norm": 24.598119735717773,
+ "learning_rate": 4.726349206349206e-05,
+ "loss": 0.7838,
+ "step": 1306
+ },
+ {
+ "epoch": 7.468571428571429,
+ "grad_norm": 56.07027816772461,
+ "learning_rate": 4.725714285714286e-05,
+ "loss": 0.6504,
+ "step": 1307
+ },
+ {
+ "epoch": 7.474285714285714,
+ "grad_norm": 41.39461135864258,
+ "learning_rate": 4.7250793650793654e-05,
+ "loss": 0.6751,
+ "step": 1308
+ },
+ {
+ "epoch": 7.48,
+ "grad_norm": 18.566652297973633,
+ "learning_rate": 4.724444444444445e-05,
+ "loss": 0.4242,
+ "step": 1309
+ },
+ {
+ "epoch": 7.485714285714286,
+ "grad_norm": 25.184492111206055,
+ "learning_rate": 4.723809523809524e-05,
+ "loss": 0.7987,
+ "step": 1310
+ },
+ {
+ "epoch": 7.491428571428571,
+ "grad_norm": 42.953147888183594,
+ "learning_rate": 4.723174603174603e-05,
+ "loss": 0.8759,
+ "step": 1311
+ },
+ {
+ "epoch": 7.497142857142857,
+ "grad_norm": 45.01292037963867,
+ "learning_rate": 4.722539682539683e-05,
+ "loss": 0.5476,
+ "step": 1312
+ },
+ {
+ "epoch": 7.502857142857143,
+ "grad_norm": 77.92054748535156,
+ "learning_rate": 4.7219047619047624e-05,
+ "loss": 0.6594,
+ "step": 1313
+ },
+ {
+ "epoch": 7.508571428571429,
+ "grad_norm": 72.87895202636719,
+ "learning_rate": 4.721269841269842e-05,
+ "loss": 0.8035,
+ "step": 1314
+ },
+ {
+ "epoch": 7.514285714285714,
+ "grad_norm": 74.46797943115234,
+ "learning_rate": 4.720634920634921e-05,
+ "loss": 1.1645,
+ "step": 1315
+ },
+ {
+ "epoch": 7.52,
+ "grad_norm": 26.602317810058594,
+ "learning_rate": 4.72e-05,
+ "loss": 0.6856,
+ "step": 1316
+ },
+ {
+ "epoch": 7.525714285714286,
+ "grad_norm": 25.693401336669922,
+ "learning_rate": 4.7193650793650795e-05,
+ "loss": 0.6172,
+ "step": 1317
+ },
+ {
+ "epoch": 7.531428571428571,
+ "grad_norm": 31.481958389282227,
+ "learning_rate": 4.7187301587301594e-05,
+ "loss": 0.7717,
+ "step": 1318
+ },
+ {
+ "epoch": 7.537142857142857,
+ "grad_norm": 83.2188491821289,
+ "learning_rate": 4.718095238095238e-05,
+ "loss": 0.7369,
+ "step": 1319
+ },
+ {
+ "epoch": 7.542857142857143,
+ "grad_norm": 366.4823303222656,
+ "learning_rate": 4.717460317460318e-05,
+ "loss": 0.7258,
+ "step": 1320
+ },
+ {
+ "epoch": 7.548571428571429,
+ "grad_norm": 86.60832977294922,
+ "learning_rate": 4.716825396825397e-05,
+ "loss": 0.5646,
+ "step": 1321
+ },
+ {
+ "epoch": 7.554285714285714,
+ "grad_norm": 51.94847106933594,
+ "learning_rate": 4.7161904761904765e-05,
+ "loss": 0.5762,
+ "step": 1322
+ },
+ {
+ "epoch": 7.5600000000000005,
+ "grad_norm": 50.4522590637207,
+ "learning_rate": 4.715555555555556e-05,
+ "loss": 0.5024,
+ "step": 1323
+ },
+ {
+ "epoch": 7.565714285714286,
+ "grad_norm": 30.071168899536133,
+ "learning_rate": 4.714920634920636e-05,
+ "loss": 0.6713,
+ "step": 1324
+ },
+ {
+ "epoch": 7.571428571428571,
+ "grad_norm": 33.680992126464844,
+ "learning_rate": 4.714285714285714e-05,
+ "loss": 0.7059,
+ "step": 1325
+ },
+ {
+ "epoch": 7.577142857142857,
+ "grad_norm": 34.198062896728516,
+ "learning_rate": 4.713650793650794e-05,
+ "loss": 0.7452,
+ "step": 1326
+ },
+ {
+ "epoch": 7.582857142857143,
+ "grad_norm": 60.24821090698242,
+ "learning_rate": 4.713015873015873e-05,
+ "loss": 0.5497,
+ "step": 1327
+ },
+ {
+ "epoch": 7.588571428571429,
+ "grad_norm": 35.51438903808594,
+ "learning_rate": 4.712380952380953e-05,
+ "loss": 0.6105,
+ "step": 1328
+ },
+ {
+ "epoch": 7.594285714285714,
+ "grad_norm": 73.2537612915039,
+ "learning_rate": 4.711746031746032e-05,
+ "loss": 0.5574,
+ "step": 1329
+ },
+ {
+ "epoch": 7.6,
+ "grad_norm": 30.033363342285156,
+ "learning_rate": 4.711111111111111e-05,
+ "loss": 0.5753,
+ "step": 1330
+ },
+ {
+ "epoch": 7.605714285714286,
+ "grad_norm": 73.85240936279297,
+ "learning_rate": 4.7104761904761905e-05,
+ "loss": 0.7066,
+ "step": 1331
+ },
+ {
+ "epoch": 7.611428571428571,
+ "grad_norm": 81.2567367553711,
+ "learning_rate": 4.7098412698412705e-05,
+ "loss": 0.817,
+ "step": 1332
+ },
+ {
+ "epoch": 7.617142857142857,
+ "grad_norm": 31.7690486907959,
+ "learning_rate": 4.709206349206349e-05,
+ "loss": 0.4587,
+ "step": 1333
+ },
+ {
+ "epoch": 7.622857142857143,
+ "grad_norm": 23.420303344726562,
+ "learning_rate": 4.708571428571429e-05,
+ "loss": 0.6593,
+ "step": 1334
+ },
+ {
+ "epoch": 7.628571428571428,
+ "grad_norm": 38.06245803833008,
+ "learning_rate": 4.707936507936508e-05,
+ "loss": 0.7267,
+ "step": 1335
+ },
+ {
+ "epoch": 7.634285714285714,
+ "grad_norm": 58.60105895996094,
+ "learning_rate": 4.7073015873015875e-05,
+ "loss": 0.5329,
+ "step": 1336
+ },
+ {
+ "epoch": 7.64,
+ "grad_norm": 46.24578094482422,
+ "learning_rate": 4.706666666666667e-05,
+ "loss": 0.885,
+ "step": 1337
+ },
+ {
+ "epoch": 7.645714285714286,
+ "grad_norm": 45.6176643371582,
+ "learning_rate": 4.706031746031746e-05,
+ "loss": 0.5294,
+ "step": 1338
+ },
+ {
+ "epoch": 7.651428571428571,
+ "grad_norm": 78.04761505126953,
+ "learning_rate": 4.705396825396825e-05,
+ "loss": 0.6382,
+ "step": 1339
+ },
+ {
+ "epoch": 7.6571428571428575,
+ "grad_norm": 321.0185241699219,
+ "learning_rate": 4.704761904761905e-05,
+ "loss": 0.7118,
+ "step": 1340
+ },
+ {
+ "epoch": 7.662857142857143,
+ "grad_norm": 45.66987991333008,
+ "learning_rate": 4.704126984126984e-05,
+ "loss": 0.9693,
+ "step": 1341
+ },
+ {
+ "epoch": 7.668571428571429,
+ "grad_norm": 47.87458038330078,
+ "learning_rate": 4.703492063492064e-05,
+ "loss": 0.6789,
+ "step": 1342
+ },
+ {
+ "epoch": 7.674285714285714,
+ "grad_norm": 32.397735595703125,
+ "learning_rate": 4.702857142857143e-05,
+ "loss": 0.543,
+ "step": 1343
+ },
+ {
+ "epoch": 7.68,
+ "grad_norm": 54.12117385864258,
+ "learning_rate": 4.702222222222222e-05,
+ "loss": 0.5905,
+ "step": 1344
+ },
+ {
+ "epoch": 7.685714285714286,
+ "grad_norm": 60.069007873535156,
+ "learning_rate": 4.7015873015873016e-05,
+ "loss": 0.8836,
+ "step": 1345
+ },
+ {
+ "epoch": 7.691428571428571,
+ "grad_norm": 67.7389144897461,
+ "learning_rate": 4.7009523809523815e-05,
+ "loss": 0.8242,
+ "step": 1346
+ },
+ {
+ "epoch": 7.6971428571428575,
+ "grad_norm": 24.725425720214844,
+ "learning_rate": 4.700317460317461e-05,
+ "loss": 0.6177,
+ "step": 1347
+ },
+ {
+ "epoch": 7.702857142857143,
+ "grad_norm": 34.804195404052734,
+ "learning_rate": 4.69968253968254e-05,
+ "loss": 0.4738,
+ "step": 1348
+ },
+ {
+ "epoch": 7.708571428571428,
+ "grad_norm": 125.37673950195312,
+ "learning_rate": 4.699047619047619e-05,
+ "loss": 0.6758,
+ "step": 1349
+ },
+ {
+ "epoch": 7.714285714285714,
+ "grad_norm": 50.93052673339844,
+ "learning_rate": 4.6984126984126986e-05,
+ "loss": 0.5257,
+ "step": 1350
+ },
+ {
+ "epoch": 7.72,
+ "grad_norm": 55.35873031616211,
+ "learning_rate": 4.6977777777777785e-05,
+ "loss": 0.6723,
+ "step": 1351
+ },
+ {
+ "epoch": 7.725714285714286,
+ "grad_norm": 27.543476104736328,
+ "learning_rate": 4.697142857142857e-05,
+ "loss": 0.6399,
+ "step": 1352
+ },
+ {
+ "epoch": 7.731428571428571,
+ "grad_norm": 114.15330505371094,
+ "learning_rate": 4.696507936507937e-05,
+ "loss": 0.6198,
+ "step": 1353
+ },
+ {
+ "epoch": 7.737142857142857,
+ "grad_norm": 51.38933181762695,
+ "learning_rate": 4.695873015873016e-05,
+ "loss": 0.5771,
+ "step": 1354
+ },
+ {
+ "epoch": 7.742857142857143,
+ "grad_norm": 40.537330627441406,
+ "learning_rate": 4.6952380952380956e-05,
+ "loss": 0.5921,
+ "step": 1355
+ },
+ {
+ "epoch": 7.748571428571428,
+ "grad_norm": 63.388702392578125,
+ "learning_rate": 4.694603174603175e-05,
+ "loss": 0.4777,
+ "step": 1356
+ },
+ {
+ "epoch": 7.7542857142857144,
+ "grad_norm": 39.542999267578125,
+ "learning_rate": 4.693968253968255e-05,
+ "loss": 0.6808,
+ "step": 1357
+ },
+ {
+ "epoch": 7.76,
+ "grad_norm": 35.982200622558594,
+ "learning_rate": 4.6933333333333333e-05,
+ "loss": 0.7238,
+ "step": 1358
+ },
+ {
+ "epoch": 7.765714285714286,
+ "grad_norm": 49.393272399902344,
+ "learning_rate": 4.692698412698413e-05,
+ "loss": 0.598,
+ "step": 1359
+ },
+ {
+ "epoch": 7.771428571428571,
+ "grad_norm": 34.336341857910156,
+ "learning_rate": 4.692063492063492e-05,
+ "loss": 0.9183,
+ "step": 1360
+ },
+ {
+ "epoch": 7.777142857142858,
+ "grad_norm": 35.617088317871094,
+ "learning_rate": 4.691428571428572e-05,
+ "loss": 0.7478,
+ "step": 1361
+ },
+ {
+ "epoch": 7.782857142857143,
+ "grad_norm": 61.48871612548828,
+ "learning_rate": 4.690793650793651e-05,
+ "loss": 0.636,
+ "step": 1362
+ },
+ {
+ "epoch": 7.788571428571428,
+ "grad_norm": 27.061933517456055,
+ "learning_rate": 4.6901587301587303e-05,
+ "loss": 0.3823,
+ "step": 1363
+ },
+ {
+ "epoch": 7.7942857142857145,
+ "grad_norm": 27.507038116455078,
+ "learning_rate": 4.6895238095238096e-05,
+ "loss": 0.7432,
+ "step": 1364
+ },
+ {
+ "epoch": 7.8,
+ "grad_norm": 24.477672576904297,
+ "learning_rate": 4.6888888888888895e-05,
+ "loss": 0.5468,
+ "step": 1365
+ },
+ {
+ "epoch": 7.805714285714286,
+ "grad_norm": 40.26399230957031,
+ "learning_rate": 4.688253968253968e-05,
+ "loss": 0.733,
+ "step": 1366
+ },
+ {
+ "epoch": 7.811428571428571,
+ "grad_norm": 103.63063049316406,
+ "learning_rate": 4.687619047619048e-05,
+ "loss": 0.517,
+ "step": 1367
+ },
+ {
+ "epoch": 7.817142857142857,
+ "grad_norm": 28.04522132873535,
+ "learning_rate": 4.686984126984127e-05,
+ "loss": 0.5636,
+ "step": 1368
+ },
+ {
+ "epoch": 7.822857142857143,
+ "grad_norm": 41.827449798583984,
+ "learning_rate": 4.6863492063492066e-05,
+ "loss": 0.4993,
+ "step": 1369
+ },
+ {
+ "epoch": 7.828571428571428,
+ "grad_norm": 32.13361740112305,
+ "learning_rate": 4.685714285714286e-05,
+ "loss": 0.5968,
+ "step": 1370
+ },
+ {
+ "epoch": 7.8342857142857145,
+ "grad_norm": 60.024898529052734,
+ "learning_rate": 4.685079365079365e-05,
+ "loss": 0.874,
+ "step": 1371
+ },
+ {
+ "epoch": 7.84,
+ "grad_norm": 42.77864456176758,
+ "learning_rate": 4.6844444444444444e-05,
+ "loss": 0.4321,
+ "step": 1372
+ },
+ {
+ "epoch": 7.845714285714286,
+ "grad_norm": 73.90796661376953,
+ "learning_rate": 4.683809523809524e-05,
+ "loss": 0.8918,
+ "step": 1373
+ },
+ {
+ "epoch": 7.851428571428571,
+ "grad_norm": 82.85469055175781,
+ "learning_rate": 4.683174603174603e-05,
+ "loss": 0.3985,
+ "step": 1374
+ },
+ {
+ "epoch": 7.857142857142857,
+ "grad_norm": 41.67444610595703,
+ "learning_rate": 4.682539682539683e-05,
+ "loss": 0.4506,
+ "step": 1375
+ },
+ {
+ "epoch": 7.862857142857143,
+ "grad_norm": 74.92281341552734,
+ "learning_rate": 4.681904761904762e-05,
+ "loss": 0.5536,
+ "step": 1376
+ },
+ {
+ "epoch": 7.868571428571428,
+ "grad_norm": 55.40838623046875,
+ "learning_rate": 4.6812698412698414e-05,
+ "loss": 0.6726,
+ "step": 1377
+ },
+ {
+ "epoch": 7.8742857142857146,
+ "grad_norm": 25.91588592529297,
+ "learning_rate": 4.6806349206349207e-05,
+ "loss": 0.5684,
+ "step": 1378
+ },
+ {
+ "epoch": 7.88,
+ "grad_norm": 60.40071105957031,
+ "learning_rate": 4.6800000000000006e-05,
+ "loss": 0.947,
+ "step": 1379
+ },
+ {
+ "epoch": 7.885714285714286,
+ "grad_norm": 50.07804489135742,
+ "learning_rate": 4.679365079365079e-05,
+ "loss": 0.5797,
+ "step": 1380
+ },
+ {
+ "epoch": 7.8914285714285715,
+ "grad_norm": 29.26346778869629,
+ "learning_rate": 4.678730158730159e-05,
+ "loss": 0.5418,
+ "step": 1381
+ },
+ {
+ "epoch": 7.897142857142857,
+ "grad_norm": 122.21076965332031,
+ "learning_rate": 4.6780952380952384e-05,
+ "loss": 0.5719,
+ "step": 1382
+ },
+ {
+ "epoch": 7.902857142857143,
+ "grad_norm": 175.2599639892578,
+ "learning_rate": 4.6774603174603176e-05,
+ "loss": 0.8481,
+ "step": 1383
+ },
+ {
+ "epoch": 7.908571428571428,
+ "grad_norm": 115.75862884521484,
+ "learning_rate": 4.676825396825397e-05,
+ "loss": 0.5336,
+ "step": 1384
+ },
+ {
+ "epoch": 7.914285714285715,
+ "grad_norm": 32.81181335449219,
+ "learning_rate": 4.676190476190476e-05,
+ "loss": 0.4968,
+ "step": 1385
+ },
+ {
+ "epoch": 7.92,
+ "grad_norm": 52.347774505615234,
+ "learning_rate": 4.675555555555556e-05,
+ "loss": 0.4297,
+ "step": 1386
+ },
+ {
+ "epoch": 7.925714285714285,
+ "grad_norm": 63.84890365600586,
+ "learning_rate": 4.6749206349206354e-05,
+ "loss": 0.4286,
+ "step": 1387
+ },
+ {
+ "epoch": 7.9314285714285715,
+ "grad_norm": 59.2192497253418,
+ "learning_rate": 4.6742857142857146e-05,
+ "loss": 0.5731,
+ "step": 1388
+ },
+ {
+ "epoch": 7.937142857142857,
+ "grad_norm": 75.16362762451172,
+ "learning_rate": 4.673650793650794e-05,
+ "loss": 0.6157,
+ "step": 1389
+ },
+ {
+ "epoch": 7.942857142857143,
+ "grad_norm": 19.244441986083984,
+ "learning_rate": 4.673015873015874e-05,
+ "loss": 0.889,
+ "step": 1390
+ },
+ {
+ "epoch": 7.948571428571428,
+ "grad_norm": 86.41683959960938,
+ "learning_rate": 4.6723809523809524e-05,
+ "loss": 0.7396,
+ "step": 1391
+ },
+ {
+ "epoch": 7.954285714285715,
+ "grad_norm": 36.017669677734375,
+ "learning_rate": 4.6717460317460324e-05,
+ "loss": 0.6135,
+ "step": 1392
+ },
+ {
+ "epoch": 7.96,
+ "grad_norm": 50.021759033203125,
+ "learning_rate": 4.671111111111111e-05,
+ "loss": 0.6035,
+ "step": 1393
+ },
+ {
+ "epoch": 7.965714285714286,
+ "grad_norm": 33.08448028564453,
+ "learning_rate": 4.670476190476191e-05,
+ "loss": 0.7405,
+ "step": 1394
+ },
+ {
+ "epoch": 7.9714285714285715,
+ "grad_norm": 113.28244018554688,
+ "learning_rate": 4.66984126984127e-05,
+ "loss": 0.7022,
+ "step": 1395
+ },
+ {
+ "epoch": 7.977142857142857,
+ "grad_norm": 44.64534378051758,
+ "learning_rate": 4.6692063492063494e-05,
+ "loss": 0.5826,
+ "step": 1396
+ },
+ {
+ "epoch": 7.982857142857143,
+ "grad_norm": 232.41506958007812,
+ "learning_rate": 4.668571428571429e-05,
+ "loss": 0.8322,
+ "step": 1397
+ },
+ {
+ "epoch": 7.988571428571428,
+ "grad_norm": 30.54139518737793,
+ "learning_rate": 4.6679365079365086e-05,
+ "loss": 0.6798,
+ "step": 1398
+ },
+ {
+ "epoch": 7.994285714285715,
+ "grad_norm": 41.59028244018555,
+ "learning_rate": 4.667301587301587e-05,
+ "loss": 0.8065,
+ "step": 1399
+ },
+ {
+ "epoch": 8.0,
+ "grad_norm": 36.8554801940918,
+ "learning_rate": 4.666666666666667e-05,
+ "loss": 0.6687,
+ "step": 1400
+ },
+ {
+ "epoch": 8.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6754283308982849,
+ "eval_map": 0.9018,
+ "eval_map_50": 0.9583,
+ "eval_map_75": 0.9421,
+ "eval_map_large": 0.9019,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9018,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7838,
+ "eval_mar_10": 0.9625,
+ "eval_mar_100": 0.9727,
+ "eval_mar_100_per_class": 0.9727,
+ "eval_mar_large": 0.9727,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 17.0817,
+ "eval_samples_per_second": 17.211,
+ "eval_steps_per_second": 2.166,
+ "step": 1400
+ },
+ {
+ "epoch": 8.005714285714285,
+ "grad_norm": 35.78780746459961,
+ "learning_rate": 4.6660317460317464e-05,
+ "loss": 0.4911,
+ "step": 1401
+ },
+ {
+ "epoch": 8.01142857142857,
+ "grad_norm": 22.01752281188965,
+ "learning_rate": 4.665396825396826e-05,
+ "loss": 0.6943,
+ "step": 1402
+ },
+ {
+ "epoch": 8.017142857142858,
+ "grad_norm": 80.87184143066406,
+ "learning_rate": 4.664761904761905e-05,
+ "loss": 0.4523,
+ "step": 1403
+ },
+ {
+ "epoch": 8.022857142857143,
+ "grad_norm": 53.428428649902344,
+ "learning_rate": 4.664126984126984e-05,
+ "loss": 0.536,
+ "step": 1404
+ },
+ {
+ "epoch": 8.028571428571428,
+ "grad_norm": 83.90179443359375,
+ "learning_rate": 4.6634920634920635e-05,
+ "loss": 0.5229,
+ "step": 1405
+ },
+ {
+ "epoch": 8.034285714285714,
+ "grad_norm": 31.12773895263672,
+ "learning_rate": 4.6628571428571434e-05,
+ "loss": 0.4013,
+ "step": 1406
+ },
+ {
+ "epoch": 8.04,
+ "grad_norm": 29.049760818481445,
+ "learning_rate": 4.662222222222222e-05,
+ "loss": 0.6954,
+ "step": 1407
+ },
+ {
+ "epoch": 8.045714285714286,
+ "grad_norm": 35.7201042175293,
+ "learning_rate": 4.661587301587302e-05,
+ "loss": 0.5723,
+ "step": 1408
+ },
+ {
+ "epoch": 8.051428571428572,
+ "grad_norm": 34.02865219116211,
+ "learning_rate": 4.660952380952381e-05,
+ "loss": 0.6863,
+ "step": 1409
+ },
+ {
+ "epoch": 8.057142857142857,
+ "grad_norm": 286.44732666015625,
+ "learning_rate": 4.6603174603174605e-05,
+ "loss": 0.5025,
+ "step": 1410
+ },
+ {
+ "epoch": 8.062857142857142,
+ "grad_norm": 55.78438949584961,
+ "learning_rate": 4.65968253968254e-05,
+ "loss": 0.6305,
+ "step": 1411
+ },
+ {
+ "epoch": 8.06857142857143,
+ "grad_norm": 51.090538024902344,
+ "learning_rate": 4.659047619047619e-05,
+ "loss": 0.6065,
+ "step": 1412
+ },
+ {
+ "epoch": 8.074285714285715,
+ "grad_norm": 34.32136917114258,
+ "learning_rate": 4.658412698412698e-05,
+ "loss": 0.6657,
+ "step": 1413
+ },
+ {
+ "epoch": 8.08,
+ "grad_norm": 75.9567642211914,
+ "learning_rate": 4.657777777777778e-05,
+ "loss": 0.481,
+ "step": 1414
+ },
+ {
+ "epoch": 8.085714285714285,
+ "grad_norm": 99.80516052246094,
+ "learning_rate": 4.6571428571428575e-05,
+ "loss": 0.6294,
+ "step": 1415
+ },
+ {
+ "epoch": 8.09142857142857,
+ "grad_norm": 44.61457824707031,
+ "learning_rate": 4.656507936507937e-05,
+ "loss": 0.4226,
+ "step": 1416
+ },
+ {
+ "epoch": 8.097142857142858,
+ "grad_norm": 77.42556762695312,
+ "learning_rate": 4.655873015873016e-05,
+ "loss": 0.5829,
+ "step": 1417
+ },
+ {
+ "epoch": 8.102857142857143,
+ "grad_norm": 113.65733337402344,
+ "learning_rate": 4.655238095238095e-05,
+ "loss": 0.7022,
+ "step": 1418
+ },
+ {
+ "epoch": 8.108571428571429,
+ "grad_norm": 67.45095825195312,
+ "learning_rate": 4.6546031746031745e-05,
+ "loss": 0.6034,
+ "step": 1419
+ },
+ {
+ "epoch": 8.114285714285714,
+ "grad_norm": 39.48252868652344,
+ "learning_rate": 4.6539682539682545e-05,
+ "loss": 0.6611,
+ "step": 1420
+ },
+ {
+ "epoch": 8.12,
+ "grad_norm": 36.0588264465332,
+ "learning_rate": 4.653333333333334e-05,
+ "loss": 0.6547,
+ "step": 1421
+ },
+ {
+ "epoch": 8.125714285714286,
+ "grad_norm": 56.38534164428711,
+ "learning_rate": 4.652698412698413e-05,
+ "loss": 0.643,
+ "step": 1422
+ },
+ {
+ "epoch": 8.131428571428572,
+ "grad_norm": 27.028913497924805,
+ "learning_rate": 4.652063492063492e-05,
+ "loss": 0.4471,
+ "step": 1423
+ },
+ {
+ "epoch": 8.137142857142857,
+ "grad_norm": 24.878549575805664,
+ "learning_rate": 4.6514285714285715e-05,
+ "loss": 0.5971,
+ "step": 1424
+ },
+ {
+ "epoch": 8.142857142857142,
+ "grad_norm": 113.82731628417969,
+ "learning_rate": 4.6507936507936515e-05,
+ "loss": 0.7132,
+ "step": 1425
+ },
+ {
+ "epoch": 8.14857142857143,
+ "grad_norm": 66.78875732421875,
+ "learning_rate": 4.65015873015873e-05,
+ "loss": 0.4648,
+ "step": 1426
+ },
+ {
+ "epoch": 8.154285714285715,
+ "grad_norm": 33.675479888916016,
+ "learning_rate": 4.64952380952381e-05,
+ "loss": 0.5242,
+ "step": 1427
+ },
+ {
+ "epoch": 8.16,
+ "grad_norm": 50.45384216308594,
+ "learning_rate": 4.648888888888889e-05,
+ "loss": 0.6918,
+ "step": 1428
+ },
+ {
+ "epoch": 8.165714285714285,
+ "grad_norm": 14.417842864990234,
+ "learning_rate": 4.6482539682539685e-05,
+ "loss": 0.6082,
+ "step": 1429
+ },
+ {
+ "epoch": 8.17142857142857,
+ "grad_norm": 47.269161224365234,
+ "learning_rate": 4.647619047619048e-05,
+ "loss": 0.6355,
+ "step": 1430
+ },
+ {
+ "epoch": 8.177142857142858,
+ "grad_norm": 31.608606338500977,
+ "learning_rate": 4.646984126984128e-05,
+ "loss": 0.5568,
+ "step": 1431
+ },
+ {
+ "epoch": 8.182857142857143,
+ "grad_norm": 37.61075210571289,
+ "learning_rate": 4.646349206349206e-05,
+ "loss": 0.4326,
+ "step": 1432
+ },
+ {
+ "epoch": 8.188571428571429,
+ "grad_norm": 113.44019317626953,
+ "learning_rate": 4.645714285714286e-05,
+ "loss": 0.4667,
+ "step": 1433
+ },
+ {
+ "epoch": 8.194285714285714,
+ "grad_norm": 49.1456184387207,
+ "learning_rate": 4.645079365079365e-05,
+ "loss": 0.7065,
+ "step": 1434
+ },
+ {
+ "epoch": 8.2,
+ "grad_norm": 40.68552780151367,
+ "learning_rate": 4.644444444444445e-05,
+ "loss": 0.5833,
+ "step": 1435
+ },
+ {
+ "epoch": 8.205714285714286,
+ "grad_norm": 54.234046936035156,
+ "learning_rate": 4.643809523809524e-05,
+ "loss": 0.6348,
+ "step": 1436
+ },
+ {
+ "epoch": 8.211428571428572,
+ "grad_norm": 84.55835723876953,
+ "learning_rate": 4.643174603174603e-05,
+ "loss": 0.5484,
+ "step": 1437
+ },
+ {
+ "epoch": 8.217142857142857,
+ "grad_norm": 95.15739440917969,
+ "learning_rate": 4.6425396825396826e-05,
+ "loss": 0.6561,
+ "step": 1438
+ },
+ {
+ "epoch": 8.222857142857142,
+ "grad_norm": 98.21651458740234,
+ "learning_rate": 4.6419047619047625e-05,
+ "loss": 0.8239,
+ "step": 1439
+ },
+ {
+ "epoch": 8.228571428571428,
+ "grad_norm": 36.658851623535156,
+ "learning_rate": 4.641269841269841e-05,
+ "loss": 0.7417,
+ "step": 1440
+ },
+ {
+ "epoch": 8.234285714285715,
+ "grad_norm": 85.5366439819336,
+ "learning_rate": 4.640634920634921e-05,
+ "loss": 0.4967,
+ "step": 1441
+ },
+ {
+ "epoch": 8.24,
+ "grad_norm": 85.452880859375,
+ "learning_rate": 4.64e-05,
+ "loss": 0.6752,
+ "step": 1442
+ },
+ {
+ "epoch": 8.245714285714286,
+ "grad_norm": 39.44017791748047,
+ "learning_rate": 4.6393650793650796e-05,
+ "loss": 0.5459,
+ "step": 1443
+ },
+ {
+ "epoch": 8.251428571428571,
+ "grad_norm": 27.0288143157959,
+ "learning_rate": 4.638730158730159e-05,
+ "loss": 0.6288,
+ "step": 1444
+ },
+ {
+ "epoch": 8.257142857142856,
+ "grad_norm": 51.48039245605469,
+ "learning_rate": 4.638095238095238e-05,
+ "loss": 0.6322,
+ "step": 1445
+ },
+ {
+ "epoch": 8.262857142857143,
+ "grad_norm": 35.94298553466797,
+ "learning_rate": 4.6374603174603174e-05,
+ "loss": 0.5504,
+ "step": 1446
+ },
+ {
+ "epoch": 8.268571428571429,
+ "grad_norm": 76.44605255126953,
+ "learning_rate": 4.636825396825397e-05,
+ "loss": 0.6296,
+ "step": 1447
+ },
+ {
+ "epoch": 8.274285714285714,
+ "grad_norm": 95.23500061035156,
+ "learning_rate": 4.636190476190476e-05,
+ "loss": 0.7675,
+ "step": 1448
+ },
+ {
+ "epoch": 8.28,
+ "grad_norm": 63.241539001464844,
+ "learning_rate": 4.635555555555556e-05,
+ "loss": 0.6487,
+ "step": 1449
+ },
+ {
+ "epoch": 8.285714285714286,
+ "grad_norm": 158.42202758789062,
+ "learning_rate": 4.634920634920635e-05,
+ "loss": 0.4883,
+ "step": 1450
+ },
+ {
+ "epoch": 8.291428571428572,
+ "grad_norm": 40.19489288330078,
+ "learning_rate": 4.6342857142857143e-05,
+ "loss": 0.7871,
+ "step": 1451
+ },
+ {
+ "epoch": 8.297142857142857,
+ "grad_norm": 45.30269241333008,
+ "learning_rate": 4.6336507936507936e-05,
+ "loss": 0.686,
+ "step": 1452
+ },
+ {
+ "epoch": 8.302857142857142,
+ "grad_norm": 83.68958282470703,
+ "learning_rate": 4.6330158730158736e-05,
+ "loss": 0.7743,
+ "step": 1453
+ },
+ {
+ "epoch": 8.308571428571428,
+ "grad_norm": 91.59910583496094,
+ "learning_rate": 4.632380952380953e-05,
+ "loss": 0.5252,
+ "step": 1454
+ },
+ {
+ "epoch": 8.314285714285715,
+ "grad_norm": 44.00296401977539,
+ "learning_rate": 4.631746031746032e-05,
+ "loss": 0.6858,
+ "step": 1455
+ },
+ {
+ "epoch": 8.32,
+ "grad_norm": 38.646297454833984,
+ "learning_rate": 4.6311111111111113e-05,
+ "loss": 0.6036,
+ "step": 1456
+ },
+ {
+ "epoch": 8.325714285714286,
+ "grad_norm": 31.151046752929688,
+ "learning_rate": 4.6304761904761906e-05,
+ "loss": 0.5736,
+ "step": 1457
+ },
+ {
+ "epoch": 8.331428571428571,
+ "grad_norm": 36.38899230957031,
+ "learning_rate": 4.62984126984127e-05,
+ "loss": 0.5169,
+ "step": 1458
+ },
+ {
+ "epoch": 8.337142857142856,
+ "grad_norm": 21.914627075195312,
+ "learning_rate": 4.629206349206349e-05,
+ "loss": 0.6808,
+ "step": 1459
+ },
+ {
+ "epoch": 8.342857142857143,
+ "grad_norm": 18.928544998168945,
+ "learning_rate": 4.628571428571429e-05,
+ "loss": 0.9638,
+ "step": 1460
+ },
+ {
+ "epoch": 8.348571428571429,
+ "grad_norm": 41.01659393310547,
+ "learning_rate": 4.6279365079365083e-05,
+ "loss": 0.4674,
+ "step": 1461
+ },
+ {
+ "epoch": 8.354285714285714,
+ "grad_norm": 24.344030380249023,
+ "learning_rate": 4.6273015873015876e-05,
+ "loss": 0.4353,
+ "step": 1462
+ },
+ {
+ "epoch": 8.36,
+ "grad_norm": 24.396142959594727,
+ "learning_rate": 4.626666666666667e-05,
+ "loss": 0.8538,
+ "step": 1463
+ },
+ {
+ "epoch": 8.365714285714287,
+ "grad_norm": 40.090240478515625,
+ "learning_rate": 4.626031746031747e-05,
+ "loss": 0.8105,
+ "step": 1464
+ },
+ {
+ "epoch": 8.371428571428572,
+ "grad_norm": 167.61058044433594,
+ "learning_rate": 4.6253968253968254e-05,
+ "loss": 0.5838,
+ "step": 1465
+ },
+ {
+ "epoch": 8.377142857142857,
+ "grad_norm": 46.45691680908203,
+ "learning_rate": 4.624761904761905e-05,
+ "loss": 0.5343,
+ "step": 1466
+ },
+ {
+ "epoch": 8.382857142857143,
+ "grad_norm": 90.0116958618164,
+ "learning_rate": 4.624126984126984e-05,
+ "loss": 0.5916,
+ "step": 1467
+ },
+ {
+ "epoch": 8.388571428571428,
+ "grad_norm": 17.666582107543945,
+ "learning_rate": 4.623492063492064e-05,
+ "loss": 0.5452,
+ "step": 1468
+ },
+ {
+ "epoch": 8.394285714285715,
+ "grad_norm": 51.96952438354492,
+ "learning_rate": 4.622857142857143e-05,
+ "loss": 0.6797,
+ "step": 1469
+ },
+ {
+ "epoch": 8.4,
+ "grad_norm": 41.560142517089844,
+ "learning_rate": 4.6222222222222224e-05,
+ "loss": 0.8717,
+ "step": 1470
+ },
+ {
+ "epoch": 8.405714285714286,
+ "grad_norm": 34.73108673095703,
+ "learning_rate": 4.6215873015873017e-05,
+ "loss": 0.541,
+ "step": 1471
+ },
+ {
+ "epoch": 8.411428571428571,
+ "grad_norm": 47.17734146118164,
+ "learning_rate": 4.6209523809523816e-05,
+ "loss": 0.6391,
+ "step": 1472
+ },
+ {
+ "epoch": 8.417142857142856,
+ "grad_norm": 44.5860481262207,
+ "learning_rate": 4.62031746031746e-05,
+ "loss": 0.7368,
+ "step": 1473
+ },
+ {
+ "epoch": 8.422857142857143,
+ "grad_norm": 38.533416748046875,
+ "learning_rate": 4.61968253968254e-05,
+ "loss": 0.6083,
+ "step": 1474
+ },
+ {
+ "epoch": 8.428571428571429,
+ "grad_norm": 81.4775161743164,
+ "learning_rate": 4.6190476190476194e-05,
+ "loss": 0.7288,
+ "step": 1475
+ },
+ {
+ "epoch": 8.434285714285714,
+ "grad_norm": 114.19631958007812,
+ "learning_rate": 4.6184126984126986e-05,
+ "loss": 0.6884,
+ "step": 1476
+ },
+ {
+ "epoch": 8.44,
+ "grad_norm": 66.40652465820312,
+ "learning_rate": 4.617777777777778e-05,
+ "loss": 0.6874,
+ "step": 1477
+ },
+ {
+ "epoch": 8.445714285714285,
+ "grad_norm": 101.87186431884766,
+ "learning_rate": 4.617142857142857e-05,
+ "loss": 0.7092,
+ "step": 1478
+ },
+ {
+ "epoch": 8.451428571428572,
+ "grad_norm": 197.94064331054688,
+ "learning_rate": 4.6165079365079364e-05,
+ "loss": 0.6513,
+ "step": 1479
+ },
+ {
+ "epoch": 8.457142857142857,
+ "grad_norm": 31.79027557373047,
+ "learning_rate": 4.6158730158730164e-05,
+ "loss": 0.6526,
+ "step": 1480
+ },
+ {
+ "epoch": 8.462857142857143,
+ "grad_norm": 31.410551071166992,
+ "learning_rate": 4.615238095238095e-05,
+ "loss": 0.6396,
+ "step": 1481
+ },
+ {
+ "epoch": 8.468571428571428,
+ "grad_norm": 29.926599502563477,
+ "learning_rate": 4.614603174603175e-05,
+ "loss": 0.5031,
+ "step": 1482
+ },
+ {
+ "epoch": 8.474285714285715,
+ "grad_norm": 24.395957946777344,
+ "learning_rate": 4.613968253968254e-05,
+ "loss": 0.7398,
+ "step": 1483
+ },
+ {
+ "epoch": 8.48,
+ "grad_norm": 53.47405242919922,
+ "learning_rate": 4.6133333333333334e-05,
+ "loss": 0.5225,
+ "step": 1484
+ },
+ {
+ "epoch": 8.485714285714286,
+ "grad_norm": 50.648536682128906,
+ "learning_rate": 4.612698412698413e-05,
+ "loss": 0.7729,
+ "step": 1485
+ },
+ {
+ "epoch": 8.491428571428571,
+ "grad_norm": 84.7298355102539,
+ "learning_rate": 4.6120634920634926e-05,
+ "loss": 0.7538,
+ "step": 1486
+ },
+ {
+ "epoch": 8.497142857142856,
+ "grad_norm": 73.40445709228516,
+ "learning_rate": 4.611428571428571e-05,
+ "loss": 0.7775,
+ "step": 1487
+ },
+ {
+ "epoch": 8.502857142857144,
+ "grad_norm": 75.76046752929688,
+ "learning_rate": 4.610793650793651e-05,
+ "loss": 0.6632,
+ "step": 1488
+ },
+ {
+ "epoch": 8.508571428571429,
+ "grad_norm": 59.162559509277344,
+ "learning_rate": 4.6101587301587304e-05,
+ "loss": 0.8116,
+ "step": 1489
+ },
+ {
+ "epoch": 8.514285714285714,
+ "grad_norm": 74.70832824707031,
+ "learning_rate": 4.60952380952381e-05,
+ "loss": 0.8448,
+ "step": 1490
+ },
+ {
+ "epoch": 8.52,
+ "grad_norm": 31.210222244262695,
+ "learning_rate": 4.608888888888889e-05,
+ "loss": 0.6818,
+ "step": 1491
+ },
+ {
+ "epoch": 8.525714285714285,
+ "grad_norm": 122.73198699951172,
+ "learning_rate": 4.608253968253968e-05,
+ "loss": 0.5263,
+ "step": 1492
+ },
+ {
+ "epoch": 8.531428571428572,
+ "grad_norm": 56.048561096191406,
+ "learning_rate": 4.607619047619048e-05,
+ "loss": 0.4712,
+ "step": 1493
+ },
+ {
+ "epoch": 8.537142857142857,
+ "grad_norm": 560.8683471679688,
+ "learning_rate": 4.6069841269841274e-05,
+ "loss": 0.5263,
+ "step": 1494
+ },
+ {
+ "epoch": 8.542857142857143,
+ "grad_norm": 31.27094841003418,
+ "learning_rate": 4.606349206349207e-05,
+ "loss": 0.5868,
+ "step": 1495
+ },
+ {
+ "epoch": 8.548571428571428,
+ "grad_norm": 41.043006896972656,
+ "learning_rate": 4.605714285714286e-05,
+ "loss": 0.6195,
+ "step": 1496
+ },
+ {
+ "epoch": 8.554285714285715,
+ "grad_norm": 71.65432739257812,
+ "learning_rate": 4.605079365079365e-05,
+ "loss": 0.7321,
+ "step": 1497
+ },
+ {
+ "epoch": 8.56,
+ "grad_norm": 90.78630065917969,
+ "learning_rate": 4.6044444444444445e-05,
+ "loss": 0.5513,
+ "step": 1498
+ },
+ {
+ "epoch": 8.565714285714286,
+ "grad_norm": 40.308448791503906,
+ "learning_rate": 4.6038095238095244e-05,
+ "loss": 0.6837,
+ "step": 1499
+ },
+ {
+ "epoch": 8.571428571428571,
+ "grad_norm": 27.45469856262207,
+ "learning_rate": 4.603174603174603e-05,
+ "loss": 0.7172,
+ "step": 1500
+ },
+ {
+ "epoch": 8.577142857142857,
+ "grad_norm": 29.458553314208984,
+ "learning_rate": 4.602539682539683e-05,
+ "loss": 0.4317,
+ "step": 1501
+ },
+ {
+ "epoch": 8.582857142857144,
+ "grad_norm": 96.34957122802734,
+ "learning_rate": 4.601904761904762e-05,
+ "loss": 0.7332,
+ "step": 1502
+ },
+ {
+ "epoch": 8.588571428571429,
+ "grad_norm": 27.539478302001953,
+ "learning_rate": 4.6012698412698415e-05,
+ "loss": 0.4862,
+ "step": 1503
+ },
+ {
+ "epoch": 8.594285714285714,
+ "grad_norm": 27.598003387451172,
+ "learning_rate": 4.600634920634921e-05,
+ "loss": 0.5812,
+ "step": 1504
+ },
+ {
+ "epoch": 8.6,
+ "grad_norm": 139.9840087890625,
+ "learning_rate": 4.600000000000001e-05,
+ "loss": 0.5812,
+ "step": 1505
+ },
+ {
+ "epoch": 8.605714285714285,
+ "grad_norm": 37.582332611083984,
+ "learning_rate": 4.599365079365079e-05,
+ "loss": 0.7514,
+ "step": 1506
+ },
+ {
+ "epoch": 8.611428571428572,
+ "grad_norm": 58.73211669921875,
+ "learning_rate": 4.598730158730159e-05,
+ "loss": 0.4071,
+ "step": 1507
+ },
+ {
+ "epoch": 8.617142857142857,
+ "grad_norm": 60.2987174987793,
+ "learning_rate": 4.5980952380952385e-05,
+ "loss": 0.526,
+ "step": 1508
+ },
+ {
+ "epoch": 8.622857142857143,
+ "grad_norm": 55.10319137573242,
+ "learning_rate": 4.597460317460318e-05,
+ "loss": 0.7074,
+ "step": 1509
+ },
+ {
+ "epoch": 8.628571428571428,
+ "grad_norm": 28.07171058654785,
+ "learning_rate": 4.596825396825397e-05,
+ "loss": 0.6715,
+ "step": 1510
+ },
+ {
+ "epoch": 8.634285714285713,
+ "grad_norm": 138.1480712890625,
+ "learning_rate": 4.596190476190476e-05,
+ "loss": 0.6359,
+ "step": 1511
+ },
+ {
+ "epoch": 8.64,
+ "grad_norm": 48.170631408691406,
+ "learning_rate": 4.5955555555555555e-05,
+ "loss": 0.6623,
+ "step": 1512
+ },
+ {
+ "epoch": 8.645714285714286,
+ "grad_norm": 44.70737075805664,
+ "learning_rate": 4.5949206349206355e-05,
+ "loss": 0.6849,
+ "step": 1513
+ },
+ {
+ "epoch": 8.651428571428571,
+ "grad_norm": 53.35809326171875,
+ "learning_rate": 4.594285714285714e-05,
+ "loss": 0.5214,
+ "step": 1514
+ },
+ {
+ "epoch": 8.657142857142857,
+ "grad_norm": 59.18272399902344,
+ "learning_rate": 4.593650793650794e-05,
+ "loss": 0.7109,
+ "step": 1515
+ },
+ {
+ "epoch": 8.662857142857142,
+ "grad_norm": 76.57801055908203,
+ "learning_rate": 4.593015873015873e-05,
+ "loss": 0.7157,
+ "step": 1516
+ },
+ {
+ "epoch": 8.668571428571429,
+ "grad_norm": 47.933834075927734,
+ "learning_rate": 4.5923809523809525e-05,
+ "loss": 0.6008,
+ "step": 1517
+ },
+ {
+ "epoch": 8.674285714285714,
+ "grad_norm": 34.38920593261719,
+ "learning_rate": 4.591746031746032e-05,
+ "loss": 0.5298,
+ "step": 1518
+ },
+ {
+ "epoch": 8.68,
+ "grad_norm": 36.14894104003906,
+ "learning_rate": 4.591111111111112e-05,
+ "loss": 0.5072,
+ "step": 1519
+ },
+ {
+ "epoch": 8.685714285714285,
+ "grad_norm": 36.38679504394531,
+ "learning_rate": 4.59047619047619e-05,
+ "loss": 0.5074,
+ "step": 1520
+ },
+ {
+ "epoch": 8.691428571428572,
+ "grad_norm": 35.809410095214844,
+ "learning_rate": 4.58984126984127e-05,
+ "loss": 0.5394,
+ "step": 1521
+ },
+ {
+ "epoch": 8.697142857142858,
+ "grad_norm": 76.38274383544922,
+ "learning_rate": 4.589206349206349e-05,
+ "loss": 0.6313,
+ "step": 1522
+ },
+ {
+ "epoch": 8.702857142857143,
+ "grad_norm": 37.26008987426758,
+ "learning_rate": 4.588571428571429e-05,
+ "loss": 0.6359,
+ "step": 1523
+ },
+ {
+ "epoch": 8.708571428571428,
+ "grad_norm": 52.82963943481445,
+ "learning_rate": 4.587936507936508e-05,
+ "loss": 0.5529,
+ "step": 1524
+ },
+ {
+ "epoch": 8.714285714285714,
+ "grad_norm": 33.750099182128906,
+ "learning_rate": 4.587301587301587e-05,
+ "loss": 0.5891,
+ "step": 1525
+ },
+ {
+ "epoch": 8.72,
+ "grad_norm": 26.14128875732422,
+ "learning_rate": 4.5866666666666666e-05,
+ "loss": 0.8089,
+ "step": 1526
+ },
+ {
+ "epoch": 8.725714285714286,
+ "grad_norm": 91.86772155761719,
+ "learning_rate": 4.5860317460317465e-05,
+ "loss": 0.5509,
+ "step": 1527
+ },
+ {
+ "epoch": 8.731428571428571,
+ "grad_norm": 38.546119689941406,
+ "learning_rate": 4.585396825396826e-05,
+ "loss": 0.5187,
+ "step": 1528
+ },
+ {
+ "epoch": 8.737142857142857,
+ "grad_norm": 35.20556640625,
+ "learning_rate": 4.584761904761905e-05,
+ "loss": 0.5353,
+ "step": 1529
+ },
+ {
+ "epoch": 8.742857142857144,
+ "grad_norm": 39.48540496826172,
+ "learning_rate": 4.584126984126984e-05,
+ "loss": 0.3533,
+ "step": 1530
+ },
+ {
+ "epoch": 8.748571428571429,
+ "grad_norm": 27.24939727783203,
+ "learning_rate": 4.5834920634920636e-05,
+ "loss": 0.5308,
+ "step": 1531
+ },
+ {
+ "epoch": 8.754285714285714,
+ "grad_norm": 64.14990997314453,
+ "learning_rate": 4.5828571428571435e-05,
+ "loss": 0.6566,
+ "step": 1532
+ },
+ {
+ "epoch": 8.76,
+ "grad_norm": 58.110958099365234,
+ "learning_rate": 4.582222222222222e-05,
+ "loss": 0.6346,
+ "step": 1533
+ },
+ {
+ "epoch": 8.765714285714285,
+ "grad_norm": 53.27418899536133,
+ "learning_rate": 4.581587301587302e-05,
+ "loss": 0.6744,
+ "step": 1534
+ },
+ {
+ "epoch": 8.771428571428572,
+ "grad_norm": 44.7225227355957,
+ "learning_rate": 4.580952380952381e-05,
+ "loss": 0.6127,
+ "step": 1535
+ },
+ {
+ "epoch": 8.777142857142858,
+ "grad_norm": 72.10116577148438,
+ "learning_rate": 4.5803174603174606e-05,
+ "loss": 0.6284,
+ "step": 1536
+ },
+ {
+ "epoch": 8.782857142857143,
+ "grad_norm": 43.42138671875,
+ "learning_rate": 4.57968253968254e-05,
+ "loss": 0.7004,
+ "step": 1537
+ },
+ {
+ "epoch": 8.788571428571428,
+ "grad_norm": 25.551225662231445,
+ "learning_rate": 4.57904761904762e-05,
+ "loss": 0.6069,
+ "step": 1538
+ },
+ {
+ "epoch": 8.794285714285714,
+ "grad_norm": 58.870418548583984,
+ "learning_rate": 4.5784126984126984e-05,
+ "loss": 0.7462,
+ "step": 1539
+ },
+ {
+ "epoch": 8.8,
+ "grad_norm": 29.90438461303711,
+ "learning_rate": 4.577777777777778e-05,
+ "loss": 0.5822,
+ "step": 1540
+ },
+ {
+ "epoch": 8.805714285714286,
+ "grad_norm": 45.953643798828125,
+ "learning_rate": 4.5771428571428576e-05,
+ "loss": 0.7325,
+ "step": 1541
+ },
+ {
+ "epoch": 8.811428571428571,
+ "grad_norm": 26.630359649658203,
+ "learning_rate": 4.576507936507937e-05,
+ "loss": 0.5846,
+ "step": 1542
+ },
+ {
+ "epoch": 8.817142857142857,
+ "grad_norm": 25.59412384033203,
+ "learning_rate": 4.575873015873016e-05,
+ "loss": 0.6627,
+ "step": 1543
+ },
+ {
+ "epoch": 8.822857142857142,
+ "grad_norm": 29.028831481933594,
+ "learning_rate": 4.5752380952380953e-05,
+ "loss": 0.718,
+ "step": 1544
+ },
+ {
+ "epoch": 8.82857142857143,
+ "grad_norm": 52.75741195678711,
+ "learning_rate": 4.5746031746031746e-05,
+ "loss": 0.4036,
+ "step": 1545
+ },
+ {
+ "epoch": 8.834285714285715,
+ "grad_norm": 42.16880798339844,
+ "learning_rate": 4.5739682539682546e-05,
+ "loss": 0.6891,
+ "step": 1546
+ },
+ {
+ "epoch": 8.84,
+ "grad_norm": 18.44571304321289,
+ "learning_rate": 4.573333333333333e-05,
+ "loss": 0.4403,
+ "step": 1547
+ },
+ {
+ "epoch": 8.845714285714285,
+ "grad_norm": 42.27539825439453,
+ "learning_rate": 4.572698412698413e-05,
+ "loss": 0.6595,
+ "step": 1548
+ },
+ {
+ "epoch": 8.85142857142857,
+ "grad_norm": 60.38774108886719,
+ "learning_rate": 4.5720634920634923e-05,
+ "loss": 0.4491,
+ "step": 1549
+ },
+ {
+ "epoch": 8.857142857142858,
+ "grad_norm": 42.25185012817383,
+ "learning_rate": 4.5714285714285716e-05,
+ "loss": 0.5451,
+ "step": 1550
+ },
+ {
+ "epoch": 8.862857142857143,
+ "grad_norm": 83.79962158203125,
+ "learning_rate": 4.570793650793651e-05,
+ "loss": 0.6691,
+ "step": 1551
+ },
+ {
+ "epoch": 8.868571428571428,
+ "grad_norm": 45.134437561035156,
+ "learning_rate": 4.570158730158731e-05,
+ "loss": 0.7317,
+ "step": 1552
+ },
+ {
+ "epoch": 8.874285714285714,
+ "grad_norm": 27.27747344970703,
+ "learning_rate": 4.5695238095238094e-05,
+ "loss": 0.5246,
+ "step": 1553
+ },
+ {
+ "epoch": 8.88,
+ "grad_norm": 29.774110794067383,
+ "learning_rate": 4.5688888888888893e-05,
+ "loss": 0.4865,
+ "step": 1554
+ },
+ {
+ "epoch": 8.885714285714286,
+ "grad_norm": 41.41864776611328,
+ "learning_rate": 4.568253968253968e-05,
+ "loss": 0.7461,
+ "step": 1555
+ },
+ {
+ "epoch": 8.891428571428571,
+ "grad_norm": 52.09632873535156,
+ "learning_rate": 4.567619047619048e-05,
+ "loss": 0.6364,
+ "step": 1556
+ },
+ {
+ "epoch": 8.897142857142857,
+ "grad_norm": 44.600128173828125,
+ "learning_rate": 4.566984126984127e-05,
+ "loss": 0.639,
+ "step": 1557
+ },
+ {
+ "epoch": 8.902857142857142,
+ "grad_norm": 26.527629852294922,
+ "learning_rate": 4.5663492063492064e-05,
+ "loss": 0.7991,
+ "step": 1558
+ },
+ {
+ "epoch": 8.90857142857143,
+ "grad_norm": 41.0920524597168,
+ "learning_rate": 4.5657142857142857e-05,
+ "loss": 0.9872,
+ "step": 1559
+ },
+ {
+ "epoch": 8.914285714285715,
+ "grad_norm": 82.2793960571289,
+ "learning_rate": 4.5650793650793656e-05,
+ "loss": 0.582,
+ "step": 1560
+ },
+ {
+ "epoch": 8.92,
+ "grad_norm": 22.054826736450195,
+ "learning_rate": 4.564444444444444e-05,
+ "loss": 0.4866,
+ "step": 1561
+ },
+ {
+ "epoch": 8.925714285714285,
+ "grad_norm": 24.977052688598633,
+ "learning_rate": 4.563809523809524e-05,
+ "loss": 0.9632,
+ "step": 1562
+ },
+ {
+ "epoch": 8.93142857142857,
+ "grad_norm": 18.51016616821289,
+ "learning_rate": 4.5631746031746034e-05,
+ "loss": 0.8068,
+ "step": 1563
+ },
+ {
+ "epoch": 8.937142857142858,
+ "grad_norm": 20.75175666809082,
+ "learning_rate": 4.5625396825396827e-05,
+ "loss": 0.7239,
+ "step": 1564
+ },
+ {
+ "epoch": 8.942857142857143,
+ "grad_norm": 51.263912200927734,
+ "learning_rate": 4.561904761904762e-05,
+ "loss": 0.4197,
+ "step": 1565
+ },
+ {
+ "epoch": 8.948571428571428,
+ "grad_norm": 53.12901306152344,
+ "learning_rate": 4.561269841269841e-05,
+ "loss": 0.7149,
+ "step": 1566
+ },
+ {
+ "epoch": 8.954285714285714,
+ "grad_norm": 30.954853057861328,
+ "learning_rate": 4.560634920634921e-05,
+ "loss": 0.5854,
+ "step": 1567
+ },
+ {
+ "epoch": 8.96,
+ "grad_norm": 15.945786476135254,
+ "learning_rate": 4.5600000000000004e-05,
+ "loss": 0.6039,
+ "step": 1568
+ },
+ {
+ "epoch": 8.965714285714286,
+ "grad_norm": 37.43260192871094,
+ "learning_rate": 4.5593650793650797e-05,
+ "loss": 0.3747,
+ "step": 1569
+ },
+ {
+ "epoch": 8.971428571428572,
+ "grad_norm": 70.73409271240234,
+ "learning_rate": 4.558730158730159e-05,
+ "loss": 0.8083,
+ "step": 1570
+ },
+ {
+ "epoch": 8.977142857142857,
+ "grad_norm": 25.643779754638672,
+ "learning_rate": 4.558095238095239e-05,
+ "loss": 0.6793,
+ "step": 1571
+ },
+ {
+ "epoch": 8.982857142857142,
+ "grad_norm": 34.19021224975586,
+ "learning_rate": 4.5574603174603174e-05,
+ "loss": 0.8151,
+ "step": 1572
+ },
+ {
+ "epoch": 8.98857142857143,
+ "grad_norm": 39.52885437011719,
+ "learning_rate": 4.5568253968253974e-05,
+ "loss": 0.3994,
+ "step": 1573
+ },
+ {
+ "epoch": 8.994285714285715,
+ "grad_norm": 95.88760375976562,
+ "learning_rate": 4.5561904761904766e-05,
+ "loss": 0.6294,
+ "step": 1574
+ },
+ {
+ "epoch": 9.0,
+ "grad_norm": 33.499820709228516,
+ "learning_rate": 4.555555555555556e-05,
+ "loss": 0.8259,
+ "step": 1575
+ },
+ {
+ "epoch": 9.0,
+ "eval_classes": 0,
+ "eval_loss": 0.7303072214126587,
+ "eval_map": 0.8974,
+ "eval_map_50": 0.9515,
+ "eval_map_75": 0.9372,
+ "eval_map_large": 0.8975,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.8974,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7594,
+ "eval_mar_10": 0.9619,
+ "eval_mar_100": 0.9756,
+ "eval_mar_100_per_class": 0.9756,
+ "eval_mar_large": 0.9756,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 17.5672,
+ "eval_samples_per_second": 16.736,
+ "eval_steps_per_second": 2.106,
+ "step": 1575
+ },
+ {
+ "epoch": 9.005714285714285,
+ "grad_norm": 96.72337341308594,
+ "learning_rate": 4.554920634920635e-05,
+ "loss": 0.5228,
+ "step": 1576
+ },
+ {
+ "epoch": 9.01142857142857,
+ "grad_norm": 29.805259704589844,
+ "learning_rate": 4.5542857142857144e-05,
+ "loss": 0.3786,
+ "step": 1577
+ },
+ {
+ "epoch": 9.017142857142858,
+ "grad_norm": 21.426660537719727,
+ "learning_rate": 4.553650793650794e-05,
+ "loss": 0.5206,
+ "step": 1578
+ },
+ {
+ "epoch": 9.022857142857143,
+ "grad_norm": 73.40640258789062,
+ "learning_rate": 4.5530158730158736e-05,
+ "loss": 0.4438,
+ "step": 1579
+ },
+ {
+ "epoch": 9.028571428571428,
+ "grad_norm": 50.55246353149414,
+ "learning_rate": 4.552380952380952e-05,
+ "loss": 0.4576,
+ "step": 1580
+ },
+ {
+ "epoch": 9.034285714285714,
+ "grad_norm": 55.30791473388672,
+ "learning_rate": 4.551746031746032e-05,
+ "loss": 0.3573,
+ "step": 1581
+ },
+ {
+ "epoch": 9.04,
+ "grad_norm": 17.10489845275879,
+ "learning_rate": 4.5511111111111114e-05,
+ "loss": 0.4981,
+ "step": 1582
+ },
+ {
+ "epoch": 9.045714285714286,
+ "grad_norm": 54.23091506958008,
+ "learning_rate": 4.550476190476191e-05,
+ "loss": 0.5639,
+ "step": 1583
+ },
+ {
+ "epoch": 9.051428571428572,
+ "grad_norm": 46.959014892578125,
+ "learning_rate": 4.54984126984127e-05,
+ "loss": 0.3785,
+ "step": 1584
+ },
+ {
+ "epoch": 9.057142857142857,
+ "grad_norm": 41.589237213134766,
+ "learning_rate": 4.54920634920635e-05,
+ "loss": 0.6079,
+ "step": 1585
+ },
+ {
+ "epoch": 9.062857142857142,
+ "grad_norm": 59.98491668701172,
+ "learning_rate": 4.5485714285714285e-05,
+ "loss": 0.4779,
+ "step": 1586
+ },
+ {
+ "epoch": 9.06857142857143,
+ "grad_norm": 59.319087982177734,
+ "learning_rate": 4.5479365079365084e-05,
+ "loss": 0.858,
+ "step": 1587
+ },
+ {
+ "epoch": 9.074285714285715,
+ "grad_norm": 15.701525688171387,
+ "learning_rate": 4.547301587301587e-05,
+ "loss": 0.5491,
+ "step": 1588
+ },
+ {
+ "epoch": 9.08,
+ "grad_norm": 34.07634735107422,
+ "learning_rate": 4.546666666666667e-05,
+ "loss": 0.5342,
+ "step": 1589
+ },
+ {
+ "epoch": 9.085714285714285,
+ "grad_norm": 17.197433471679688,
+ "learning_rate": 4.546031746031746e-05,
+ "loss": 0.7168,
+ "step": 1590
+ },
+ {
+ "epoch": 9.09142857142857,
+ "grad_norm": 43.62481689453125,
+ "learning_rate": 4.5453968253968255e-05,
+ "loss": 0.4793,
+ "step": 1591
+ },
+ {
+ "epoch": 9.097142857142858,
+ "grad_norm": 191.11241149902344,
+ "learning_rate": 4.544761904761905e-05,
+ "loss": 0.5385,
+ "step": 1592
+ },
+ {
+ "epoch": 9.102857142857143,
+ "grad_norm": 17.969465255737305,
+ "learning_rate": 4.544126984126985e-05,
+ "loss": 0.494,
+ "step": 1593
+ },
+ {
+ "epoch": 9.108571428571429,
+ "grad_norm": 171.6042938232422,
+ "learning_rate": 4.543492063492063e-05,
+ "loss": 0.6234,
+ "step": 1594
+ },
+ {
+ "epoch": 9.114285714285714,
+ "grad_norm": 67.10899353027344,
+ "learning_rate": 4.542857142857143e-05,
+ "loss": 0.4691,
+ "step": 1595
+ },
+ {
+ "epoch": 9.12,
+ "grad_norm": 35.339996337890625,
+ "learning_rate": 4.5422222222222225e-05,
+ "loss": 0.4985,
+ "step": 1596
+ },
+ {
+ "epoch": 9.125714285714286,
+ "grad_norm": 16.23410987854004,
+ "learning_rate": 4.541587301587302e-05,
+ "loss": 0.5747,
+ "step": 1597
+ },
+ {
+ "epoch": 9.131428571428572,
+ "grad_norm": 35.63035202026367,
+ "learning_rate": 4.540952380952381e-05,
+ "loss": 0.5536,
+ "step": 1598
+ },
+ {
+ "epoch": 9.137142857142857,
+ "grad_norm": 19.772794723510742,
+ "learning_rate": 4.54031746031746e-05,
+ "loss": 0.5108,
+ "step": 1599
+ },
+ {
+ "epoch": 9.142857142857142,
+ "grad_norm": 34.77439498901367,
+ "learning_rate": 4.5396825396825395e-05,
+ "loss": 0.5934,
+ "step": 1600
+ },
+ {
+ "epoch": 9.14857142857143,
+ "grad_norm": 33.682281494140625,
+ "learning_rate": 4.5390476190476195e-05,
+ "loss": 0.4317,
+ "step": 1601
+ },
+ {
+ "epoch": 9.154285714285715,
+ "grad_norm": 69.66313934326172,
+ "learning_rate": 4.538412698412699e-05,
+ "loss": 0.9722,
+ "step": 1602
+ },
+ {
+ "epoch": 9.16,
+ "grad_norm": 37.474693298339844,
+ "learning_rate": 4.537777777777778e-05,
+ "loss": 0.4238,
+ "step": 1603
+ },
+ {
+ "epoch": 9.165714285714285,
+ "grad_norm": 54.01615905761719,
+ "learning_rate": 4.537142857142857e-05,
+ "loss": 0.6075,
+ "step": 1604
+ },
+ {
+ "epoch": 9.17142857142857,
+ "grad_norm": 73.04664611816406,
+ "learning_rate": 4.5365079365079365e-05,
+ "loss": 0.638,
+ "step": 1605
+ },
+ {
+ "epoch": 9.177142857142858,
+ "grad_norm": 46.8115348815918,
+ "learning_rate": 4.5358730158730165e-05,
+ "loss": 0.5162,
+ "step": 1606
+ },
+ {
+ "epoch": 9.182857142857143,
+ "grad_norm": 51.396697998046875,
+ "learning_rate": 4.535238095238096e-05,
+ "loss": 0.4573,
+ "step": 1607
+ },
+ {
+ "epoch": 9.188571428571429,
+ "grad_norm": 25.829246520996094,
+ "learning_rate": 4.534603174603175e-05,
+ "loss": 0.5303,
+ "step": 1608
+ },
+ {
+ "epoch": 9.194285714285714,
+ "grad_norm": 1186.63720703125,
+ "learning_rate": 4.533968253968254e-05,
+ "loss": 0.428,
+ "step": 1609
+ },
+ {
+ "epoch": 9.2,
+ "grad_norm": 45.02142333984375,
+ "learning_rate": 4.5333333333333335e-05,
+ "loss": 0.5237,
+ "step": 1610
+ },
+ {
+ "epoch": 9.205714285714286,
+ "grad_norm": 462.3764343261719,
+ "learning_rate": 4.532698412698413e-05,
+ "loss": 0.671,
+ "step": 1611
+ },
+ {
+ "epoch": 9.211428571428572,
+ "grad_norm": 56.87477493286133,
+ "learning_rate": 4.532063492063493e-05,
+ "loss": 0.5889,
+ "step": 1612
+ },
+ {
+ "epoch": 9.217142857142857,
+ "grad_norm": 24.812471389770508,
+ "learning_rate": 4.531428571428571e-05,
+ "loss": 0.5464,
+ "step": 1613
+ },
+ {
+ "epoch": 9.222857142857142,
+ "grad_norm": 71.20873260498047,
+ "learning_rate": 4.530793650793651e-05,
+ "loss": 0.6917,
+ "step": 1614
+ },
+ {
+ "epoch": 9.228571428571428,
+ "grad_norm": 57.25650405883789,
+ "learning_rate": 4.5301587301587305e-05,
+ "loss": 0.7093,
+ "step": 1615
+ },
+ {
+ "epoch": 9.234285714285715,
+ "grad_norm": 61.251834869384766,
+ "learning_rate": 4.52952380952381e-05,
+ "loss": 0.5466,
+ "step": 1616
+ },
+ {
+ "epoch": 9.24,
+ "grad_norm": 144.06771850585938,
+ "learning_rate": 4.528888888888889e-05,
+ "loss": 0.5203,
+ "step": 1617
+ },
+ {
+ "epoch": 9.245714285714286,
+ "grad_norm": 42.55318832397461,
+ "learning_rate": 4.528253968253969e-05,
+ "loss": 0.6319,
+ "step": 1618
+ },
+ {
+ "epoch": 9.251428571428571,
+ "grad_norm": 27.46671485900879,
+ "learning_rate": 4.5276190476190476e-05,
+ "loss": 0.4394,
+ "step": 1619
+ },
+ {
+ "epoch": 9.257142857142856,
+ "grad_norm": 31.322734832763672,
+ "learning_rate": 4.5269841269841275e-05,
+ "loss": 0.7483,
+ "step": 1620
+ },
+ {
+ "epoch": 9.262857142857143,
+ "grad_norm": 74.06669616699219,
+ "learning_rate": 4.526349206349206e-05,
+ "loss": 0.8727,
+ "step": 1621
+ },
+ {
+ "epoch": 9.268571428571429,
+ "grad_norm": 29.42599868774414,
+ "learning_rate": 4.525714285714286e-05,
+ "loss": 0.4346,
+ "step": 1622
+ },
+ {
+ "epoch": 9.274285714285714,
+ "grad_norm": 68.91696166992188,
+ "learning_rate": 4.525079365079365e-05,
+ "loss": 0.6046,
+ "step": 1623
+ },
+ {
+ "epoch": 9.28,
+ "grad_norm": 30.62226104736328,
+ "learning_rate": 4.5244444444444446e-05,
+ "loss": 0.4043,
+ "step": 1624
+ },
+ {
+ "epoch": 9.285714285714286,
+ "grad_norm": 26.922250747680664,
+ "learning_rate": 4.523809523809524e-05,
+ "loss": 0.3778,
+ "step": 1625
+ },
+ {
+ "epoch": 9.291428571428572,
+ "grad_norm": 37.2835578918457,
+ "learning_rate": 4.523174603174604e-05,
+ "loss": 0.4843,
+ "step": 1626
+ },
+ {
+ "epoch": 9.297142857142857,
+ "grad_norm": 58.060791015625,
+ "learning_rate": 4.5225396825396824e-05,
+ "loss": 0.5054,
+ "step": 1627
+ },
+ {
+ "epoch": 9.302857142857142,
+ "grad_norm": 30.876922607421875,
+ "learning_rate": 4.521904761904762e-05,
+ "loss": 0.7619,
+ "step": 1628
+ },
+ {
+ "epoch": 9.308571428571428,
+ "grad_norm": 35.82794952392578,
+ "learning_rate": 4.521269841269841e-05,
+ "loss": 0.8043,
+ "step": 1629
+ },
+ {
+ "epoch": 9.314285714285715,
+ "grad_norm": 28.258895874023438,
+ "learning_rate": 4.520634920634921e-05,
+ "loss": 0.7233,
+ "step": 1630
+ },
+ {
+ "epoch": 9.32,
+ "grad_norm": 76.31919860839844,
+ "learning_rate": 4.52e-05,
+ "loss": 0.607,
+ "step": 1631
+ },
+ {
+ "epoch": 9.325714285714286,
+ "grad_norm": 201.3551025390625,
+ "learning_rate": 4.5193650793650794e-05,
+ "loss": 0.6616,
+ "step": 1632
+ },
+ {
+ "epoch": 9.331428571428571,
+ "grad_norm": 23.321441650390625,
+ "learning_rate": 4.5187301587301586e-05,
+ "loss": 0.5547,
+ "step": 1633
+ },
+ {
+ "epoch": 9.337142857142856,
+ "grad_norm": 53.26896286010742,
+ "learning_rate": 4.5180952380952386e-05,
+ "loss": 0.6694,
+ "step": 1634
+ },
+ {
+ "epoch": 9.342857142857143,
+ "grad_norm": 82.87789916992188,
+ "learning_rate": 4.517460317460318e-05,
+ "loss": 0.5523,
+ "step": 1635
+ },
+ {
+ "epoch": 9.348571428571429,
+ "grad_norm": 50.41506576538086,
+ "learning_rate": 4.516825396825397e-05,
+ "loss": 0.5354,
+ "step": 1636
+ },
+ {
+ "epoch": 9.354285714285714,
+ "grad_norm": 42.313018798828125,
+ "learning_rate": 4.5161904761904764e-05,
+ "loss": 0.5686,
+ "step": 1637
+ },
+ {
+ "epoch": 9.36,
+ "grad_norm": 181.2873992919922,
+ "learning_rate": 4.5155555555555556e-05,
+ "loss": 0.6026,
+ "step": 1638
+ },
+ {
+ "epoch": 9.365714285714287,
+ "grad_norm": 22.492979049682617,
+ "learning_rate": 4.5149206349206356e-05,
+ "loss": 0.7822,
+ "step": 1639
+ },
+ {
+ "epoch": 9.371428571428572,
+ "grad_norm": 26.08000373840332,
+ "learning_rate": 4.514285714285714e-05,
+ "loss": 0.5648,
+ "step": 1640
+ },
+ {
+ "epoch": 9.377142857142857,
+ "grad_norm": 48.00055694580078,
+ "learning_rate": 4.513650793650794e-05,
+ "loss": 0.6257,
+ "step": 1641
+ },
+ {
+ "epoch": 9.382857142857143,
+ "grad_norm": 65.03575134277344,
+ "learning_rate": 4.5130158730158733e-05,
+ "loss": 0.5279,
+ "step": 1642
+ },
+ {
+ "epoch": 9.388571428571428,
+ "grad_norm": 58.578128814697266,
+ "learning_rate": 4.5123809523809526e-05,
+ "loss": 0.5983,
+ "step": 1643
+ },
+ {
+ "epoch": 9.394285714285715,
+ "grad_norm": 35.81114196777344,
+ "learning_rate": 4.511746031746032e-05,
+ "loss": 0.5741,
+ "step": 1644
+ },
+ {
+ "epoch": 9.4,
+ "grad_norm": 22.605865478515625,
+ "learning_rate": 4.511111111111112e-05,
+ "loss": 0.7414,
+ "step": 1645
+ },
+ {
+ "epoch": 9.405714285714286,
+ "grad_norm": 43.81585693359375,
+ "learning_rate": 4.5104761904761904e-05,
+ "loss": 0.4807,
+ "step": 1646
+ },
+ {
+ "epoch": 9.411428571428571,
+ "grad_norm": 48.78623580932617,
+ "learning_rate": 4.5098412698412703e-05,
+ "loss": 0.497,
+ "step": 1647
+ },
+ {
+ "epoch": 9.417142857142856,
+ "grad_norm": 35.52511215209961,
+ "learning_rate": 4.5092063492063496e-05,
+ "loss": 0.8295,
+ "step": 1648
+ },
+ {
+ "epoch": 9.422857142857143,
+ "grad_norm": 35.88123321533203,
+ "learning_rate": 4.508571428571429e-05,
+ "loss": 0.5782,
+ "step": 1649
+ },
+ {
+ "epoch": 9.428571428571429,
+ "grad_norm": 28.440359115600586,
+ "learning_rate": 4.507936507936508e-05,
+ "loss": 0.5606,
+ "step": 1650
+ },
+ {
+ "epoch": 9.434285714285714,
+ "grad_norm": 31.525901794433594,
+ "learning_rate": 4.5073015873015874e-05,
+ "loss": 0.3901,
+ "step": 1651
+ },
+ {
+ "epoch": 9.44,
+ "grad_norm": 31.054569244384766,
+ "learning_rate": 4.5066666666666667e-05,
+ "loss": 0.5627,
+ "step": 1652
+ },
+ {
+ "epoch": 9.445714285714285,
+ "grad_norm": 30.92431640625,
+ "learning_rate": 4.5060317460317466e-05,
+ "loss": 0.5484,
+ "step": 1653
+ },
+ {
+ "epoch": 9.451428571428572,
+ "grad_norm": 45.711769104003906,
+ "learning_rate": 4.505396825396825e-05,
+ "loss": 0.5364,
+ "step": 1654
+ },
+ {
+ "epoch": 9.457142857142857,
+ "grad_norm": 33.75905990600586,
+ "learning_rate": 4.504761904761905e-05,
+ "loss": 0.3607,
+ "step": 1655
+ },
+ {
+ "epoch": 9.462857142857143,
+ "grad_norm": 68.22660827636719,
+ "learning_rate": 4.5041269841269844e-05,
+ "loss": 0.5863,
+ "step": 1656
+ },
+ {
+ "epoch": 9.468571428571428,
+ "grad_norm": 39.716426849365234,
+ "learning_rate": 4.5034920634920637e-05,
+ "loss": 0.5162,
+ "step": 1657
+ },
+ {
+ "epoch": 9.474285714285715,
+ "grad_norm": 57.95941162109375,
+ "learning_rate": 4.502857142857143e-05,
+ "loss": 0.4783,
+ "step": 1658
+ },
+ {
+ "epoch": 9.48,
+ "grad_norm": 36.185951232910156,
+ "learning_rate": 4.502222222222223e-05,
+ "loss": 0.626,
+ "step": 1659
+ },
+ {
+ "epoch": 9.485714285714286,
+ "grad_norm": 26.185272216796875,
+ "learning_rate": 4.5015873015873014e-05,
+ "loss": 0.5006,
+ "step": 1660
+ },
+ {
+ "epoch": 9.491428571428571,
+ "grad_norm": 30.795080184936523,
+ "learning_rate": 4.5009523809523814e-05,
+ "loss": 0.4489,
+ "step": 1661
+ },
+ {
+ "epoch": 9.497142857142856,
+ "grad_norm": 91.52268981933594,
+ "learning_rate": 4.50031746031746e-05,
+ "loss": 0.6501,
+ "step": 1662
+ },
+ {
+ "epoch": 9.502857142857144,
+ "grad_norm": 156.91587829589844,
+ "learning_rate": 4.49968253968254e-05,
+ "loss": 0.5349,
+ "step": 1663
+ },
+ {
+ "epoch": 9.508571428571429,
+ "grad_norm": 71.10111236572266,
+ "learning_rate": 4.499047619047619e-05,
+ "loss": 0.587,
+ "step": 1664
+ },
+ {
+ "epoch": 9.514285714285714,
+ "grad_norm": 404.21942138671875,
+ "learning_rate": 4.4984126984126984e-05,
+ "loss": 0.5079,
+ "step": 1665
+ },
+ {
+ "epoch": 9.52,
+ "grad_norm": 36.196353912353516,
+ "learning_rate": 4.497777777777778e-05,
+ "loss": 0.5316,
+ "step": 1666
+ },
+ {
+ "epoch": 9.525714285714285,
+ "grad_norm": 44.460018157958984,
+ "learning_rate": 4.4971428571428576e-05,
+ "loss": 0.9241,
+ "step": 1667
+ },
+ {
+ "epoch": 9.531428571428572,
+ "grad_norm": 34.32107162475586,
+ "learning_rate": 4.496507936507936e-05,
+ "loss": 0.5829,
+ "step": 1668
+ },
+ {
+ "epoch": 9.537142857142857,
+ "grad_norm": 55.574440002441406,
+ "learning_rate": 4.495873015873016e-05,
+ "loss": 0.8107,
+ "step": 1669
+ },
+ {
+ "epoch": 9.542857142857143,
+ "grad_norm": 32.89865493774414,
+ "learning_rate": 4.4952380952380954e-05,
+ "loss": 0.6595,
+ "step": 1670
+ },
+ {
+ "epoch": 9.548571428571428,
+ "grad_norm": 68.20197296142578,
+ "learning_rate": 4.494603174603175e-05,
+ "loss": 0.5237,
+ "step": 1671
+ },
+ {
+ "epoch": 9.554285714285715,
+ "grad_norm": 31.13216209411621,
+ "learning_rate": 4.493968253968254e-05,
+ "loss": 0.4951,
+ "step": 1672
+ },
+ {
+ "epoch": 9.56,
+ "grad_norm": 39.833072662353516,
+ "learning_rate": 4.493333333333333e-05,
+ "loss": 0.4265,
+ "step": 1673
+ },
+ {
+ "epoch": 9.565714285714286,
+ "grad_norm": 31.166410446166992,
+ "learning_rate": 4.492698412698413e-05,
+ "loss": 0.4458,
+ "step": 1674
+ },
+ {
+ "epoch": 9.571428571428571,
+ "grad_norm": 81.6888198852539,
+ "learning_rate": 4.4920634920634924e-05,
+ "loss": 0.6065,
+ "step": 1675
+ },
+ {
+ "epoch": 9.577142857142857,
+ "grad_norm": 62.83003616333008,
+ "learning_rate": 4.491428571428572e-05,
+ "loss": 0.5295,
+ "step": 1676
+ },
+ {
+ "epoch": 9.582857142857144,
+ "grad_norm": 52.28730773925781,
+ "learning_rate": 4.490793650793651e-05,
+ "loss": 0.5219,
+ "step": 1677
+ },
+ {
+ "epoch": 9.588571428571429,
+ "grad_norm": 42.736724853515625,
+ "learning_rate": 4.490158730158731e-05,
+ "loss": 0.4746,
+ "step": 1678
+ },
+ {
+ "epoch": 9.594285714285714,
+ "grad_norm": 91.87010955810547,
+ "learning_rate": 4.4895238095238095e-05,
+ "loss": 0.4918,
+ "step": 1679
+ },
+ {
+ "epoch": 9.6,
+ "grad_norm": 51.22637939453125,
+ "learning_rate": 4.4888888888888894e-05,
+ "loss": 0.6637,
+ "step": 1680
+ },
+ {
+ "epoch": 9.605714285714285,
+ "grad_norm": 42.326847076416016,
+ "learning_rate": 4.488253968253969e-05,
+ "loss": 0.6179,
+ "step": 1681
+ },
+ {
+ "epoch": 9.611428571428572,
+ "grad_norm": 33.708980560302734,
+ "learning_rate": 4.487619047619048e-05,
+ "loss": 0.5218,
+ "step": 1682
+ },
+ {
+ "epoch": 9.617142857142857,
+ "grad_norm": 38.000892639160156,
+ "learning_rate": 4.486984126984127e-05,
+ "loss": 0.8485,
+ "step": 1683
+ },
+ {
+ "epoch": 9.622857142857143,
+ "grad_norm": 67.10118103027344,
+ "learning_rate": 4.4863492063492065e-05,
+ "loss": 0.5679,
+ "step": 1684
+ },
+ {
+ "epoch": 9.628571428571428,
+ "grad_norm": 49.57404327392578,
+ "learning_rate": 4.485714285714286e-05,
+ "loss": 0.4918,
+ "step": 1685
+ },
+ {
+ "epoch": 9.634285714285713,
+ "grad_norm": 52.69540786743164,
+ "learning_rate": 4.485079365079366e-05,
+ "loss": 0.6589,
+ "step": 1686
+ },
+ {
+ "epoch": 9.64,
+ "grad_norm": 51.51718521118164,
+ "learning_rate": 4.484444444444444e-05,
+ "loss": 0.3976,
+ "step": 1687
+ },
+ {
+ "epoch": 9.645714285714286,
+ "grad_norm": 62.12007522583008,
+ "learning_rate": 4.483809523809524e-05,
+ "loss": 0.6888,
+ "step": 1688
+ },
+ {
+ "epoch": 9.651428571428571,
+ "grad_norm": 47.324432373046875,
+ "learning_rate": 4.4831746031746035e-05,
+ "loss": 0.6809,
+ "step": 1689
+ },
+ {
+ "epoch": 9.657142857142857,
+ "grad_norm": 189.40052795410156,
+ "learning_rate": 4.482539682539683e-05,
+ "loss": 0.4769,
+ "step": 1690
+ },
+ {
+ "epoch": 9.662857142857142,
+ "grad_norm": 147.95223999023438,
+ "learning_rate": 4.481904761904762e-05,
+ "loss": 0.3775,
+ "step": 1691
+ },
+ {
+ "epoch": 9.668571428571429,
+ "grad_norm": 32.29826736450195,
+ "learning_rate": 4.481269841269842e-05,
+ "loss": 0.8048,
+ "step": 1692
+ },
+ {
+ "epoch": 9.674285714285714,
+ "grad_norm": 53.42995071411133,
+ "learning_rate": 4.4806349206349205e-05,
+ "loss": 0.3771,
+ "step": 1693
+ },
+ {
+ "epoch": 9.68,
+ "grad_norm": 67.25894165039062,
+ "learning_rate": 4.4800000000000005e-05,
+ "loss": 0.6628,
+ "step": 1694
+ },
+ {
+ "epoch": 9.685714285714285,
+ "grad_norm": 50.50597381591797,
+ "learning_rate": 4.479365079365079e-05,
+ "loss": 0.495,
+ "step": 1695
+ },
+ {
+ "epoch": 9.691428571428572,
+ "grad_norm": 37.99165725708008,
+ "learning_rate": 4.478730158730159e-05,
+ "loss": 0.6954,
+ "step": 1696
+ },
+ {
+ "epoch": 9.697142857142858,
+ "grad_norm": 40.71255874633789,
+ "learning_rate": 4.478095238095238e-05,
+ "loss": 0.4784,
+ "step": 1697
+ },
+ {
+ "epoch": 9.702857142857143,
+ "grad_norm": 22.380142211914062,
+ "learning_rate": 4.4774603174603175e-05,
+ "loss": 0.6058,
+ "step": 1698
+ },
+ {
+ "epoch": 9.708571428571428,
+ "grad_norm": 53.52154541015625,
+ "learning_rate": 4.476825396825397e-05,
+ "loss": 0.492,
+ "step": 1699
+ },
+ {
+ "epoch": 9.714285714285714,
+ "grad_norm": 21.259414672851562,
+ "learning_rate": 4.476190476190477e-05,
+ "loss": 0.7073,
+ "step": 1700
+ },
+ {
+ "epoch": 9.72,
+ "grad_norm": 67.19175720214844,
+ "learning_rate": 4.475555555555555e-05,
+ "loss": 0.4383,
+ "step": 1701
+ },
+ {
+ "epoch": 9.725714285714286,
+ "grad_norm": 61.081031799316406,
+ "learning_rate": 4.474920634920635e-05,
+ "loss": 0.5562,
+ "step": 1702
+ },
+ {
+ "epoch": 9.731428571428571,
+ "grad_norm": 79.01075744628906,
+ "learning_rate": 4.4742857142857145e-05,
+ "loss": 0.529,
+ "step": 1703
+ },
+ {
+ "epoch": 9.737142857142857,
+ "grad_norm": 37.538238525390625,
+ "learning_rate": 4.473650793650794e-05,
+ "loss": 0.5126,
+ "step": 1704
+ },
+ {
+ "epoch": 9.742857142857144,
+ "grad_norm": 34.34321212768555,
+ "learning_rate": 4.473015873015873e-05,
+ "loss": 0.518,
+ "step": 1705
+ },
+ {
+ "epoch": 9.748571428571429,
+ "grad_norm": 69.47142791748047,
+ "learning_rate": 4.472380952380952e-05,
+ "loss": 0.4305,
+ "step": 1706
+ },
+ {
+ "epoch": 9.754285714285714,
+ "grad_norm": 115.71222686767578,
+ "learning_rate": 4.4717460317460316e-05,
+ "loss": 0.6728,
+ "step": 1707
+ },
+ {
+ "epoch": 9.76,
+ "grad_norm": 43.00593185424805,
+ "learning_rate": 4.4711111111111115e-05,
+ "loss": 0.6375,
+ "step": 1708
+ },
+ {
+ "epoch": 9.765714285714285,
+ "grad_norm": 28.06012725830078,
+ "learning_rate": 4.470476190476191e-05,
+ "loss": 0.7231,
+ "step": 1709
+ },
+ {
+ "epoch": 9.771428571428572,
+ "grad_norm": 47.50296401977539,
+ "learning_rate": 4.46984126984127e-05,
+ "loss": 0.4661,
+ "step": 1710
+ },
+ {
+ "epoch": 9.777142857142858,
+ "grad_norm": 137.15155029296875,
+ "learning_rate": 4.469206349206349e-05,
+ "loss": 0.6739,
+ "step": 1711
+ },
+ {
+ "epoch": 9.782857142857143,
+ "grad_norm": 69.97490692138672,
+ "learning_rate": 4.4685714285714286e-05,
+ "loss": 0.4805,
+ "step": 1712
+ },
+ {
+ "epoch": 9.788571428571428,
+ "grad_norm": 32.603946685791016,
+ "learning_rate": 4.4679365079365085e-05,
+ "loss": 0.6628,
+ "step": 1713
+ },
+ {
+ "epoch": 9.794285714285714,
+ "grad_norm": 47.70781707763672,
+ "learning_rate": 4.467301587301588e-05,
+ "loss": 0.7778,
+ "step": 1714
+ },
+ {
+ "epoch": 9.8,
+ "grad_norm": 33.18388748168945,
+ "learning_rate": 4.466666666666667e-05,
+ "loss": 0.523,
+ "step": 1715
+ },
+ {
+ "epoch": 9.805714285714286,
+ "grad_norm": 273.55523681640625,
+ "learning_rate": 4.466031746031746e-05,
+ "loss": 0.6989,
+ "step": 1716
+ },
+ {
+ "epoch": 9.811428571428571,
+ "grad_norm": 90.4410171508789,
+ "learning_rate": 4.4653968253968256e-05,
+ "loss": 0.421,
+ "step": 1717
+ },
+ {
+ "epoch": 9.817142857142857,
+ "grad_norm": 52.67757797241211,
+ "learning_rate": 4.464761904761905e-05,
+ "loss": 0.4459,
+ "step": 1718
+ },
+ {
+ "epoch": 9.822857142857142,
+ "grad_norm": 37.0533561706543,
+ "learning_rate": 4.464126984126985e-05,
+ "loss": 0.7322,
+ "step": 1719
+ },
+ {
+ "epoch": 9.82857142857143,
+ "grad_norm": 58.06081771850586,
+ "learning_rate": 4.4634920634920634e-05,
+ "loss": 0.412,
+ "step": 1720
+ },
+ {
+ "epoch": 9.834285714285715,
+ "grad_norm": 38.50861358642578,
+ "learning_rate": 4.462857142857143e-05,
+ "loss": 0.4276,
+ "step": 1721
+ },
+ {
+ "epoch": 9.84,
+ "grad_norm": 37.932193756103516,
+ "learning_rate": 4.4622222222222226e-05,
+ "loss": 0.517,
+ "step": 1722
+ },
+ {
+ "epoch": 9.845714285714285,
+ "grad_norm": 39.057884216308594,
+ "learning_rate": 4.461587301587302e-05,
+ "loss": 0.4924,
+ "step": 1723
+ },
+ {
+ "epoch": 9.85142857142857,
+ "grad_norm": 30.302133560180664,
+ "learning_rate": 4.460952380952381e-05,
+ "loss": 0.4598,
+ "step": 1724
+ },
+ {
+ "epoch": 9.857142857142858,
+ "grad_norm": 73.9806137084961,
+ "learning_rate": 4.460317460317461e-05,
+ "loss": 0.4182,
+ "step": 1725
+ },
+ {
+ "epoch": 9.862857142857143,
+ "grad_norm": 26.147907257080078,
+ "learning_rate": 4.4596825396825396e-05,
+ "loss": 0.6775,
+ "step": 1726
+ },
+ {
+ "epoch": 9.868571428571428,
+ "grad_norm": 34.04471206665039,
+ "learning_rate": 4.4590476190476196e-05,
+ "loss": 0.52,
+ "step": 1727
+ },
+ {
+ "epoch": 9.874285714285714,
+ "grad_norm": 46.370750427246094,
+ "learning_rate": 4.458412698412698e-05,
+ "loss": 0.6936,
+ "step": 1728
+ },
+ {
+ "epoch": 9.88,
+ "grad_norm": 35.784751892089844,
+ "learning_rate": 4.457777777777778e-05,
+ "loss": 0.3824,
+ "step": 1729
+ },
+ {
+ "epoch": 9.885714285714286,
+ "grad_norm": 81.29346466064453,
+ "learning_rate": 4.4571428571428574e-05,
+ "loss": 0.5515,
+ "step": 1730
+ },
+ {
+ "epoch": 9.891428571428571,
+ "grad_norm": 35.98829650878906,
+ "learning_rate": 4.4565079365079366e-05,
+ "loss": 0.536,
+ "step": 1731
+ },
+ {
+ "epoch": 9.897142857142857,
+ "grad_norm": 56.466915130615234,
+ "learning_rate": 4.455873015873016e-05,
+ "loss": 0.5491,
+ "step": 1732
+ },
+ {
+ "epoch": 9.902857142857142,
+ "grad_norm": 29.79550552368164,
+ "learning_rate": 4.455238095238096e-05,
+ "loss": 0.8255,
+ "step": 1733
+ },
+ {
+ "epoch": 9.90857142857143,
+ "grad_norm": 44.566951751708984,
+ "learning_rate": 4.4546031746031744e-05,
+ "loss": 0.513,
+ "step": 1734
+ },
+ {
+ "epoch": 9.914285714285715,
+ "grad_norm": 76.26630401611328,
+ "learning_rate": 4.4539682539682543e-05,
+ "loss": 0.5726,
+ "step": 1735
+ },
+ {
+ "epoch": 9.92,
+ "grad_norm": 88.24022674560547,
+ "learning_rate": 4.4533333333333336e-05,
+ "loss": 0.5026,
+ "step": 1736
+ },
+ {
+ "epoch": 9.925714285714285,
+ "grad_norm": 59.36637496948242,
+ "learning_rate": 4.452698412698413e-05,
+ "loss": 0.5303,
+ "step": 1737
+ },
+ {
+ "epoch": 9.93142857142857,
+ "grad_norm": 133.46734619140625,
+ "learning_rate": 4.452063492063492e-05,
+ "loss": 0.5762,
+ "step": 1738
+ },
+ {
+ "epoch": 9.937142857142858,
+ "grad_norm": 73.90782928466797,
+ "learning_rate": 4.4514285714285714e-05,
+ "loss": 0.4135,
+ "step": 1739
+ },
+ {
+ "epoch": 9.942857142857143,
+ "grad_norm": 16.689319610595703,
+ "learning_rate": 4.450793650793651e-05,
+ "loss": 0.4852,
+ "step": 1740
+ },
+ {
+ "epoch": 9.948571428571428,
+ "grad_norm": 46.914302825927734,
+ "learning_rate": 4.4501587301587306e-05,
+ "loss": 0.4914,
+ "step": 1741
+ },
+ {
+ "epoch": 9.954285714285714,
+ "grad_norm": 64.44391632080078,
+ "learning_rate": 4.44952380952381e-05,
+ "loss": 0.7654,
+ "step": 1742
+ },
+ {
+ "epoch": 9.96,
+ "grad_norm": 44.79595184326172,
+ "learning_rate": 4.448888888888889e-05,
+ "loss": 0.654,
+ "step": 1743
+ },
+ {
+ "epoch": 9.965714285714286,
+ "grad_norm": 20.467912673950195,
+ "learning_rate": 4.4482539682539684e-05,
+ "loss": 0.5238,
+ "step": 1744
+ },
+ {
+ "epoch": 9.971428571428572,
+ "grad_norm": 41.49550247192383,
+ "learning_rate": 4.447619047619048e-05,
+ "loss": 0.5272,
+ "step": 1745
+ },
+ {
+ "epoch": 9.977142857142857,
+ "grad_norm": 163.62322998046875,
+ "learning_rate": 4.446984126984127e-05,
+ "loss": 0.6409,
+ "step": 1746
+ },
+ {
+ "epoch": 9.982857142857142,
+ "grad_norm": 67.88383483886719,
+ "learning_rate": 4.446349206349207e-05,
+ "loss": 0.878,
+ "step": 1747
+ },
+ {
+ "epoch": 9.98857142857143,
+ "grad_norm": 139.52902221679688,
+ "learning_rate": 4.445714285714286e-05,
+ "loss": 0.5412,
+ "step": 1748
+ },
+ {
+ "epoch": 9.994285714285715,
+ "grad_norm": 44.225887298583984,
+ "learning_rate": 4.4450793650793654e-05,
+ "loss": 0.33,
+ "step": 1749
+ },
+ {
+ "epoch": 10.0,
+ "grad_norm": 91.18606567382812,
+ "learning_rate": 4.4444444444444447e-05,
+ "loss": 0.7271,
+ "step": 1750
+ },
+ {
+ "epoch": 10.0,
+ "eval_classes": 0,
+ "eval_loss": 0.679156482219696,
+ "eval_map": 0.9103,
+ "eval_map_50": 0.9707,
+ "eval_map_75": 0.9658,
+ "eval_map_large": 0.9103,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9103,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7806,
+ "eval_mar_10": 0.9597,
+ "eval_mar_100": 0.9743,
+ "eval_mar_100_per_class": 0.9743,
+ "eval_mar_large": 0.9743,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 18.2727,
+ "eval_samples_per_second": 16.09,
+ "eval_steps_per_second": 2.025,
+ "step": 1750
+ },
+ {
+ "epoch": 10.005714285714285,
+ "grad_norm": 27.433137893676758,
+ "learning_rate": 4.443809523809524e-05,
+ "loss": 0.5827,
+ "step": 1751
+ },
+ {
+ "epoch": 10.01142857142857,
+ "grad_norm": 34.193050384521484,
+ "learning_rate": 4.443174603174604e-05,
+ "loss": 0.4465,
+ "step": 1752
+ },
+ {
+ "epoch": 10.017142857142858,
+ "grad_norm": 29.82825469970703,
+ "learning_rate": 4.4425396825396824e-05,
+ "loss": 0.6885,
+ "step": 1753
+ },
+ {
+ "epoch": 10.022857142857143,
+ "grad_norm": 21.08742904663086,
+ "learning_rate": 4.4419047619047624e-05,
+ "loss": 0.5118,
+ "step": 1754
+ },
+ {
+ "epoch": 10.028571428571428,
+ "grad_norm": 22.1567325592041,
+ "learning_rate": 4.4412698412698417e-05,
+ "loss": 0.481,
+ "step": 1755
+ },
+ {
+ "epoch": 10.034285714285714,
+ "grad_norm": 38.391639709472656,
+ "learning_rate": 4.440634920634921e-05,
+ "loss": 0.4155,
+ "step": 1756
+ },
+ {
+ "epoch": 10.04,
+ "grad_norm": 62.439395904541016,
+ "learning_rate": 4.44e-05,
+ "loss": 0.78,
+ "step": 1757
+ },
+ {
+ "epoch": 10.045714285714286,
+ "grad_norm": 28.28538703918457,
+ "learning_rate": 4.43936507936508e-05,
+ "loss": 0.5031,
+ "step": 1758
+ },
+ {
+ "epoch": 10.051428571428572,
+ "grad_norm": 28.75678253173828,
+ "learning_rate": 4.438730158730159e-05,
+ "loss": 0.7015,
+ "step": 1759
+ },
+ {
+ "epoch": 10.057142857142857,
+ "grad_norm": 37.00398254394531,
+ "learning_rate": 4.4380952380952386e-05,
+ "loss": 0.533,
+ "step": 1760
+ },
+ {
+ "epoch": 10.062857142857142,
+ "grad_norm": 31.764421463012695,
+ "learning_rate": 4.437460317460317e-05,
+ "loss": 0.5539,
+ "step": 1761
+ },
+ {
+ "epoch": 10.06857142857143,
+ "grad_norm": 34.632568359375,
+ "learning_rate": 4.436825396825397e-05,
+ "loss": 0.5347,
+ "step": 1762
+ },
+ {
+ "epoch": 10.074285714285715,
+ "grad_norm": 355.4385070800781,
+ "learning_rate": 4.4361904761904764e-05,
+ "loss": 0.6434,
+ "step": 1763
+ },
+ {
+ "epoch": 10.08,
+ "grad_norm": 42.91116714477539,
+ "learning_rate": 4.435555555555556e-05,
+ "loss": 0.4943,
+ "step": 1764
+ },
+ {
+ "epoch": 10.085714285714285,
+ "grad_norm": 42.363983154296875,
+ "learning_rate": 4.434920634920635e-05,
+ "loss": 0.3786,
+ "step": 1765
+ },
+ {
+ "epoch": 10.09142857142857,
+ "grad_norm": 46.98724365234375,
+ "learning_rate": 4.434285714285715e-05,
+ "loss": 0.5811,
+ "step": 1766
+ },
+ {
+ "epoch": 10.097142857142858,
+ "grad_norm": 50.39039611816406,
+ "learning_rate": 4.4336507936507935e-05,
+ "loss": 0.3648,
+ "step": 1767
+ },
+ {
+ "epoch": 10.102857142857143,
+ "grad_norm": 68.35560607910156,
+ "learning_rate": 4.4330158730158734e-05,
+ "loss": 0.4918,
+ "step": 1768
+ },
+ {
+ "epoch": 10.108571428571429,
+ "grad_norm": 58.892364501953125,
+ "learning_rate": 4.432380952380953e-05,
+ "loss": 0.5621,
+ "step": 1769
+ },
+ {
+ "epoch": 10.114285714285714,
+ "grad_norm": 30.972572326660156,
+ "learning_rate": 4.431746031746032e-05,
+ "loss": 0.6199,
+ "step": 1770
+ },
+ {
+ "epoch": 10.12,
+ "grad_norm": 22.662992477416992,
+ "learning_rate": 4.431111111111111e-05,
+ "loss": 0.4741,
+ "step": 1771
+ },
+ {
+ "epoch": 10.125714285714286,
+ "grad_norm": 33.405879974365234,
+ "learning_rate": 4.4304761904761905e-05,
+ "loss": 0.4343,
+ "step": 1772
+ },
+ {
+ "epoch": 10.131428571428572,
+ "grad_norm": 48.99565505981445,
+ "learning_rate": 4.42984126984127e-05,
+ "loss": 0.5988,
+ "step": 1773
+ },
+ {
+ "epoch": 10.137142857142857,
+ "grad_norm": 441.27545166015625,
+ "learning_rate": 4.42920634920635e-05,
+ "loss": 0.5665,
+ "step": 1774
+ },
+ {
+ "epoch": 10.142857142857142,
+ "grad_norm": 29.71120262145996,
+ "learning_rate": 4.428571428571428e-05,
+ "loss": 0.7444,
+ "step": 1775
+ },
+ {
+ "epoch": 10.14857142857143,
+ "grad_norm": 26.079683303833008,
+ "learning_rate": 4.427936507936508e-05,
+ "loss": 0.51,
+ "step": 1776
+ },
+ {
+ "epoch": 10.154285714285715,
+ "grad_norm": 60.71558380126953,
+ "learning_rate": 4.4273015873015875e-05,
+ "loss": 0.4623,
+ "step": 1777
+ },
+ {
+ "epoch": 10.16,
+ "grad_norm": 66.20484924316406,
+ "learning_rate": 4.426666666666667e-05,
+ "loss": 0.7319,
+ "step": 1778
+ },
+ {
+ "epoch": 10.165714285714285,
+ "grad_norm": 50.07710647583008,
+ "learning_rate": 4.426031746031746e-05,
+ "loss": 0.7065,
+ "step": 1779
+ },
+ {
+ "epoch": 10.17142857142857,
+ "grad_norm": 64.73977661132812,
+ "learning_rate": 4.425396825396826e-05,
+ "loss": 0.6807,
+ "step": 1780
+ },
+ {
+ "epoch": 10.177142857142858,
+ "grad_norm": 89.32843017578125,
+ "learning_rate": 4.424761904761905e-05,
+ "loss": 0.4594,
+ "step": 1781
+ },
+ {
+ "epoch": 10.182857142857143,
+ "grad_norm": 52.258609771728516,
+ "learning_rate": 4.4241269841269845e-05,
+ "loss": 0.6438,
+ "step": 1782
+ },
+ {
+ "epoch": 10.188571428571429,
+ "grad_norm": 18.544471740722656,
+ "learning_rate": 4.423492063492064e-05,
+ "loss": 0.6665,
+ "step": 1783
+ },
+ {
+ "epoch": 10.194285714285714,
+ "grad_norm": 81.1020736694336,
+ "learning_rate": 4.422857142857143e-05,
+ "loss": 0.5614,
+ "step": 1784
+ },
+ {
+ "epoch": 10.2,
+ "grad_norm": 33.819732666015625,
+ "learning_rate": 4.422222222222222e-05,
+ "loss": 0.6872,
+ "step": 1785
+ },
+ {
+ "epoch": 10.205714285714286,
+ "grad_norm": 36.031288146972656,
+ "learning_rate": 4.4215873015873015e-05,
+ "loss": 0.6527,
+ "step": 1786
+ },
+ {
+ "epoch": 10.211428571428572,
+ "grad_norm": 37.90239334106445,
+ "learning_rate": 4.4209523809523815e-05,
+ "loss": 0.5108,
+ "step": 1787
+ },
+ {
+ "epoch": 10.217142857142857,
+ "grad_norm": 37.0508918762207,
+ "learning_rate": 4.420317460317461e-05,
+ "loss": 0.553,
+ "step": 1788
+ },
+ {
+ "epoch": 10.222857142857142,
+ "grad_norm": 78.48355102539062,
+ "learning_rate": 4.41968253968254e-05,
+ "loss": 0.83,
+ "step": 1789
+ },
+ {
+ "epoch": 10.228571428571428,
+ "grad_norm": 53.61076736450195,
+ "learning_rate": 4.419047619047619e-05,
+ "loss": 0.5672,
+ "step": 1790
+ },
+ {
+ "epoch": 10.234285714285715,
+ "grad_norm": 54.93144989013672,
+ "learning_rate": 4.418412698412699e-05,
+ "loss": 0.6038,
+ "step": 1791
+ },
+ {
+ "epoch": 10.24,
+ "grad_norm": 44.15296173095703,
+ "learning_rate": 4.417777777777778e-05,
+ "loss": 0.5362,
+ "step": 1792
+ },
+ {
+ "epoch": 10.245714285714286,
+ "grad_norm": 32.382083892822266,
+ "learning_rate": 4.417142857142858e-05,
+ "loss": 0.6964,
+ "step": 1793
+ },
+ {
+ "epoch": 10.251428571428571,
+ "grad_norm": 63.223724365234375,
+ "learning_rate": 4.416507936507936e-05,
+ "loss": 0.5144,
+ "step": 1794
+ },
+ {
+ "epoch": 10.257142857142856,
+ "grad_norm": 32.31887435913086,
+ "learning_rate": 4.415873015873016e-05,
+ "loss": 0.5586,
+ "step": 1795
+ },
+ {
+ "epoch": 10.262857142857143,
+ "grad_norm": 67.84217834472656,
+ "learning_rate": 4.4152380952380955e-05,
+ "loss": 0.3581,
+ "step": 1796
+ },
+ {
+ "epoch": 10.268571428571429,
+ "grad_norm": 74.47038269042969,
+ "learning_rate": 4.414603174603175e-05,
+ "loss": 0.3358,
+ "step": 1797
+ },
+ {
+ "epoch": 10.274285714285714,
+ "grad_norm": 31.795873641967773,
+ "learning_rate": 4.413968253968254e-05,
+ "loss": 0.611,
+ "step": 1798
+ },
+ {
+ "epoch": 10.28,
+ "grad_norm": 34.879581451416016,
+ "learning_rate": 4.413333333333334e-05,
+ "loss": 0.5509,
+ "step": 1799
+ },
+ {
+ "epoch": 10.285714285714286,
+ "grad_norm": 35.9124641418457,
+ "learning_rate": 4.4126984126984126e-05,
+ "loss": 0.4702,
+ "step": 1800
+ },
+ {
+ "epoch": 10.291428571428572,
+ "grad_norm": 17.336244583129883,
+ "learning_rate": 4.4120634920634925e-05,
+ "loss": 0.3667,
+ "step": 1801
+ },
+ {
+ "epoch": 10.297142857142857,
+ "grad_norm": 68.83108520507812,
+ "learning_rate": 4.411428571428572e-05,
+ "loss": 0.4476,
+ "step": 1802
+ },
+ {
+ "epoch": 10.302857142857142,
+ "grad_norm": 25.292253494262695,
+ "learning_rate": 4.410793650793651e-05,
+ "loss": 0.433,
+ "step": 1803
+ },
+ {
+ "epoch": 10.308571428571428,
+ "grad_norm": 72.31684875488281,
+ "learning_rate": 4.41015873015873e-05,
+ "loss": 0.5445,
+ "step": 1804
+ },
+ {
+ "epoch": 10.314285714285715,
+ "grad_norm": 39.674320220947266,
+ "learning_rate": 4.4095238095238096e-05,
+ "loss": 0.4679,
+ "step": 1805
+ },
+ {
+ "epoch": 10.32,
+ "grad_norm": 28.290306091308594,
+ "learning_rate": 4.408888888888889e-05,
+ "loss": 0.5483,
+ "step": 1806
+ },
+ {
+ "epoch": 10.325714285714286,
+ "grad_norm": 131.22181701660156,
+ "learning_rate": 4.408253968253969e-05,
+ "loss": 0.6128,
+ "step": 1807
+ },
+ {
+ "epoch": 10.331428571428571,
+ "grad_norm": 73.4686508178711,
+ "learning_rate": 4.4076190476190474e-05,
+ "loss": 0.8075,
+ "step": 1808
+ },
+ {
+ "epoch": 10.337142857142856,
+ "grad_norm": 38.875244140625,
+ "learning_rate": 4.406984126984127e-05,
+ "loss": 0.2387,
+ "step": 1809
+ },
+ {
+ "epoch": 10.342857142857143,
+ "grad_norm": 34.1298713684082,
+ "learning_rate": 4.4063492063492066e-05,
+ "loss": 0.5822,
+ "step": 1810
+ },
+ {
+ "epoch": 10.348571428571429,
+ "grad_norm": 43.67047882080078,
+ "learning_rate": 4.405714285714286e-05,
+ "loss": 0.7023,
+ "step": 1811
+ },
+ {
+ "epoch": 10.354285714285714,
+ "grad_norm": 62.95126724243164,
+ "learning_rate": 4.405079365079365e-05,
+ "loss": 0.5263,
+ "step": 1812
+ },
+ {
+ "epoch": 10.36,
+ "grad_norm": 14.687211036682129,
+ "learning_rate": 4.404444444444445e-05,
+ "loss": 0.4211,
+ "step": 1813
+ },
+ {
+ "epoch": 10.365714285714287,
+ "grad_norm": 109.05048370361328,
+ "learning_rate": 4.4038095238095236e-05,
+ "loss": 0.9146,
+ "step": 1814
+ },
+ {
+ "epoch": 10.371428571428572,
+ "grad_norm": 39.3391227722168,
+ "learning_rate": 4.4031746031746036e-05,
+ "loss": 0.3332,
+ "step": 1815
+ },
+ {
+ "epoch": 10.377142857142857,
+ "grad_norm": 24.407655715942383,
+ "learning_rate": 4.402539682539683e-05,
+ "loss": 0.4952,
+ "step": 1816
+ },
+ {
+ "epoch": 10.382857142857143,
+ "grad_norm": 54.404537200927734,
+ "learning_rate": 4.401904761904762e-05,
+ "loss": 0.5828,
+ "step": 1817
+ },
+ {
+ "epoch": 10.388571428571428,
+ "grad_norm": 91.14871215820312,
+ "learning_rate": 4.4012698412698414e-05,
+ "loss": 0.3805,
+ "step": 1818
+ },
+ {
+ "epoch": 10.394285714285715,
+ "grad_norm": 31.88556480407715,
+ "learning_rate": 4.4006349206349206e-05,
+ "loss": 0.5423,
+ "step": 1819
+ },
+ {
+ "epoch": 10.4,
+ "grad_norm": 26.99669075012207,
+ "learning_rate": 4.4000000000000006e-05,
+ "loss": 0.638,
+ "step": 1820
+ },
+ {
+ "epoch": 10.405714285714286,
+ "grad_norm": 86.47724914550781,
+ "learning_rate": 4.39936507936508e-05,
+ "loss": 0.3764,
+ "step": 1821
+ },
+ {
+ "epoch": 10.411428571428571,
+ "grad_norm": 54.31696319580078,
+ "learning_rate": 4.398730158730159e-05,
+ "loss": 0.5246,
+ "step": 1822
+ },
+ {
+ "epoch": 10.417142857142856,
+ "grad_norm": 55.37803268432617,
+ "learning_rate": 4.3980952380952384e-05,
+ "loss": 0.5363,
+ "step": 1823
+ },
+ {
+ "epoch": 10.422857142857143,
+ "grad_norm": 74.20997619628906,
+ "learning_rate": 4.3974603174603176e-05,
+ "loss": 0.559,
+ "step": 1824
+ },
+ {
+ "epoch": 10.428571428571429,
+ "grad_norm": 68.69185638427734,
+ "learning_rate": 4.396825396825397e-05,
+ "loss": 0.66,
+ "step": 1825
+ },
+ {
+ "epoch": 10.434285714285714,
+ "grad_norm": 71.71404266357422,
+ "learning_rate": 4.396190476190477e-05,
+ "loss": 0.5888,
+ "step": 1826
+ },
+ {
+ "epoch": 10.44,
+ "grad_norm": 41.23540496826172,
+ "learning_rate": 4.3955555555555554e-05,
+ "loss": 0.7463,
+ "step": 1827
+ },
+ {
+ "epoch": 10.445714285714285,
+ "grad_norm": 78.00237274169922,
+ "learning_rate": 4.3949206349206353e-05,
+ "loss": 0.5385,
+ "step": 1828
+ },
+ {
+ "epoch": 10.451428571428572,
+ "grad_norm": 27.8646240234375,
+ "learning_rate": 4.3942857142857146e-05,
+ "loss": 0.6213,
+ "step": 1829
+ },
+ {
+ "epoch": 10.457142857142857,
+ "grad_norm": 107.51541137695312,
+ "learning_rate": 4.393650793650794e-05,
+ "loss": 0.432,
+ "step": 1830
+ },
+ {
+ "epoch": 10.462857142857143,
+ "grad_norm": 105.09674835205078,
+ "learning_rate": 4.393015873015873e-05,
+ "loss": 0.6297,
+ "step": 1831
+ },
+ {
+ "epoch": 10.468571428571428,
+ "grad_norm": 50.10200500488281,
+ "learning_rate": 4.392380952380953e-05,
+ "loss": 0.3271,
+ "step": 1832
+ },
+ {
+ "epoch": 10.474285714285715,
+ "grad_norm": 103.67435455322266,
+ "learning_rate": 4.391746031746032e-05,
+ "loss": 0.6717,
+ "step": 1833
+ },
+ {
+ "epoch": 10.48,
+ "grad_norm": 15.707316398620605,
+ "learning_rate": 4.3911111111111116e-05,
+ "loss": 0.6886,
+ "step": 1834
+ },
+ {
+ "epoch": 10.485714285714286,
+ "grad_norm": 29.44092559814453,
+ "learning_rate": 4.39047619047619e-05,
+ "loss": 0.6768,
+ "step": 1835
+ },
+ {
+ "epoch": 10.491428571428571,
+ "grad_norm": 42.66861343383789,
+ "learning_rate": 4.38984126984127e-05,
+ "loss": 0.7291,
+ "step": 1836
+ },
+ {
+ "epoch": 10.497142857142856,
+ "grad_norm": 158.61050415039062,
+ "learning_rate": 4.3892063492063494e-05,
+ "loss": 0.4542,
+ "step": 1837
+ },
+ {
+ "epoch": 10.502857142857144,
+ "grad_norm": 38.50166320800781,
+ "learning_rate": 4.388571428571429e-05,
+ "loss": 0.5022,
+ "step": 1838
+ },
+ {
+ "epoch": 10.508571428571429,
+ "grad_norm": 72.28811645507812,
+ "learning_rate": 4.387936507936508e-05,
+ "loss": 0.6058,
+ "step": 1839
+ },
+ {
+ "epoch": 10.514285714285714,
+ "grad_norm": 22.055517196655273,
+ "learning_rate": 4.387301587301588e-05,
+ "loss": 0.7038,
+ "step": 1840
+ },
+ {
+ "epoch": 10.52,
+ "grad_norm": 325.0693054199219,
+ "learning_rate": 4.3866666666666665e-05,
+ "loss": 0.4803,
+ "step": 1841
+ },
+ {
+ "epoch": 10.525714285714285,
+ "grad_norm": 15.510709762573242,
+ "learning_rate": 4.3860317460317464e-05,
+ "loss": 0.399,
+ "step": 1842
+ },
+ {
+ "epoch": 10.531428571428572,
+ "grad_norm": 31.22942543029785,
+ "learning_rate": 4.3853968253968257e-05,
+ "loss": 0.668,
+ "step": 1843
+ },
+ {
+ "epoch": 10.537142857142857,
+ "grad_norm": 46.17405700683594,
+ "learning_rate": 4.384761904761905e-05,
+ "loss": 0.4323,
+ "step": 1844
+ },
+ {
+ "epoch": 10.542857142857143,
+ "grad_norm": 338.6712646484375,
+ "learning_rate": 4.384126984126984e-05,
+ "loss": 0.5699,
+ "step": 1845
+ },
+ {
+ "epoch": 10.548571428571428,
+ "grad_norm": 30.1645565032959,
+ "learning_rate": 4.3834920634920634e-05,
+ "loss": 0.4089,
+ "step": 1846
+ },
+ {
+ "epoch": 10.554285714285715,
+ "grad_norm": 24.616436004638672,
+ "learning_rate": 4.382857142857143e-05,
+ "loss": 0.4738,
+ "step": 1847
+ },
+ {
+ "epoch": 10.56,
+ "grad_norm": 56.76235580444336,
+ "learning_rate": 4.3822222222222227e-05,
+ "loss": 0.5598,
+ "step": 1848
+ },
+ {
+ "epoch": 10.565714285714286,
+ "grad_norm": 24.1751708984375,
+ "learning_rate": 4.381587301587301e-05,
+ "loss": 0.3399,
+ "step": 1849
+ },
+ {
+ "epoch": 10.571428571428571,
+ "grad_norm": 519.3365478515625,
+ "learning_rate": 4.380952380952381e-05,
+ "loss": 0.7979,
+ "step": 1850
+ },
+ {
+ "epoch": 10.577142857142857,
+ "grad_norm": 34.48884963989258,
+ "learning_rate": 4.3803174603174604e-05,
+ "loss": 0.4607,
+ "step": 1851
+ },
+ {
+ "epoch": 10.582857142857144,
+ "grad_norm": 24.102577209472656,
+ "learning_rate": 4.37968253968254e-05,
+ "loss": 0.3344,
+ "step": 1852
+ },
+ {
+ "epoch": 10.588571428571429,
+ "grad_norm": 25.61137580871582,
+ "learning_rate": 4.379047619047619e-05,
+ "loss": 0.3993,
+ "step": 1853
+ },
+ {
+ "epoch": 10.594285714285714,
+ "grad_norm": 36.330745697021484,
+ "learning_rate": 4.378412698412699e-05,
+ "loss": 0.4345,
+ "step": 1854
+ },
+ {
+ "epoch": 10.6,
+ "grad_norm": 60.14534378051758,
+ "learning_rate": 4.377777777777778e-05,
+ "loss": 0.4862,
+ "step": 1855
+ },
+ {
+ "epoch": 10.605714285714285,
+ "grad_norm": 62.317447662353516,
+ "learning_rate": 4.3771428571428574e-05,
+ "loss": 0.6628,
+ "step": 1856
+ },
+ {
+ "epoch": 10.611428571428572,
+ "grad_norm": 36.39304733276367,
+ "learning_rate": 4.376507936507937e-05,
+ "loss": 0.4503,
+ "step": 1857
+ },
+ {
+ "epoch": 10.617142857142857,
+ "grad_norm": 41.1817741394043,
+ "learning_rate": 4.375873015873016e-05,
+ "loss": 0.4089,
+ "step": 1858
+ },
+ {
+ "epoch": 10.622857142857143,
+ "grad_norm": 22.43792152404785,
+ "learning_rate": 4.375238095238096e-05,
+ "loss": 0.5147,
+ "step": 1859
+ },
+ {
+ "epoch": 10.628571428571428,
+ "grad_norm": 36.97702407836914,
+ "learning_rate": 4.3746031746031745e-05,
+ "loss": 0.4454,
+ "step": 1860
+ },
+ {
+ "epoch": 10.634285714285713,
+ "grad_norm": 54.35332489013672,
+ "learning_rate": 4.3739682539682544e-05,
+ "loss": 0.6532,
+ "step": 1861
+ },
+ {
+ "epoch": 10.64,
+ "grad_norm": 89.96182250976562,
+ "learning_rate": 4.373333333333334e-05,
+ "loss": 0.658,
+ "step": 1862
+ },
+ {
+ "epoch": 10.645714285714286,
+ "grad_norm": 57.741912841796875,
+ "learning_rate": 4.372698412698413e-05,
+ "loss": 0.6051,
+ "step": 1863
+ },
+ {
+ "epoch": 10.651428571428571,
+ "grad_norm": 63.035011291503906,
+ "learning_rate": 4.372063492063492e-05,
+ "loss": 0.4985,
+ "step": 1864
+ },
+ {
+ "epoch": 10.657142857142857,
+ "grad_norm": 64.25335693359375,
+ "learning_rate": 4.371428571428572e-05,
+ "loss": 0.8196,
+ "step": 1865
+ },
+ {
+ "epoch": 10.662857142857142,
+ "grad_norm": 44.35352325439453,
+ "learning_rate": 4.370793650793651e-05,
+ "loss": 0.4929,
+ "step": 1866
+ },
+ {
+ "epoch": 10.668571428571429,
+ "grad_norm": 63.985801696777344,
+ "learning_rate": 4.370158730158731e-05,
+ "loss": 0.4315,
+ "step": 1867
+ },
+ {
+ "epoch": 10.674285714285714,
+ "grad_norm": 69.80237579345703,
+ "learning_rate": 4.369523809523809e-05,
+ "loss": 0.4453,
+ "step": 1868
+ },
+ {
+ "epoch": 10.68,
+ "grad_norm": 38.57668685913086,
+ "learning_rate": 4.368888888888889e-05,
+ "loss": 0.3856,
+ "step": 1869
+ },
+ {
+ "epoch": 10.685714285714285,
+ "grad_norm": 58.52362823486328,
+ "learning_rate": 4.3682539682539685e-05,
+ "loss": 0.6876,
+ "step": 1870
+ },
+ {
+ "epoch": 10.691428571428572,
+ "grad_norm": 36.25707244873047,
+ "learning_rate": 4.367619047619048e-05,
+ "loss": 0.4271,
+ "step": 1871
+ },
+ {
+ "epoch": 10.697142857142858,
+ "grad_norm": 100.15937042236328,
+ "learning_rate": 4.366984126984127e-05,
+ "loss": 0.604,
+ "step": 1872
+ },
+ {
+ "epoch": 10.702857142857143,
+ "grad_norm": 81.68765258789062,
+ "learning_rate": 4.366349206349207e-05,
+ "loss": 0.4773,
+ "step": 1873
+ },
+ {
+ "epoch": 10.708571428571428,
+ "grad_norm": 74.6390609741211,
+ "learning_rate": 4.3657142857142855e-05,
+ "loss": 0.4262,
+ "step": 1874
+ },
+ {
+ "epoch": 10.714285714285714,
+ "grad_norm": 69.17678833007812,
+ "learning_rate": 4.3650793650793655e-05,
+ "loss": 0.6983,
+ "step": 1875
+ },
+ {
+ "epoch": 10.72,
+ "grad_norm": 222.8771514892578,
+ "learning_rate": 4.364444444444445e-05,
+ "loss": 0.7061,
+ "step": 1876
+ },
+ {
+ "epoch": 10.725714285714286,
+ "grad_norm": 144.02078247070312,
+ "learning_rate": 4.363809523809524e-05,
+ "loss": 0.5319,
+ "step": 1877
+ },
+ {
+ "epoch": 10.731428571428571,
+ "grad_norm": 45.64491653442383,
+ "learning_rate": 4.363174603174603e-05,
+ "loss": 0.4739,
+ "step": 1878
+ },
+ {
+ "epoch": 10.737142857142857,
+ "grad_norm": 87.71896362304688,
+ "learning_rate": 4.3625396825396825e-05,
+ "loss": 0.5727,
+ "step": 1879
+ },
+ {
+ "epoch": 10.742857142857144,
+ "grad_norm": 66.23672485351562,
+ "learning_rate": 4.361904761904762e-05,
+ "loss": 0.6821,
+ "step": 1880
+ },
+ {
+ "epoch": 10.748571428571429,
+ "grad_norm": 28.323043823242188,
+ "learning_rate": 4.361269841269842e-05,
+ "loss": 0.5018,
+ "step": 1881
+ },
+ {
+ "epoch": 10.754285714285714,
+ "grad_norm": 29.00827980041504,
+ "learning_rate": 4.36063492063492e-05,
+ "loss": 0.5059,
+ "step": 1882
+ },
+ {
+ "epoch": 10.76,
+ "grad_norm": 63.22015380859375,
+ "learning_rate": 4.36e-05,
+ "loss": 0.4007,
+ "step": 1883
+ },
+ {
+ "epoch": 10.765714285714285,
+ "grad_norm": 96.02413177490234,
+ "learning_rate": 4.3593650793650795e-05,
+ "loss": 0.7076,
+ "step": 1884
+ },
+ {
+ "epoch": 10.771428571428572,
+ "grad_norm": 37.6485595703125,
+ "learning_rate": 4.358730158730159e-05,
+ "loss": 0.5585,
+ "step": 1885
+ },
+ {
+ "epoch": 10.777142857142858,
+ "grad_norm": 141.62635803222656,
+ "learning_rate": 4.358095238095238e-05,
+ "loss": 0.483,
+ "step": 1886
+ },
+ {
+ "epoch": 10.782857142857143,
+ "grad_norm": 65.65088653564453,
+ "learning_rate": 4.357460317460318e-05,
+ "loss": 0.7007,
+ "step": 1887
+ },
+ {
+ "epoch": 10.788571428571428,
+ "grad_norm": 486.7319641113281,
+ "learning_rate": 4.3568253968253966e-05,
+ "loss": 0.5519,
+ "step": 1888
+ },
+ {
+ "epoch": 10.794285714285714,
+ "grad_norm": 69.58808898925781,
+ "learning_rate": 4.3561904761904765e-05,
+ "loss": 0.4125,
+ "step": 1889
+ },
+ {
+ "epoch": 10.8,
+ "grad_norm": 47.438167572021484,
+ "learning_rate": 4.355555555555556e-05,
+ "loss": 0.5029,
+ "step": 1890
+ },
+ {
+ "epoch": 10.805714285714286,
+ "grad_norm": 34.23190689086914,
+ "learning_rate": 4.354920634920635e-05,
+ "loss": 0.5539,
+ "step": 1891
+ },
+ {
+ "epoch": 10.811428571428571,
+ "grad_norm": 55.843780517578125,
+ "learning_rate": 4.354285714285714e-05,
+ "loss": 0.6891,
+ "step": 1892
+ },
+ {
+ "epoch": 10.817142857142857,
+ "grad_norm": 28.279245376586914,
+ "learning_rate": 4.3536507936507936e-05,
+ "loss": 0.6202,
+ "step": 1893
+ },
+ {
+ "epoch": 10.822857142857142,
+ "grad_norm": 41.35030746459961,
+ "learning_rate": 4.3530158730158735e-05,
+ "loss": 0.6041,
+ "step": 1894
+ },
+ {
+ "epoch": 10.82857142857143,
+ "grad_norm": 131.7793426513672,
+ "learning_rate": 4.352380952380953e-05,
+ "loss": 0.7109,
+ "step": 1895
+ },
+ {
+ "epoch": 10.834285714285715,
+ "grad_norm": 71.40914916992188,
+ "learning_rate": 4.351746031746032e-05,
+ "loss": 0.5715,
+ "step": 1896
+ },
+ {
+ "epoch": 10.84,
+ "grad_norm": 36.117801666259766,
+ "learning_rate": 4.351111111111111e-05,
+ "loss": 0.6285,
+ "step": 1897
+ },
+ {
+ "epoch": 10.845714285714285,
+ "grad_norm": 56.4141960144043,
+ "learning_rate": 4.350476190476191e-05,
+ "loss": 0.7102,
+ "step": 1898
+ },
+ {
+ "epoch": 10.85142857142857,
+ "grad_norm": 50.0871696472168,
+ "learning_rate": 4.34984126984127e-05,
+ "loss": 0.4798,
+ "step": 1899
+ },
+ {
+ "epoch": 10.857142857142858,
+ "grad_norm": 61.65697479248047,
+ "learning_rate": 4.34920634920635e-05,
+ "loss": 0.3668,
+ "step": 1900
+ },
+ {
+ "epoch": 10.862857142857143,
+ "grad_norm": 69.15985107421875,
+ "learning_rate": 4.3485714285714284e-05,
+ "loss": 0.5221,
+ "step": 1901
+ },
+ {
+ "epoch": 10.868571428571428,
+ "grad_norm": 36.924461364746094,
+ "learning_rate": 4.347936507936508e-05,
+ "loss": 0.6449,
+ "step": 1902
+ },
+ {
+ "epoch": 10.874285714285714,
+ "grad_norm": 61.253902435302734,
+ "learning_rate": 4.3473015873015876e-05,
+ "loss": 0.5385,
+ "step": 1903
+ },
+ {
+ "epoch": 10.88,
+ "grad_norm": 65.72564697265625,
+ "learning_rate": 4.346666666666667e-05,
+ "loss": 0.6443,
+ "step": 1904
+ },
+ {
+ "epoch": 10.885714285714286,
+ "grad_norm": 57.5501823425293,
+ "learning_rate": 4.346031746031746e-05,
+ "loss": 0.6056,
+ "step": 1905
+ },
+ {
+ "epoch": 10.891428571428571,
+ "grad_norm": 89.76790618896484,
+ "learning_rate": 4.345396825396826e-05,
+ "loss": 0.5265,
+ "step": 1906
+ },
+ {
+ "epoch": 10.897142857142857,
+ "grad_norm": 34.84463119506836,
+ "learning_rate": 4.3447619047619046e-05,
+ "loss": 0.5052,
+ "step": 1907
+ },
+ {
+ "epoch": 10.902857142857142,
+ "grad_norm": 51.19764709472656,
+ "learning_rate": 4.3441269841269846e-05,
+ "loss": 0.5892,
+ "step": 1908
+ },
+ {
+ "epoch": 10.90857142857143,
+ "grad_norm": 52.49979782104492,
+ "learning_rate": 4.343492063492064e-05,
+ "loss": 0.727,
+ "step": 1909
+ },
+ {
+ "epoch": 10.914285714285715,
+ "grad_norm": 17.535512924194336,
+ "learning_rate": 4.342857142857143e-05,
+ "loss": 0.4278,
+ "step": 1910
+ },
+ {
+ "epoch": 10.92,
+ "grad_norm": 25.79306983947754,
+ "learning_rate": 4.3422222222222224e-05,
+ "loss": 0.5654,
+ "step": 1911
+ },
+ {
+ "epoch": 10.925714285714285,
+ "grad_norm": 117.12657928466797,
+ "learning_rate": 4.3415873015873016e-05,
+ "loss": 0.381,
+ "step": 1912
+ },
+ {
+ "epoch": 10.93142857142857,
+ "grad_norm": 38.862060546875,
+ "learning_rate": 4.340952380952381e-05,
+ "loss": 0.5139,
+ "step": 1913
+ },
+ {
+ "epoch": 10.937142857142858,
+ "grad_norm": 68.39180755615234,
+ "learning_rate": 4.340317460317461e-05,
+ "loss": 0.4431,
+ "step": 1914
+ },
+ {
+ "epoch": 10.942857142857143,
+ "grad_norm": 53.311988830566406,
+ "learning_rate": 4.3396825396825394e-05,
+ "loss": 0.4408,
+ "step": 1915
+ },
+ {
+ "epoch": 10.948571428571428,
+ "grad_norm": 76.713623046875,
+ "learning_rate": 4.3390476190476194e-05,
+ "loss": 0.3767,
+ "step": 1916
+ },
+ {
+ "epoch": 10.954285714285714,
+ "grad_norm": 26.620380401611328,
+ "learning_rate": 4.3384126984126986e-05,
+ "loss": 0.4085,
+ "step": 1917
+ },
+ {
+ "epoch": 10.96,
+ "grad_norm": 38.23472595214844,
+ "learning_rate": 4.337777777777778e-05,
+ "loss": 0.5599,
+ "step": 1918
+ },
+ {
+ "epoch": 10.965714285714286,
+ "grad_norm": 96.24976348876953,
+ "learning_rate": 4.337142857142857e-05,
+ "loss": 0.6077,
+ "step": 1919
+ },
+ {
+ "epoch": 10.971428571428572,
+ "grad_norm": 98.00028228759766,
+ "learning_rate": 4.336507936507937e-05,
+ "loss": 0.3625,
+ "step": 1920
+ },
+ {
+ "epoch": 10.977142857142857,
+ "grad_norm": 56.013404846191406,
+ "learning_rate": 4.335873015873016e-05,
+ "loss": 0.6928,
+ "step": 1921
+ },
+ {
+ "epoch": 10.982857142857142,
+ "grad_norm": 43.91138458251953,
+ "learning_rate": 4.3352380952380956e-05,
+ "loss": 0.5207,
+ "step": 1922
+ },
+ {
+ "epoch": 10.98857142857143,
+ "grad_norm": 63.15080261230469,
+ "learning_rate": 4.334603174603175e-05,
+ "loss": 0.4553,
+ "step": 1923
+ },
+ {
+ "epoch": 10.994285714285715,
+ "grad_norm": 34.55913543701172,
+ "learning_rate": 4.333968253968254e-05,
+ "loss": 0.5642,
+ "step": 1924
+ },
+ {
+ "epoch": 11.0,
+ "grad_norm": 55.247528076171875,
+ "learning_rate": 4.3333333333333334e-05,
+ "loss": 0.5804,
+ "step": 1925
+ },
+ {
+ "epoch": 11.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6237368583679199,
+ "eval_map": 0.9203,
+ "eval_map_50": 0.9709,
+ "eval_map_75": 0.9592,
+ "eval_map_large": 0.9205,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9203,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7902,
+ "eval_mar_10": 0.9641,
+ "eval_mar_100": 0.9743,
+ "eval_mar_100_per_class": 0.9743,
+ "eval_mar_large": 0.9743,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 18.5564,
+ "eval_samples_per_second": 15.844,
+ "eval_steps_per_second": 1.994,
+ "step": 1925
+ },
+ {
+ "epoch": 11.005714285714285,
+ "grad_norm": 37.32865905761719,
+ "learning_rate": 4.332698412698413e-05,
+ "loss": 0.4686,
+ "step": 1926
+ },
+ {
+ "epoch": 11.01142857142857,
+ "grad_norm": 33.08869552612305,
+ "learning_rate": 4.332063492063492e-05,
+ "loss": 0.4919,
+ "step": 1927
+ },
+ {
+ "epoch": 11.017142857142858,
+ "grad_norm": 56.82509994506836,
+ "learning_rate": 4.331428571428572e-05,
+ "loss": 0.5189,
+ "step": 1928
+ },
+ {
+ "epoch": 11.022857142857143,
+ "grad_norm": 74.70069885253906,
+ "learning_rate": 4.330793650793651e-05,
+ "loss": 0.488,
+ "step": 1929
+ },
+ {
+ "epoch": 11.028571428571428,
+ "grad_norm": 78.25180053710938,
+ "learning_rate": 4.3301587301587304e-05,
+ "loss": 0.6853,
+ "step": 1930
+ },
+ {
+ "epoch": 11.034285714285714,
+ "grad_norm": 155.81265258789062,
+ "learning_rate": 4.32952380952381e-05,
+ "loss": 0.4037,
+ "step": 1931
+ },
+ {
+ "epoch": 11.04,
+ "grad_norm": 30.16897201538086,
+ "learning_rate": 4.328888888888889e-05,
+ "loss": 0.4806,
+ "step": 1932
+ },
+ {
+ "epoch": 11.045714285714286,
+ "grad_norm": 82.40743255615234,
+ "learning_rate": 4.328253968253969e-05,
+ "loss": 0.4178,
+ "step": 1933
+ },
+ {
+ "epoch": 11.051428571428572,
+ "grad_norm": 428.0183410644531,
+ "learning_rate": 4.3276190476190475e-05,
+ "loss": 0.425,
+ "step": 1934
+ },
+ {
+ "epoch": 11.057142857142857,
+ "grad_norm": 158.1415252685547,
+ "learning_rate": 4.3269841269841274e-05,
+ "loss": 0.7478,
+ "step": 1935
+ },
+ {
+ "epoch": 11.062857142857142,
+ "grad_norm": 244.0629425048828,
+ "learning_rate": 4.3263492063492067e-05,
+ "loss": 0.4738,
+ "step": 1936
+ },
+ {
+ "epoch": 11.06857142857143,
+ "grad_norm": 325.82147216796875,
+ "learning_rate": 4.325714285714286e-05,
+ "loss": 0.7152,
+ "step": 1937
+ },
+ {
+ "epoch": 11.074285714285715,
+ "grad_norm": 98.61585235595703,
+ "learning_rate": 4.325079365079365e-05,
+ "loss": 0.5722,
+ "step": 1938
+ },
+ {
+ "epoch": 11.08,
+ "grad_norm": 42.40473556518555,
+ "learning_rate": 4.324444444444445e-05,
+ "loss": 0.4279,
+ "step": 1939
+ },
+ {
+ "epoch": 11.085714285714285,
+ "grad_norm": 54.572452545166016,
+ "learning_rate": 4.323809523809524e-05,
+ "loss": 0.6706,
+ "step": 1940
+ },
+ {
+ "epoch": 11.09142857142857,
+ "grad_norm": 27.586755752563477,
+ "learning_rate": 4.3231746031746037e-05,
+ "loss": 0.5558,
+ "step": 1941
+ },
+ {
+ "epoch": 11.097142857142858,
+ "grad_norm": 78.04554748535156,
+ "learning_rate": 4.322539682539683e-05,
+ "loss": 0.518,
+ "step": 1942
+ },
+ {
+ "epoch": 11.102857142857143,
+ "grad_norm": 44.25578689575195,
+ "learning_rate": 4.321904761904762e-05,
+ "loss": 0.4998,
+ "step": 1943
+ },
+ {
+ "epoch": 11.108571428571429,
+ "grad_norm": 21.83209991455078,
+ "learning_rate": 4.3212698412698414e-05,
+ "loss": 0.3801,
+ "step": 1944
+ },
+ {
+ "epoch": 11.114285714285714,
+ "grad_norm": 46.22245407104492,
+ "learning_rate": 4.320634920634921e-05,
+ "loss": 0.6038,
+ "step": 1945
+ },
+ {
+ "epoch": 11.12,
+ "grad_norm": 36.09217071533203,
+ "learning_rate": 4.32e-05,
+ "loss": 0.4138,
+ "step": 1946
+ },
+ {
+ "epoch": 11.125714285714286,
+ "grad_norm": 79.12897491455078,
+ "learning_rate": 4.31936507936508e-05,
+ "loss": 0.4681,
+ "step": 1947
+ },
+ {
+ "epoch": 11.131428571428572,
+ "grad_norm": 33.90317153930664,
+ "learning_rate": 4.3187301587301585e-05,
+ "loss": 0.4288,
+ "step": 1948
+ },
+ {
+ "epoch": 11.137142857142857,
+ "grad_norm": 109.8990478515625,
+ "learning_rate": 4.3180952380952384e-05,
+ "loss": 0.5626,
+ "step": 1949
+ },
+ {
+ "epoch": 11.142857142857142,
+ "grad_norm": 141.09649658203125,
+ "learning_rate": 4.317460317460318e-05,
+ "loss": 0.415,
+ "step": 1950
+ },
+ {
+ "epoch": 11.14857142857143,
+ "grad_norm": 118.22587585449219,
+ "learning_rate": 4.316825396825397e-05,
+ "loss": 0.4853,
+ "step": 1951
+ },
+ {
+ "epoch": 11.154285714285715,
+ "grad_norm": 34.691768646240234,
+ "learning_rate": 4.316190476190476e-05,
+ "loss": 0.4886,
+ "step": 1952
+ },
+ {
+ "epoch": 11.16,
+ "grad_norm": 46.5521354675293,
+ "learning_rate": 4.315555555555556e-05,
+ "loss": 0.5466,
+ "step": 1953
+ },
+ {
+ "epoch": 11.165714285714285,
+ "grad_norm": 53.882667541503906,
+ "learning_rate": 4.314920634920635e-05,
+ "loss": 0.5406,
+ "step": 1954
+ },
+ {
+ "epoch": 11.17142857142857,
+ "grad_norm": 20.344602584838867,
+ "learning_rate": 4.314285714285715e-05,
+ "loss": 0.7339,
+ "step": 1955
+ },
+ {
+ "epoch": 11.177142857142858,
+ "grad_norm": 928.9378051757812,
+ "learning_rate": 4.313650793650793e-05,
+ "loss": 0.4915,
+ "step": 1956
+ },
+ {
+ "epoch": 11.182857142857143,
+ "grad_norm": 38.344974517822266,
+ "learning_rate": 4.313015873015873e-05,
+ "loss": 0.5138,
+ "step": 1957
+ },
+ {
+ "epoch": 11.188571428571429,
+ "grad_norm": 28.63566780090332,
+ "learning_rate": 4.3123809523809525e-05,
+ "loss": 0.5422,
+ "step": 1958
+ },
+ {
+ "epoch": 11.194285714285714,
+ "grad_norm": 68.49561309814453,
+ "learning_rate": 4.311746031746032e-05,
+ "loss": 0.5566,
+ "step": 1959
+ },
+ {
+ "epoch": 11.2,
+ "grad_norm": 48.883544921875,
+ "learning_rate": 4.311111111111111e-05,
+ "loss": 0.605,
+ "step": 1960
+ },
+ {
+ "epoch": 11.205714285714286,
+ "grad_norm": 50.32197952270508,
+ "learning_rate": 4.310476190476191e-05,
+ "loss": 0.7474,
+ "step": 1961
+ },
+ {
+ "epoch": 11.211428571428572,
+ "grad_norm": 48.06432342529297,
+ "learning_rate": 4.30984126984127e-05,
+ "loss": 0.2864,
+ "step": 1962
+ },
+ {
+ "epoch": 11.217142857142857,
+ "grad_norm": 64.10684967041016,
+ "learning_rate": 4.3092063492063495e-05,
+ "loss": 0.568,
+ "step": 1963
+ },
+ {
+ "epoch": 11.222857142857142,
+ "grad_norm": 31.029804229736328,
+ "learning_rate": 4.308571428571429e-05,
+ "loss": 0.5044,
+ "step": 1964
+ },
+ {
+ "epoch": 11.228571428571428,
+ "grad_norm": 38.481319427490234,
+ "learning_rate": 4.307936507936508e-05,
+ "loss": 0.4226,
+ "step": 1965
+ },
+ {
+ "epoch": 11.234285714285715,
+ "grad_norm": 204.23287963867188,
+ "learning_rate": 4.307301587301587e-05,
+ "loss": 0.4376,
+ "step": 1966
+ },
+ {
+ "epoch": 11.24,
+ "grad_norm": 83.03377532958984,
+ "learning_rate": 4.3066666666666665e-05,
+ "loss": 0.6468,
+ "step": 1967
+ },
+ {
+ "epoch": 11.245714285714286,
+ "grad_norm": 56.4277229309082,
+ "learning_rate": 4.3060317460317465e-05,
+ "loss": 0.524,
+ "step": 1968
+ },
+ {
+ "epoch": 11.251428571428571,
+ "grad_norm": 95.98672485351562,
+ "learning_rate": 4.305396825396826e-05,
+ "loss": 0.8834,
+ "step": 1969
+ },
+ {
+ "epoch": 11.257142857142856,
+ "grad_norm": 68.86486053466797,
+ "learning_rate": 4.304761904761905e-05,
+ "loss": 0.5369,
+ "step": 1970
+ },
+ {
+ "epoch": 11.262857142857143,
+ "grad_norm": 79.95071411132812,
+ "learning_rate": 4.304126984126984e-05,
+ "loss": 0.6789,
+ "step": 1971
+ },
+ {
+ "epoch": 11.268571428571429,
+ "grad_norm": 89.56037139892578,
+ "learning_rate": 4.303492063492064e-05,
+ "loss": 0.7805,
+ "step": 1972
+ },
+ {
+ "epoch": 11.274285714285714,
+ "grad_norm": 44.71689224243164,
+ "learning_rate": 4.302857142857143e-05,
+ "loss": 0.5586,
+ "step": 1973
+ },
+ {
+ "epoch": 11.28,
+ "grad_norm": 100.0972671508789,
+ "learning_rate": 4.302222222222223e-05,
+ "loss": 0.5374,
+ "step": 1974
+ },
+ {
+ "epoch": 11.285714285714286,
+ "grad_norm": 52.913841247558594,
+ "learning_rate": 4.301587301587302e-05,
+ "loss": 0.6111,
+ "step": 1975
+ },
+ {
+ "epoch": 11.291428571428572,
+ "grad_norm": 24.168188095092773,
+ "learning_rate": 4.300952380952381e-05,
+ "loss": 0.3927,
+ "step": 1976
+ },
+ {
+ "epoch": 11.297142857142857,
+ "grad_norm": 41.68073272705078,
+ "learning_rate": 4.3003174603174605e-05,
+ "loss": 0.6429,
+ "step": 1977
+ },
+ {
+ "epoch": 11.302857142857142,
+ "grad_norm": 99.18087005615234,
+ "learning_rate": 4.29968253968254e-05,
+ "loss": 0.5777,
+ "step": 1978
+ },
+ {
+ "epoch": 11.308571428571428,
+ "grad_norm": 72.45071411132812,
+ "learning_rate": 4.299047619047619e-05,
+ "loss": 0.5482,
+ "step": 1979
+ },
+ {
+ "epoch": 11.314285714285715,
+ "grad_norm": 43.584842681884766,
+ "learning_rate": 4.298412698412699e-05,
+ "loss": 0.5588,
+ "step": 1980
+ },
+ {
+ "epoch": 11.32,
+ "grad_norm": 72.10374450683594,
+ "learning_rate": 4.2977777777777776e-05,
+ "loss": 0.4757,
+ "step": 1981
+ },
+ {
+ "epoch": 11.325714285714286,
+ "grad_norm": 54.02776336669922,
+ "learning_rate": 4.2971428571428575e-05,
+ "loss": 0.4067,
+ "step": 1982
+ },
+ {
+ "epoch": 11.331428571428571,
+ "grad_norm": 165.2854766845703,
+ "learning_rate": 4.296507936507937e-05,
+ "loss": 0.6324,
+ "step": 1983
+ },
+ {
+ "epoch": 11.337142857142856,
+ "grad_norm": 40.24265670776367,
+ "learning_rate": 4.295873015873016e-05,
+ "loss": 0.5337,
+ "step": 1984
+ },
+ {
+ "epoch": 11.342857142857143,
+ "grad_norm": 44.01199722290039,
+ "learning_rate": 4.295238095238095e-05,
+ "loss": 0.6425,
+ "step": 1985
+ },
+ {
+ "epoch": 11.348571428571429,
+ "grad_norm": 30.16360855102539,
+ "learning_rate": 4.294603174603175e-05,
+ "loss": 0.569,
+ "step": 1986
+ },
+ {
+ "epoch": 11.354285714285714,
+ "grad_norm": 54.55186462402344,
+ "learning_rate": 4.293968253968254e-05,
+ "loss": 0.5003,
+ "step": 1987
+ },
+ {
+ "epoch": 11.36,
+ "grad_norm": 84.72821807861328,
+ "learning_rate": 4.293333333333334e-05,
+ "loss": 0.6783,
+ "step": 1988
+ },
+ {
+ "epoch": 11.365714285714287,
+ "grad_norm": 33.0731201171875,
+ "learning_rate": 4.2926984126984124e-05,
+ "loss": 0.5685,
+ "step": 1989
+ },
+ {
+ "epoch": 11.371428571428572,
+ "grad_norm": 67.03597259521484,
+ "learning_rate": 4.292063492063492e-05,
+ "loss": 0.4879,
+ "step": 1990
+ },
+ {
+ "epoch": 11.377142857142857,
+ "grad_norm": 26.01388168334961,
+ "learning_rate": 4.2914285714285716e-05,
+ "loss": 0.4475,
+ "step": 1991
+ },
+ {
+ "epoch": 11.382857142857143,
+ "grad_norm": 41.41946792602539,
+ "learning_rate": 4.290793650793651e-05,
+ "loss": 0.5839,
+ "step": 1992
+ },
+ {
+ "epoch": 11.388571428571428,
+ "grad_norm": 22.868900299072266,
+ "learning_rate": 4.29015873015873e-05,
+ "loss": 0.3814,
+ "step": 1993
+ },
+ {
+ "epoch": 11.394285714285715,
+ "grad_norm": 38.04800033569336,
+ "learning_rate": 4.28952380952381e-05,
+ "loss": 0.5012,
+ "step": 1994
+ },
+ {
+ "epoch": 11.4,
+ "grad_norm": 29.78988265991211,
+ "learning_rate": 4.2888888888888886e-05,
+ "loss": 0.4224,
+ "step": 1995
+ },
+ {
+ "epoch": 11.405714285714286,
+ "grad_norm": 34.21943664550781,
+ "learning_rate": 4.2882539682539686e-05,
+ "loss": 0.4692,
+ "step": 1996
+ },
+ {
+ "epoch": 11.411428571428571,
+ "grad_norm": 36.694095611572266,
+ "learning_rate": 4.287619047619048e-05,
+ "loss": 0.6359,
+ "step": 1997
+ },
+ {
+ "epoch": 11.417142857142856,
+ "grad_norm": 50.67527770996094,
+ "learning_rate": 4.286984126984127e-05,
+ "loss": 0.872,
+ "step": 1998
+ },
+ {
+ "epoch": 11.422857142857143,
+ "grad_norm": 46.49637222290039,
+ "learning_rate": 4.2863492063492064e-05,
+ "loss": 0.529,
+ "step": 1999
+ },
+ {
+ "epoch": 11.428571428571429,
+ "grad_norm": 60.049251556396484,
+ "learning_rate": 4.2857142857142856e-05,
+ "loss": 0.6069,
+ "step": 2000
+ },
+ {
+ "epoch": 11.434285714285714,
+ "grad_norm": 50.15530014038086,
+ "learning_rate": 4.2850793650793656e-05,
+ "loss": 0.5928,
+ "step": 2001
+ },
+ {
+ "epoch": 11.44,
+ "grad_norm": 56.090023040771484,
+ "learning_rate": 4.284444444444445e-05,
+ "loss": 0.4419,
+ "step": 2002
+ },
+ {
+ "epoch": 11.445714285714285,
+ "grad_norm": 27.48720932006836,
+ "learning_rate": 4.283809523809524e-05,
+ "loss": 0.4542,
+ "step": 2003
+ },
+ {
+ "epoch": 11.451428571428572,
+ "grad_norm": 73.39603424072266,
+ "learning_rate": 4.2831746031746034e-05,
+ "loss": 0.7161,
+ "step": 2004
+ },
+ {
+ "epoch": 11.457142857142857,
+ "grad_norm": 33.92833709716797,
+ "learning_rate": 4.2825396825396826e-05,
+ "loss": 0.4452,
+ "step": 2005
+ },
+ {
+ "epoch": 11.462857142857143,
+ "grad_norm": 51.888221740722656,
+ "learning_rate": 4.281904761904762e-05,
+ "loss": 0.7835,
+ "step": 2006
+ },
+ {
+ "epoch": 11.468571428571428,
+ "grad_norm": 31.269601821899414,
+ "learning_rate": 4.281269841269842e-05,
+ "loss": 0.8202,
+ "step": 2007
+ },
+ {
+ "epoch": 11.474285714285715,
+ "grad_norm": 29.58742332458496,
+ "learning_rate": 4.280634920634921e-05,
+ "loss": 0.8376,
+ "step": 2008
+ },
+ {
+ "epoch": 11.48,
+ "grad_norm": 28.278118133544922,
+ "learning_rate": 4.2800000000000004e-05,
+ "loss": 0.6554,
+ "step": 2009
+ },
+ {
+ "epoch": 11.485714285714286,
+ "grad_norm": 64.48429107666016,
+ "learning_rate": 4.2793650793650796e-05,
+ "loss": 0.5049,
+ "step": 2010
+ },
+ {
+ "epoch": 11.491428571428571,
+ "grad_norm": 45.5100212097168,
+ "learning_rate": 4.278730158730159e-05,
+ "loss": 0.3134,
+ "step": 2011
+ },
+ {
+ "epoch": 11.497142857142856,
+ "grad_norm": 57.239131927490234,
+ "learning_rate": 4.278095238095238e-05,
+ "loss": 0.5187,
+ "step": 2012
+ },
+ {
+ "epoch": 11.502857142857144,
+ "grad_norm": 42.058956146240234,
+ "learning_rate": 4.277460317460318e-05,
+ "loss": 0.5187,
+ "step": 2013
+ },
+ {
+ "epoch": 11.508571428571429,
+ "grad_norm": 126.79985809326172,
+ "learning_rate": 4.276825396825397e-05,
+ "loss": 0.6129,
+ "step": 2014
+ },
+ {
+ "epoch": 11.514285714285714,
+ "grad_norm": 126.11183166503906,
+ "learning_rate": 4.2761904761904766e-05,
+ "loss": 0.6724,
+ "step": 2015
+ },
+ {
+ "epoch": 11.52,
+ "grad_norm": 48.38181686401367,
+ "learning_rate": 4.275555555555556e-05,
+ "loss": 0.5613,
+ "step": 2016
+ },
+ {
+ "epoch": 11.525714285714285,
+ "grad_norm": 72.9981689453125,
+ "learning_rate": 4.274920634920635e-05,
+ "loss": 0.6188,
+ "step": 2017
+ },
+ {
+ "epoch": 11.531428571428572,
+ "grad_norm": 190.55735778808594,
+ "learning_rate": 4.2742857142857144e-05,
+ "loss": 0.6706,
+ "step": 2018
+ },
+ {
+ "epoch": 11.537142857142857,
+ "grad_norm": 220.0579071044922,
+ "learning_rate": 4.2736507936507943e-05,
+ "loss": 0.3876,
+ "step": 2019
+ },
+ {
+ "epoch": 11.542857142857143,
+ "grad_norm": 67.05650329589844,
+ "learning_rate": 4.273015873015873e-05,
+ "loss": 0.5308,
+ "step": 2020
+ },
+ {
+ "epoch": 11.548571428571428,
+ "grad_norm": 45.68679428100586,
+ "learning_rate": 4.272380952380953e-05,
+ "loss": 0.548,
+ "step": 2021
+ },
+ {
+ "epoch": 11.554285714285715,
+ "grad_norm": 19.767457962036133,
+ "learning_rate": 4.2717460317460315e-05,
+ "loss": 0.4076,
+ "step": 2022
+ },
+ {
+ "epoch": 11.56,
+ "grad_norm": 54.95707321166992,
+ "learning_rate": 4.2711111111111114e-05,
+ "loss": 0.4922,
+ "step": 2023
+ },
+ {
+ "epoch": 11.565714285714286,
+ "grad_norm": 31.493915557861328,
+ "learning_rate": 4.270476190476191e-05,
+ "loss": 0.555,
+ "step": 2024
+ },
+ {
+ "epoch": 11.571428571428571,
+ "grad_norm": 44.43305206298828,
+ "learning_rate": 4.26984126984127e-05,
+ "loss": 0.6991,
+ "step": 2025
+ },
+ {
+ "epoch": 11.577142857142857,
+ "grad_norm": 19.636838912963867,
+ "learning_rate": 4.269206349206349e-05,
+ "loss": 0.487,
+ "step": 2026
+ },
+ {
+ "epoch": 11.582857142857144,
+ "grad_norm": 21.483842849731445,
+ "learning_rate": 4.268571428571429e-05,
+ "loss": 0.4997,
+ "step": 2027
+ },
+ {
+ "epoch": 11.588571428571429,
+ "grad_norm": 34.854068756103516,
+ "learning_rate": 4.267936507936508e-05,
+ "loss": 0.3773,
+ "step": 2028
+ },
+ {
+ "epoch": 11.594285714285714,
+ "grad_norm": 74.83104705810547,
+ "learning_rate": 4.267301587301588e-05,
+ "loss": 0.3825,
+ "step": 2029
+ },
+ {
+ "epoch": 11.6,
+ "grad_norm": 66.73103332519531,
+ "learning_rate": 4.266666666666667e-05,
+ "loss": 0.4521,
+ "step": 2030
+ },
+ {
+ "epoch": 11.605714285714285,
+ "grad_norm": 70.25010681152344,
+ "learning_rate": 4.266031746031746e-05,
+ "loss": 0.3803,
+ "step": 2031
+ },
+ {
+ "epoch": 11.611428571428572,
+ "grad_norm": 11.714632034301758,
+ "learning_rate": 4.2653968253968255e-05,
+ "loss": 0.4515,
+ "step": 2032
+ },
+ {
+ "epoch": 11.617142857142857,
+ "grad_norm": 50.63631820678711,
+ "learning_rate": 4.264761904761905e-05,
+ "loss": 0.4352,
+ "step": 2033
+ },
+ {
+ "epoch": 11.622857142857143,
+ "grad_norm": 52.2682991027832,
+ "learning_rate": 4.264126984126984e-05,
+ "loss": 0.7108,
+ "step": 2034
+ },
+ {
+ "epoch": 11.628571428571428,
+ "grad_norm": 39.13172149658203,
+ "learning_rate": 4.263492063492064e-05,
+ "loss": 0.7079,
+ "step": 2035
+ },
+ {
+ "epoch": 11.634285714285713,
+ "grad_norm": 39.47657775878906,
+ "learning_rate": 4.262857142857143e-05,
+ "loss": 0.7292,
+ "step": 2036
+ },
+ {
+ "epoch": 11.64,
+ "grad_norm": 17.30119514465332,
+ "learning_rate": 4.2622222222222224e-05,
+ "loss": 0.5236,
+ "step": 2037
+ },
+ {
+ "epoch": 11.645714285714286,
+ "grad_norm": 48.4094123840332,
+ "learning_rate": 4.261587301587302e-05,
+ "loss": 0.7339,
+ "step": 2038
+ },
+ {
+ "epoch": 11.651428571428571,
+ "grad_norm": 39.617584228515625,
+ "learning_rate": 4.260952380952381e-05,
+ "loss": 0.5657,
+ "step": 2039
+ },
+ {
+ "epoch": 11.657142857142857,
+ "grad_norm": 20.779735565185547,
+ "learning_rate": 4.260317460317461e-05,
+ "loss": 0.4882,
+ "step": 2040
+ },
+ {
+ "epoch": 11.662857142857142,
+ "grad_norm": 41.407501220703125,
+ "learning_rate": 4.25968253968254e-05,
+ "loss": 0.3647,
+ "step": 2041
+ },
+ {
+ "epoch": 11.668571428571429,
+ "grad_norm": 64.75984954833984,
+ "learning_rate": 4.2590476190476194e-05,
+ "loss": 0.4312,
+ "step": 2042
+ },
+ {
+ "epoch": 11.674285714285714,
+ "grad_norm": 47.528480529785156,
+ "learning_rate": 4.258412698412699e-05,
+ "loss": 0.3838,
+ "step": 2043
+ },
+ {
+ "epoch": 11.68,
+ "grad_norm": 32.28666305541992,
+ "learning_rate": 4.257777777777778e-05,
+ "loss": 0.4923,
+ "step": 2044
+ },
+ {
+ "epoch": 11.685714285714285,
+ "grad_norm": 89.13447570800781,
+ "learning_rate": 4.257142857142857e-05,
+ "loss": 0.4379,
+ "step": 2045
+ },
+ {
+ "epoch": 11.691428571428572,
+ "grad_norm": 28.47506332397461,
+ "learning_rate": 4.256507936507937e-05,
+ "loss": 0.4225,
+ "step": 2046
+ },
+ {
+ "epoch": 11.697142857142858,
+ "grad_norm": 47.36610794067383,
+ "learning_rate": 4.255873015873016e-05,
+ "loss": 0.5541,
+ "step": 2047
+ },
+ {
+ "epoch": 11.702857142857143,
+ "grad_norm": 56.977779388427734,
+ "learning_rate": 4.255238095238096e-05,
+ "loss": 0.6693,
+ "step": 2048
+ },
+ {
+ "epoch": 11.708571428571428,
+ "grad_norm": 27.516395568847656,
+ "learning_rate": 4.254603174603175e-05,
+ "loss": 0.6076,
+ "step": 2049
+ },
+ {
+ "epoch": 11.714285714285714,
+ "grad_norm": 329.2275085449219,
+ "learning_rate": 4.253968253968254e-05,
+ "loss": 0.4606,
+ "step": 2050
+ },
+ {
+ "epoch": 11.72,
+ "grad_norm": 63.27556228637695,
+ "learning_rate": 4.2533333333333335e-05,
+ "loss": 0.35,
+ "step": 2051
+ },
+ {
+ "epoch": 11.725714285714286,
+ "grad_norm": 24.467205047607422,
+ "learning_rate": 4.252698412698413e-05,
+ "loss": 0.4779,
+ "step": 2052
+ },
+ {
+ "epoch": 11.731428571428571,
+ "grad_norm": 27.03794288635254,
+ "learning_rate": 4.252063492063492e-05,
+ "loss": 0.519,
+ "step": 2053
+ },
+ {
+ "epoch": 11.737142857142857,
+ "grad_norm": 44.75577163696289,
+ "learning_rate": 4.251428571428572e-05,
+ "loss": 0.5261,
+ "step": 2054
+ },
+ {
+ "epoch": 11.742857142857144,
+ "grad_norm": 46.69427490234375,
+ "learning_rate": 4.2507936507936505e-05,
+ "loss": 0.3524,
+ "step": 2055
+ },
+ {
+ "epoch": 11.748571428571429,
+ "grad_norm": 661.8193969726562,
+ "learning_rate": 4.2501587301587305e-05,
+ "loss": 0.5148,
+ "step": 2056
+ },
+ {
+ "epoch": 11.754285714285714,
+ "grad_norm": 37.70705795288086,
+ "learning_rate": 4.24952380952381e-05,
+ "loss": 0.7424,
+ "step": 2057
+ },
+ {
+ "epoch": 11.76,
+ "grad_norm": 60.31504440307617,
+ "learning_rate": 4.248888888888889e-05,
+ "loss": 0.6803,
+ "step": 2058
+ },
+ {
+ "epoch": 11.765714285714285,
+ "grad_norm": 53.602027893066406,
+ "learning_rate": 4.248253968253968e-05,
+ "loss": 0.3222,
+ "step": 2059
+ },
+ {
+ "epoch": 11.771428571428572,
+ "grad_norm": 16.7213134765625,
+ "learning_rate": 4.247619047619048e-05,
+ "loss": 0.4194,
+ "step": 2060
+ },
+ {
+ "epoch": 11.777142857142858,
+ "grad_norm": 31.727859497070312,
+ "learning_rate": 4.246984126984127e-05,
+ "loss": 0.6186,
+ "step": 2061
+ },
+ {
+ "epoch": 11.782857142857143,
+ "grad_norm": 68.51502990722656,
+ "learning_rate": 4.246349206349207e-05,
+ "loss": 0.6848,
+ "step": 2062
+ },
+ {
+ "epoch": 11.788571428571428,
+ "grad_norm": 111.31018829345703,
+ "learning_rate": 4.245714285714285e-05,
+ "loss": 0.6621,
+ "step": 2063
+ },
+ {
+ "epoch": 11.794285714285714,
+ "grad_norm": 47.58015441894531,
+ "learning_rate": 4.245079365079365e-05,
+ "loss": 0.5018,
+ "step": 2064
+ },
+ {
+ "epoch": 11.8,
+ "grad_norm": 55.297119140625,
+ "learning_rate": 4.2444444444444445e-05,
+ "loss": 0.6728,
+ "step": 2065
+ },
+ {
+ "epoch": 11.805714285714286,
+ "grad_norm": 76.49820709228516,
+ "learning_rate": 4.243809523809524e-05,
+ "loss": 0.3881,
+ "step": 2066
+ },
+ {
+ "epoch": 11.811428571428571,
+ "grad_norm": 24.38382339477539,
+ "learning_rate": 4.243174603174603e-05,
+ "loss": 0.4981,
+ "step": 2067
+ },
+ {
+ "epoch": 11.817142857142857,
+ "grad_norm": 22.08475112915039,
+ "learning_rate": 4.242539682539683e-05,
+ "loss": 0.5994,
+ "step": 2068
+ },
+ {
+ "epoch": 11.822857142857142,
+ "grad_norm": 41.89626693725586,
+ "learning_rate": 4.241904761904762e-05,
+ "loss": 0.4922,
+ "step": 2069
+ },
+ {
+ "epoch": 11.82857142857143,
+ "grad_norm": 40.99779510498047,
+ "learning_rate": 4.2412698412698415e-05,
+ "loss": 0.406,
+ "step": 2070
+ },
+ {
+ "epoch": 11.834285714285715,
+ "grad_norm": 36.55776596069336,
+ "learning_rate": 4.240634920634921e-05,
+ "loss": 0.4732,
+ "step": 2071
+ },
+ {
+ "epoch": 11.84,
+ "grad_norm": 88.96212768554688,
+ "learning_rate": 4.24e-05,
+ "loss": 0.4956,
+ "step": 2072
+ },
+ {
+ "epoch": 11.845714285714285,
+ "grad_norm": 28.359384536743164,
+ "learning_rate": 4.239365079365079e-05,
+ "loss": 0.4471,
+ "step": 2073
+ },
+ {
+ "epoch": 11.85142857142857,
+ "grad_norm": 149.3414764404297,
+ "learning_rate": 4.2387301587301586e-05,
+ "loss": 0.6434,
+ "step": 2074
+ },
+ {
+ "epoch": 11.857142857142858,
+ "grad_norm": 46.888206481933594,
+ "learning_rate": 4.2380952380952385e-05,
+ "loss": 0.4271,
+ "step": 2075
+ },
+ {
+ "epoch": 11.862857142857143,
+ "grad_norm": 49.115962982177734,
+ "learning_rate": 4.237460317460318e-05,
+ "loss": 0.5293,
+ "step": 2076
+ },
+ {
+ "epoch": 11.868571428571428,
+ "grad_norm": 54.932952880859375,
+ "learning_rate": 4.236825396825397e-05,
+ "loss": 0.85,
+ "step": 2077
+ },
+ {
+ "epoch": 11.874285714285714,
+ "grad_norm": 50.6089973449707,
+ "learning_rate": 4.236190476190476e-05,
+ "loss": 0.756,
+ "step": 2078
+ },
+ {
+ "epoch": 11.88,
+ "grad_norm": 31.927751541137695,
+ "learning_rate": 4.235555555555556e-05,
+ "loss": 0.5294,
+ "step": 2079
+ },
+ {
+ "epoch": 11.885714285714286,
+ "grad_norm": 38.52204513549805,
+ "learning_rate": 4.234920634920635e-05,
+ "loss": 0.534,
+ "step": 2080
+ },
+ {
+ "epoch": 11.891428571428571,
+ "grad_norm": 27.282697677612305,
+ "learning_rate": 4.234285714285715e-05,
+ "loss": 0.455,
+ "step": 2081
+ },
+ {
+ "epoch": 11.897142857142857,
+ "grad_norm": 48.41023635864258,
+ "learning_rate": 4.233650793650794e-05,
+ "loss": 0.5042,
+ "step": 2082
+ },
+ {
+ "epoch": 11.902857142857142,
+ "grad_norm": 32.89781188964844,
+ "learning_rate": 4.233015873015873e-05,
+ "loss": 0.5412,
+ "step": 2083
+ },
+ {
+ "epoch": 11.90857142857143,
+ "grad_norm": 238.75128173828125,
+ "learning_rate": 4.2323809523809526e-05,
+ "loss": 0.5629,
+ "step": 2084
+ },
+ {
+ "epoch": 11.914285714285715,
+ "grad_norm": 32.940555572509766,
+ "learning_rate": 4.231746031746032e-05,
+ "loss": 0.4948,
+ "step": 2085
+ },
+ {
+ "epoch": 11.92,
+ "grad_norm": 68.61351013183594,
+ "learning_rate": 4.231111111111111e-05,
+ "loss": 0.6385,
+ "step": 2086
+ },
+ {
+ "epoch": 11.925714285714285,
+ "grad_norm": 164.97157287597656,
+ "learning_rate": 4.230476190476191e-05,
+ "loss": 0.7415,
+ "step": 2087
+ },
+ {
+ "epoch": 11.93142857142857,
+ "grad_norm": 61.077117919921875,
+ "learning_rate": 4.2298412698412696e-05,
+ "loss": 0.7373,
+ "step": 2088
+ },
+ {
+ "epoch": 11.937142857142858,
+ "grad_norm": 35.24998474121094,
+ "learning_rate": 4.2292063492063496e-05,
+ "loss": 0.6198,
+ "step": 2089
+ },
+ {
+ "epoch": 11.942857142857143,
+ "grad_norm": 52.322322845458984,
+ "learning_rate": 4.228571428571429e-05,
+ "loss": 0.6625,
+ "step": 2090
+ },
+ {
+ "epoch": 11.948571428571428,
+ "grad_norm": 26.97150993347168,
+ "learning_rate": 4.227936507936508e-05,
+ "loss": 0.4099,
+ "step": 2091
+ },
+ {
+ "epoch": 11.954285714285714,
+ "grad_norm": 48.867034912109375,
+ "learning_rate": 4.2273015873015874e-05,
+ "loss": 0.5592,
+ "step": 2092
+ },
+ {
+ "epoch": 11.96,
+ "grad_norm": 32.61470031738281,
+ "learning_rate": 4.226666666666667e-05,
+ "loss": 0.4504,
+ "step": 2093
+ },
+ {
+ "epoch": 11.965714285714286,
+ "grad_norm": 83.98099517822266,
+ "learning_rate": 4.226031746031746e-05,
+ "loss": 0.5111,
+ "step": 2094
+ },
+ {
+ "epoch": 11.971428571428572,
+ "grad_norm": 338.13092041015625,
+ "learning_rate": 4.225396825396826e-05,
+ "loss": 0.3596,
+ "step": 2095
+ },
+ {
+ "epoch": 11.977142857142857,
+ "grad_norm": 47.3306884765625,
+ "learning_rate": 4.2247619047619044e-05,
+ "loss": 0.4337,
+ "step": 2096
+ },
+ {
+ "epoch": 11.982857142857142,
+ "grad_norm": 23.791034698486328,
+ "learning_rate": 4.2241269841269844e-05,
+ "loss": 0.5078,
+ "step": 2097
+ },
+ {
+ "epoch": 11.98857142857143,
+ "grad_norm": 34.635372161865234,
+ "learning_rate": 4.2234920634920636e-05,
+ "loss": 0.6245,
+ "step": 2098
+ },
+ {
+ "epoch": 11.994285714285715,
+ "grad_norm": 47.677650451660156,
+ "learning_rate": 4.222857142857143e-05,
+ "loss": 0.482,
+ "step": 2099
+ },
+ {
+ "epoch": 12.0,
+ "grad_norm": 19.956443786621094,
+ "learning_rate": 4.222222222222222e-05,
+ "loss": 0.4969,
+ "step": 2100
+ },
+ {
+ "epoch": 12.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6676629781723022,
+ "eval_map": 0.9106,
+ "eval_map_50": 0.9664,
+ "eval_map_75": 0.9573,
+ "eval_map_large": 0.9107,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9106,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7819,
+ "eval_mar_10": 0.9613,
+ "eval_mar_100": 0.9717,
+ "eval_mar_100_per_class": 0.9717,
+ "eval_mar_large": 0.9717,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 17.7214,
+ "eval_samples_per_second": 16.59,
+ "eval_steps_per_second": 2.088,
+ "step": 2100
+ },
+ {
+ "epoch": 12.005714285714285,
+ "grad_norm": 19.098283767700195,
+ "learning_rate": 4.221587301587302e-05,
+ "loss": 0.4616,
+ "step": 2101
+ },
+ {
+ "epoch": 12.01142857142857,
+ "grad_norm": 116.86248779296875,
+ "learning_rate": 4.220952380952381e-05,
+ "loss": 0.498,
+ "step": 2102
+ },
+ {
+ "epoch": 12.017142857142858,
+ "grad_norm": 67.87767791748047,
+ "learning_rate": 4.2203174603174606e-05,
+ "loss": 0.4618,
+ "step": 2103
+ },
+ {
+ "epoch": 12.022857142857143,
+ "grad_norm": 44.77392578125,
+ "learning_rate": 4.21968253968254e-05,
+ "loss": 0.5808,
+ "step": 2104
+ },
+ {
+ "epoch": 12.028571428571428,
+ "grad_norm": 230.97474670410156,
+ "learning_rate": 4.219047619047619e-05,
+ "loss": 0.5865,
+ "step": 2105
+ },
+ {
+ "epoch": 12.034285714285714,
+ "grad_norm": 88.32511138916016,
+ "learning_rate": 4.2184126984126984e-05,
+ "loss": 0.5504,
+ "step": 2106
+ },
+ {
+ "epoch": 12.04,
+ "grad_norm": 30.843040466308594,
+ "learning_rate": 4.217777777777778e-05,
+ "loss": 0.5666,
+ "step": 2107
+ },
+ {
+ "epoch": 12.045714285714286,
+ "grad_norm": 36.712955474853516,
+ "learning_rate": 4.2171428571428576e-05,
+ "loss": 0.5809,
+ "step": 2108
+ },
+ {
+ "epoch": 12.051428571428572,
+ "grad_norm": 380.5534362792969,
+ "learning_rate": 4.216507936507937e-05,
+ "loss": 0.5338,
+ "step": 2109
+ },
+ {
+ "epoch": 12.057142857142857,
+ "grad_norm": 22.827157974243164,
+ "learning_rate": 4.215873015873016e-05,
+ "loss": 0.6134,
+ "step": 2110
+ },
+ {
+ "epoch": 12.062857142857142,
+ "grad_norm": 65.1633071899414,
+ "learning_rate": 4.2152380952380954e-05,
+ "loss": 0.4525,
+ "step": 2111
+ },
+ {
+ "epoch": 12.06857142857143,
+ "grad_norm": 40.37752151489258,
+ "learning_rate": 4.214603174603175e-05,
+ "loss": 0.599,
+ "step": 2112
+ },
+ {
+ "epoch": 12.074285714285715,
+ "grad_norm": 36.74299621582031,
+ "learning_rate": 4.213968253968254e-05,
+ "loss": 0.5092,
+ "step": 2113
+ },
+ {
+ "epoch": 12.08,
+ "grad_norm": 32.371429443359375,
+ "learning_rate": 4.213333333333334e-05,
+ "loss": 0.5349,
+ "step": 2114
+ },
+ {
+ "epoch": 12.085714285714285,
+ "grad_norm": 27.217981338500977,
+ "learning_rate": 4.212698412698413e-05,
+ "loss": 0.4384,
+ "step": 2115
+ },
+ {
+ "epoch": 12.09142857142857,
+ "grad_norm": 27.24367904663086,
+ "learning_rate": 4.2120634920634924e-05,
+ "loss": 0.4383,
+ "step": 2116
+ },
+ {
+ "epoch": 12.097142857142858,
+ "grad_norm": 71.84082794189453,
+ "learning_rate": 4.211428571428572e-05,
+ "loss": 0.5462,
+ "step": 2117
+ },
+ {
+ "epoch": 12.102857142857143,
+ "grad_norm": 27.53964614868164,
+ "learning_rate": 4.210793650793651e-05,
+ "loss": 0.4367,
+ "step": 2118
+ },
+ {
+ "epoch": 12.108571428571429,
+ "grad_norm": 71.42292785644531,
+ "learning_rate": 4.21015873015873e-05,
+ "loss": 0.5036,
+ "step": 2119
+ },
+ {
+ "epoch": 12.114285714285714,
+ "grad_norm": 55.60530471801758,
+ "learning_rate": 4.20952380952381e-05,
+ "loss": 0.615,
+ "step": 2120
+ },
+ {
+ "epoch": 12.12,
+ "grad_norm": 66.84548950195312,
+ "learning_rate": 4.208888888888889e-05,
+ "loss": 0.5791,
+ "step": 2121
+ },
+ {
+ "epoch": 12.125714285714286,
+ "grad_norm": 143.91664123535156,
+ "learning_rate": 4.208253968253969e-05,
+ "loss": 0.4915,
+ "step": 2122
+ },
+ {
+ "epoch": 12.131428571428572,
+ "grad_norm": 31.189228057861328,
+ "learning_rate": 4.207619047619048e-05,
+ "loss": 0.8509,
+ "step": 2123
+ },
+ {
+ "epoch": 12.137142857142857,
+ "grad_norm": 24.511716842651367,
+ "learning_rate": 4.206984126984127e-05,
+ "loss": 0.5659,
+ "step": 2124
+ },
+ {
+ "epoch": 12.142857142857142,
+ "grad_norm": 20.814542770385742,
+ "learning_rate": 4.2063492063492065e-05,
+ "loss": 0.5101,
+ "step": 2125
+ },
+ {
+ "epoch": 12.14857142857143,
+ "grad_norm": 49.04802703857422,
+ "learning_rate": 4.2057142857142864e-05,
+ "loss": 0.3948,
+ "step": 2126
+ },
+ {
+ "epoch": 12.154285714285715,
+ "grad_norm": 64.11085510253906,
+ "learning_rate": 4.205079365079365e-05,
+ "loss": 0.4462,
+ "step": 2127
+ },
+ {
+ "epoch": 12.16,
+ "grad_norm": 36.154884338378906,
+ "learning_rate": 4.204444444444445e-05,
+ "loss": 0.6394,
+ "step": 2128
+ },
+ {
+ "epoch": 12.165714285714285,
+ "grad_norm": 53.460052490234375,
+ "learning_rate": 4.2038095238095235e-05,
+ "loss": 0.4813,
+ "step": 2129
+ },
+ {
+ "epoch": 12.17142857142857,
+ "grad_norm": 70.51872253417969,
+ "learning_rate": 4.2031746031746034e-05,
+ "loss": 0.4803,
+ "step": 2130
+ },
+ {
+ "epoch": 12.177142857142858,
+ "grad_norm": 65.65107727050781,
+ "learning_rate": 4.202539682539683e-05,
+ "loss": 0.4508,
+ "step": 2131
+ },
+ {
+ "epoch": 12.182857142857143,
+ "grad_norm": 51.026554107666016,
+ "learning_rate": 4.201904761904762e-05,
+ "loss": 0.322,
+ "step": 2132
+ },
+ {
+ "epoch": 12.188571428571429,
+ "grad_norm": 39.102333068847656,
+ "learning_rate": 4.201269841269841e-05,
+ "loss": 0.4516,
+ "step": 2133
+ },
+ {
+ "epoch": 12.194285714285714,
+ "grad_norm": 38.93904113769531,
+ "learning_rate": 4.200634920634921e-05,
+ "loss": 0.5626,
+ "step": 2134
+ },
+ {
+ "epoch": 12.2,
+ "grad_norm": 24.30509376525879,
+ "learning_rate": 4.2e-05,
+ "loss": 0.3237,
+ "step": 2135
+ },
+ {
+ "epoch": 12.205714285714286,
+ "grad_norm": 15.547541618347168,
+ "learning_rate": 4.19936507936508e-05,
+ "loss": 0.5393,
+ "step": 2136
+ },
+ {
+ "epoch": 12.211428571428572,
+ "grad_norm": 53.673561096191406,
+ "learning_rate": 4.198730158730159e-05,
+ "loss": 0.6376,
+ "step": 2137
+ },
+ {
+ "epoch": 12.217142857142857,
+ "grad_norm": 81.23772430419922,
+ "learning_rate": 4.198095238095238e-05,
+ "loss": 0.5826,
+ "step": 2138
+ },
+ {
+ "epoch": 12.222857142857142,
+ "grad_norm": 28.7341251373291,
+ "learning_rate": 4.1974603174603175e-05,
+ "loss": 0.5006,
+ "step": 2139
+ },
+ {
+ "epoch": 12.228571428571428,
+ "grad_norm": 38.085105895996094,
+ "learning_rate": 4.196825396825397e-05,
+ "loss": 0.5555,
+ "step": 2140
+ },
+ {
+ "epoch": 12.234285714285715,
+ "grad_norm": 41.27619934082031,
+ "learning_rate": 4.196190476190476e-05,
+ "loss": 0.4308,
+ "step": 2141
+ },
+ {
+ "epoch": 12.24,
+ "grad_norm": 31.69409942626953,
+ "learning_rate": 4.195555555555556e-05,
+ "loss": 0.6899,
+ "step": 2142
+ },
+ {
+ "epoch": 12.245714285714286,
+ "grad_norm": 23.529006958007812,
+ "learning_rate": 4.194920634920635e-05,
+ "loss": 0.4078,
+ "step": 2143
+ },
+ {
+ "epoch": 12.251428571428571,
+ "grad_norm": 77.69281005859375,
+ "learning_rate": 4.1942857142857145e-05,
+ "loss": 0.4954,
+ "step": 2144
+ },
+ {
+ "epoch": 12.257142857142856,
+ "grad_norm": 62.58024215698242,
+ "learning_rate": 4.193650793650794e-05,
+ "loss": 0.4758,
+ "step": 2145
+ },
+ {
+ "epoch": 12.262857142857143,
+ "grad_norm": 68.62194061279297,
+ "learning_rate": 4.193015873015873e-05,
+ "loss": 0.5499,
+ "step": 2146
+ },
+ {
+ "epoch": 12.268571428571429,
+ "grad_norm": 41.420654296875,
+ "learning_rate": 4.192380952380953e-05,
+ "loss": 0.4927,
+ "step": 2147
+ },
+ {
+ "epoch": 12.274285714285714,
+ "grad_norm": 51.051979064941406,
+ "learning_rate": 4.191746031746032e-05,
+ "loss": 0.4567,
+ "step": 2148
+ },
+ {
+ "epoch": 12.28,
+ "grad_norm": 37.83919906616211,
+ "learning_rate": 4.1911111111111115e-05,
+ "loss": 0.627,
+ "step": 2149
+ },
+ {
+ "epoch": 12.285714285714286,
+ "grad_norm": 56.637603759765625,
+ "learning_rate": 4.190476190476191e-05,
+ "loss": 0.4963,
+ "step": 2150
+ },
+ {
+ "epoch": 12.291428571428572,
+ "grad_norm": 58.017276763916016,
+ "learning_rate": 4.18984126984127e-05,
+ "loss": 0.592,
+ "step": 2151
+ },
+ {
+ "epoch": 12.297142857142857,
+ "grad_norm": 321.4501037597656,
+ "learning_rate": 4.189206349206349e-05,
+ "loss": 0.5086,
+ "step": 2152
+ },
+ {
+ "epoch": 12.302857142857142,
+ "grad_norm": 28.185853958129883,
+ "learning_rate": 4.188571428571429e-05,
+ "loss": 0.458,
+ "step": 2153
+ },
+ {
+ "epoch": 12.308571428571428,
+ "grad_norm": 28.150747299194336,
+ "learning_rate": 4.187936507936508e-05,
+ "loss": 0.3269,
+ "step": 2154
+ },
+ {
+ "epoch": 12.314285714285715,
+ "grad_norm": 52.02998733520508,
+ "learning_rate": 4.187301587301588e-05,
+ "loss": 0.6794,
+ "step": 2155
+ },
+ {
+ "epoch": 12.32,
+ "grad_norm": 110.56523132324219,
+ "learning_rate": 4.186666666666667e-05,
+ "loss": 0.3929,
+ "step": 2156
+ },
+ {
+ "epoch": 12.325714285714286,
+ "grad_norm": 23.185405731201172,
+ "learning_rate": 4.186031746031746e-05,
+ "loss": 0.6975,
+ "step": 2157
+ },
+ {
+ "epoch": 12.331428571428571,
+ "grad_norm": 235.8870086669922,
+ "learning_rate": 4.1853968253968255e-05,
+ "loss": 0.4131,
+ "step": 2158
+ },
+ {
+ "epoch": 12.337142857142856,
+ "grad_norm": 53.238712310791016,
+ "learning_rate": 4.1847619047619055e-05,
+ "loss": 0.4683,
+ "step": 2159
+ },
+ {
+ "epoch": 12.342857142857143,
+ "grad_norm": 38.866641998291016,
+ "learning_rate": 4.184126984126984e-05,
+ "loss": 0.4686,
+ "step": 2160
+ },
+ {
+ "epoch": 12.348571428571429,
+ "grad_norm": 29.63631820678711,
+ "learning_rate": 4.183492063492064e-05,
+ "loss": 0.6512,
+ "step": 2161
+ },
+ {
+ "epoch": 12.354285714285714,
+ "grad_norm": 34.041786193847656,
+ "learning_rate": 4.1828571428571426e-05,
+ "loss": 0.4923,
+ "step": 2162
+ },
+ {
+ "epoch": 12.36,
+ "grad_norm": 51.90822219848633,
+ "learning_rate": 4.1822222222222225e-05,
+ "loss": 0.4309,
+ "step": 2163
+ },
+ {
+ "epoch": 12.365714285714287,
+ "grad_norm": 54.88132858276367,
+ "learning_rate": 4.181587301587302e-05,
+ "loss": 0.4355,
+ "step": 2164
+ },
+ {
+ "epoch": 12.371428571428572,
+ "grad_norm": 37.65031433105469,
+ "learning_rate": 4.180952380952381e-05,
+ "loss": 0.4204,
+ "step": 2165
+ },
+ {
+ "epoch": 12.377142857142857,
+ "grad_norm": 45.69845199584961,
+ "learning_rate": 4.18031746031746e-05,
+ "loss": 0.3058,
+ "step": 2166
+ },
+ {
+ "epoch": 12.382857142857143,
+ "grad_norm": 33.428993225097656,
+ "learning_rate": 4.17968253968254e-05,
+ "loss": 0.3404,
+ "step": 2167
+ },
+ {
+ "epoch": 12.388571428571428,
+ "grad_norm": 28.256479263305664,
+ "learning_rate": 4.179047619047619e-05,
+ "loss": 0.865,
+ "step": 2168
+ },
+ {
+ "epoch": 12.394285714285715,
+ "grad_norm": 71.89823150634766,
+ "learning_rate": 4.178412698412699e-05,
+ "loss": 0.7565,
+ "step": 2169
+ },
+ {
+ "epoch": 12.4,
+ "grad_norm": 43.25562286376953,
+ "learning_rate": 4.177777777777778e-05,
+ "loss": 0.4256,
+ "step": 2170
+ },
+ {
+ "epoch": 12.405714285714286,
+ "grad_norm": 31.406131744384766,
+ "learning_rate": 4.177142857142857e-05,
+ "loss": 0.6001,
+ "step": 2171
+ },
+ {
+ "epoch": 12.411428571428571,
+ "grad_norm": 70.84765625,
+ "learning_rate": 4.1765079365079366e-05,
+ "loss": 0.4689,
+ "step": 2172
+ },
+ {
+ "epoch": 12.417142857142856,
+ "grad_norm": 48.25562286376953,
+ "learning_rate": 4.175873015873016e-05,
+ "loss": 0.4392,
+ "step": 2173
+ },
+ {
+ "epoch": 12.422857142857143,
+ "grad_norm": 35.83698272705078,
+ "learning_rate": 4.175238095238095e-05,
+ "loss": 0.5675,
+ "step": 2174
+ },
+ {
+ "epoch": 12.428571428571429,
+ "grad_norm": 72.83888244628906,
+ "learning_rate": 4.174603174603175e-05,
+ "loss": 0.4522,
+ "step": 2175
+ },
+ {
+ "epoch": 12.434285714285714,
+ "grad_norm": 117.34611511230469,
+ "learning_rate": 4.1739682539682536e-05,
+ "loss": 0.5442,
+ "step": 2176
+ },
+ {
+ "epoch": 12.44,
+ "grad_norm": 81.17318725585938,
+ "learning_rate": 4.1733333333333336e-05,
+ "loss": 0.4339,
+ "step": 2177
+ },
+ {
+ "epoch": 12.445714285714285,
+ "grad_norm": 55.67113494873047,
+ "learning_rate": 4.172698412698413e-05,
+ "loss": 0.4434,
+ "step": 2178
+ },
+ {
+ "epoch": 12.451428571428572,
+ "grad_norm": 27.9365177154541,
+ "learning_rate": 4.172063492063492e-05,
+ "loss": 0.46,
+ "step": 2179
+ },
+ {
+ "epoch": 12.457142857142857,
+ "grad_norm": 61.951812744140625,
+ "learning_rate": 4.1714285714285714e-05,
+ "loss": 0.5254,
+ "step": 2180
+ },
+ {
+ "epoch": 12.462857142857143,
+ "grad_norm": 30.21892738342285,
+ "learning_rate": 4.170793650793651e-05,
+ "loss": 0.526,
+ "step": 2181
+ },
+ {
+ "epoch": 12.468571428571428,
+ "grad_norm": 41.746585845947266,
+ "learning_rate": 4.1701587301587306e-05,
+ "loss": 0.4023,
+ "step": 2182
+ },
+ {
+ "epoch": 12.474285714285715,
+ "grad_norm": 27.280019760131836,
+ "learning_rate": 4.16952380952381e-05,
+ "loss": 0.4215,
+ "step": 2183
+ },
+ {
+ "epoch": 12.48,
+ "grad_norm": 33.64419937133789,
+ "learning_rate": 4.168888888888889e-05,
+ "loss": 0.414,
+ "step": 2184
+ },
+ {
+ "epoch": 12.485714285714286,
+ "grad_norm": 87.38959503173828,
+ "learning_rate": 4.1682539682539684e-05,
+ "loss": 0.6313,
+ "step": 2185
+ },
+ {
+ "epoch": 12.491428571428571,
+ "grad_norm": 44.78113555908203,
+ "learning_rate": 4.167619047619048e-05,
+ "loss": 0.4985,
+ "step": 2186
+ },
+ {
+ "epoch": 12.497142857142856,
+ "grad_norm": 52.0896110534668,
+ "learning_rate": 4.166984126984127e-05,
+ "loss": 0.581,
+ "step": 2187
+ },
+ {
+ "epoch": 12.502857142857144,
+ "grad_norm": 97.08638000488281,
+ "learning_rate": 4.166349206349207e-05,
+ "loss": 0.78,
+ "step": 2188
+ },
+ {
+ "epoch": 12.508571428571429,
+ "grad_norm": 60.737979888916016,
+ "learning_rate": 4.165714285714286e-05,
+ "loss": 0.6149,
+ "step": 2189
+ },
+ {
+ "epoch": 12.514285714285714,
+ "grad_norm": 117.92765045166016,
+ "learning_rate": 4.1650793650793654e-05,
+ "loss": 0.5493,
+ "step": 2190
+ },
+ {
+ "epoch": 12.52,
+ "grad_norm": 27.144393920898438,
+ "learning_rate": 4.1644444444444446e-05,
+ "loss": 0.6426,
+ "step": 2191
+ },
+ {
+ "epoch": 12.525714285714285,
+ "grad_norm": 53.552608489990234,
+ "learning_rate": 4.1638095238095246e-05,
+ "loss": 0.6946,
+ "step": 2192
+ },
+ {
+ "epoch": 12.531428571428572,
+ "grad_norm": 41.25876998901367,
+ "learning_rate": 4.163174603174603e-05,
+ "loss": 0.6304,
+ "step": 2193
+ },
+ {
+ "epoch": 12.537142857142857,
+ "grad_norm": 36.41542053222656,
+ "learning_rate": 4.162539682539683e-05,
+ "loss": 0.4749,
+ "step": 2194
+ },
+ {
+ "epoch": 12.542857142857143,
+ "grad_norm": 48.75900650024414,
+ "learning_rate": 4.161904761904762e-05,
+ "loss": 0.5893,
+ "step": 2195
+ },
+ {
+ "epoch": 12.548571428571428,
+ "grad_norm": 59.34621810913086,
+ "learning_rate": 4.1612698412698416e-05,
+ "loss": 0.4529,
+ "step": 2196
+ },
+ {
+ "epoch": 12.554285714285715,
+ "grad_norm": 44.70014953613281,
+ "learning_rate": 4.160634920634921e-05,
+ "loss": 0.3575,
+ "step": 2197
+ },
+ {
+ "epoch": 12.56,
+ "grad_norm": 38.56593704223633,
+ "learning_rate": 4.16e-05,
+ "loss": 0.6915,
+ "step": 2198
+ },
+ {
+ "epoch": 12.565714285714286,
+ "grad_norm": 85.35823822021484,
+ "learning_rate": 4.1593650793650794e-05,
+ "loss": 0.4099,
+ "step": 2199
+ },
+ {
+ "epoch": 12.571428571428571,
+ "grad_norm": 47.486751556396484,
+ "learning_rate": 4.1587301587301594e-05,
+ "loss": 0.583,
+ "step": 2200
+ },
+ {
+ "epoch": 12.577142857142857,
+ "grad_norm": 53.076847076416016,
+ "learning_rate": 4.158095238095238e-05,
+ "loss": 0.5741,
+ "step": 2201
+ },
+ {
+ "epoch": 12.582857142857144,
+ "grad_norm": 58.12879943847656,
+ "learning_rate": 4.157460317460318e-05,
+ "loss": 0.4875,
+ "step": 2202
+ },
+ {
+ "epoch": 12.588571428571429,
+ "grad_norm": 34.542964935302734,
+ "learning_rate": 4.156825396825397e-05,
+ "loss": 0.4918,
+ "step": 2203
+ },
+ {
+ "epoch": 12.594285714285714,
+ "grad_norm": 30.032142639160156,
+ "learning_rate": 4.1561904761904764e-05,
+ "loss": 0.4003,
+ "step": 2204
+ },
+ {
+ "epoch": 12.6,
+ "grad_norm": 95.49871063232422,
+ "learning_rate": 4.155555555555556e-05,
+ "loss": 0.4535,
+ "step": 2205
+ },
+ {
+ "epoch": 12.605714285714285,
+ "grad_norm": 61.23542404174805,
+ "learning_rate": 4.154920634920635e-05,
+ "loss": 0.6763,
+ "step": 2206
+ },
+ {
+ "epoch": 12.611428571428572,
+ "grad_norm": 48.1504020690918,
+ "learning_rate": 4.154285714285714e-05,
+ "loss": 0.5599,
+ "step": 2207
+ },
+ {
+ "epoch": 12.617142857142857,
+ "grad_norm": 56.297393798828125,
+ "learning_rate": 4.153650793650794e-05,
+ "loss": 0.3831,
+ "step": 2208
+ },
+ {
+ "epoch": 12.622857142857143,
+ "grad_norm": 35.83076477050781,
+ "learning_rate": 4.153015873015873e-05,
+ "loss": 0.5612,
+ "step": 2209
+ },
+ {
+ "epoch": 12.628571428571428,
+ "grad_norm": 37.802852630615234,
+ "learning_rate": 4.152380952380953e-05,
+ "loss": 0.31,
+ "step": 2210
+ },
+ {
+ "epoch": 12.634285714285713,
+ "grad_norm": 39.768943786621094,
+ "learning_rate": 4.151746031746032e-05,
+ "loss": 0.4237,
+ "step": 2211
+ },
+ {
+ "epoch": 12.64,
+ "grad_norm": 19.604061126708984,
+ "learning_rate": 4.151111111111111e-05,
+ "loss": 0.5153,
+ "step": 2212
+ },
+ {
+ "epoch": 12.645714285714286,
+ "grad_norm": 71.32669830322266,
+ "learning_rate": 4.1504761904761905e-05,
+ "loss": 0.6764,
+ "step": 2213
+ },
+ {
+ "epoch": 12.651428571428571,
+ "grad_norm": 120.34733581542969,
+ "learning_rate": 4.1498412698412704e-05,
+ "loss": 0.4144,
+ "step": 2214
+ },
+ {
+ "epoch": 12.657142857142857,
+ "grad_norm": 87.2412338256836,
+ "learning_rate": 4.149206349206349e-05,
+ "loss": 0.3574,
+ "step": 2215
+ },
+ {
+ "epoch": 12.662857142857142,
+ "grad_norm": 40.069541931152344,
+ "learning_rate": 4.148571428571429e-05,
+ "loss": 0.5624,
+ "step": 2216
+ },
+ {
+ "epoch": 12.668571428571429,
+ "grad_norm": 27.889135360717773,
+ "learning_rate": 4.147936507936508e-05,
+ "loss": 0.536,
+ "step": 2217
+ },
+ {
+ "epoch": 12.674285714285714,
+ "grad_norm": 43.12520980834961,
+ "learning_rate": 4.1473015873015875e-05,
+ "loss": 0.6002,
+ "step": 2218
+ },
+ {
+ "epoch": 12.68,
+ "grad_norm": 44.10758590698242,
+ "learning_rate": 4.146666666666667e-05,
+ "loss": 0.5036,
+ "step": 2219
+ },
+ {
+ "epoch": 12.685714285714285,
+ "grad_norm": 23.964990615844727,
+ "learning_rate": 4.146031746031746e-05,
+ "loss": 0.3921,
+ "step": 2220
+ },
+ {
+ "epoch": 12.691428571428572,
+ "grad_norm": 52.23629379272461,
+ "learning_rate": 4.145396825396826e-05,
+ "loss": 0.4159,
+ "step": 2221
+ },
+ {
+ "epoch": 12.697142857142858,
+ "grad_norm": 20.55379295349121,
+ "learning_rate": 4.144761904761905e-05,
+ "loss": 0.4629,
+ "step": 2222
+ },
+ {
+ "epoch": 12.702857142857143,
+ "grad_norm": 62.3278694152832,
+ "learning_rate": 4.1441269841269845e-05,
+ "loss": 0.5655,
+ "step": 2223
+ },
+ {
+ "epoch": 12.708571428571428,
+ "grad_norm": 86.1930923461914,
+ "learning_rate": 4.143492063492064e-05,
+ "loss": 0.3896,
+ "step": 2224
+ },
+ {
+ "epoch": 12.714285714285714,
+ "grad_norm": 71.3177719116211,
+ "learning_rate": 4.1428571428571437e-05,
+ "loss": 0.563,
+ "step": 2225
+ },
+ {
+ "epoch": 12.72,
+ "grad_norm": 63.88593292236328,
+ "learning_rate": 4.142222222222222e-05,
+ "loss": 0.5526,
+ "step": 2226
+ },
+ {
+ "epoch": 12.725714285714286,
+ "grad_norm": 59.57171630859375,
+ "learning_rate": 4.141587301587302e-05,
+ "loss": 0.5307,
+ "step": 2227
+ },
+ {
+ "epoch": 12.731428571428571,
+ "grad_norm": 19.78388023376465,
+ "learning_rate": 4.140952380952381e-05,
+ "loss": 0.3957,
+ "step": 2228
+ },
+ {
+ "epoch": 12.737142857142857,
+ "grad_norm": 32.11157989501953,
+ "learning_rate": 4.140317460317461e-05,
+ "loss": 0.591,
+ "step": 2229
+ },
+ {
+ "epoch": 12.742857142857144,
+ "grad_norm": 36.756797790527344,
+ "learning_rate": 4.13968253968254e-05,
+ "loss": 0.4532,
+ "step": 2230
+ },
+ {
+ "epoch": 12.748571428571429,
+ "grad_norm": 19.290258407592773,
+ "learning_rate": 4.139047619047619e-05,
+ "loss": 0.5087,
+ "step": 2231
+ },
+ {
+ "epoch": 12.754285714285714,
+ "grad_norm": 97.21000671386719,
+ "learning_rate": 4.1384126984126985e-05,
+ "loss": 0.4911,
+ "step": 2232
+ },
+ {
+ "epoch": 12.76,
+ "grad_norm": 648.7294921875,
+ "learning_rate": 4.1377777777777784e-05,
+ "loss": 0.4904,
+ "step": 2233
+ },
+ {
+ "epoch": 12.765714285714285,
+ "grad_norm": 35.48090744018555,
+ "learning_rate": 4.137142857142857e-05,
+ "loss": 0.5012,
+ "step": 2234
+ },
+ {
+ "epoch": 12.771428571428572,
+ "grad_norm": 43.22107696533203,
+ "learning_rate": 4.136507936507937e-05,
+ "loss": 0.4422,
+ "step": 2235
+ },
+ {
+ "epoch": 12.777142857142858,
+ "grad_norm": 113.79402160644531,
+ "learning_rate": 4.135873015873016e-05,
+ "loss": 0.5195,
+ "step": 2236
+ },
+ {
+ "epoch": 12.782857142857143,
+ "grad_norm": 85.96818542480469,
+ "learning_rate": 4.1352380952380955e-05,
+ "loss": 0.4418,
+ "step": 2237
+ },
+ {
+ "epoch": 12.788571428571428,
+ "grad_norm": 18.308883666992188,
+ "learning_rate": 4.134603174603175e-05,
+ "loss": 0.5449,
+ "step": 2238
+ },
+ {
+ "epoch": 12.794285714285714,
+ "grad_norm": 29.693681716918945,
+ "learning_rate": 4.133968253968254e-05,
+ "loss": 0.4464,
+ "step": 2239
+ },
+ {
+ "epoch": 12.8,
+ "grad_norm": 211.36032104492188,
+ "learning_rate": 4.133333333333333e-05,
+ "loss": 0.5757,
+ "step": 2240
+ },
+ {
+ "epoch": 12.805714285714286,
+ "grad_norm": 35.264339447021484,
+ "learning_rate": 4.132698412698413e-05,
+ "loss": 0.4297,
+ "step": 2241
+ },
+ {
+ "epoch": 12.811428571428571,
+ "grad_norm": 35.70240020751953,
+ "learning_rate": 4.132063492063492e-05,
+ "loss": 0.464,
+ "step": 2242
+ },
+ {
+ "epoch": 12.817142857142857,
+ "grad_norm": 16.335657119750977,
+ "learning_rate": 4.131428571428572e-05,
+ "loss": 0.4074,
+ "step": 2243
+ },
+ {
+ "epoch": 12.822857142857142,
+ "grad_norm": 41.2120475769043,
+ "learning_rate": 4.130793650793651e-05,
+ "loss": 0.401,
+ "step": 2244
+ },
+ {
+ "epoch": 12.82857142857143,
+ "grad_norm": 25.838830947875977,
+ "learning_rate": 4.13015873015873e-05,
+ "loss": 0.5388,
+ "step": 2245
+ },
+ {
+ "epoch": 12.834285714285715,
+ "grad_norm": 38.36637496948242,
+ "learning_rate": 4.1295238095238095e-05,
+ "loss": 0.4832,
+ "step": 2246
+ },
+ {
+ "epoch": 12.84,
+ "grad_norm": 142.09210205078125,
+ "learning_rate": 4.1288888888888895e-05,
+ "loss": 0.5239,
+ "step": 2247
+ },
+ {
+ "epoch": 12.845714285714285,
+ "grad_norm": 101.1116943359375,
+ "learning_rate": 4.128253968253968e-05,
+ "loss": 0.5064,
+ "step": 2248
+ },
+ {
+ "epoch": 12.85142857142857,
+ "grad_norm": 38.06106948852539,
+ "learning_rate": 4.127619047619048e-05,
+ "loss": 0.3722,
+ "step": 2249
+ },
+ {
+ "epoch": 12.857142857142858,
+ "grad_norm": 40.42740249633789,
+ "learning_rate": 4.126984126984127e-05,
+ "loss": 0.4188,
+ "step": 2250
+ },
+ {
+ "epoch": 12.862857142857143,
+ "grad_norm": 25.689695358276367,
+ "learning_rate": 4.1263492063492065e-05,
+ "loss": 0.7067,
+ "step": 2251
+ },
+ {
+ "epoch": 12.868571428571428,
+ "grad_norm": 44.955421447753906,
+ "learning_rate": 4.125714285714286e-05,
+ "loss": 0.5671,
+ "step": 2252
+ },
+ {
+ "epoch": 12.874285714285714,
+ "grad_norm": 49.81996536254883,
+ "learning_rate": 4.125079365079365e-05,
+ "loss": 0.5818,
+ "step": 2253
+ },
+ {
+ "epoch": 12.88,
+ "grad_norm": 34.02114486694336,
+ "learning_rate": 4.124444444444444e-05,
+ "loss": 0.4568,
+ "step": 2254
+ },
+ {
+ "epoch": 12.885714285714286,
+ "grad_norm": 26.276948928833008,
+ "learning_rate": 4.123809523809524e-05,
+ "loss": 0.4356,
+ "step": 2255
+ },
+ {
+ "epoch": 12.891428571428571,
+ "grad_norm": 78.74541473388672,
+ "learning_rate": 4.1231746031746035e-05,
+ "loss": 0.541,
+ "step": 2256
+ },
+ {
+ "epoch": 12.897142857142857,
+ "grad_norm": 20.253076553344727,
+ "learning_rate": 4.122539682539683e-05,
+ "loss": 0.3611,
+ "step": 2257
+ },
+ {
+ "epoch": 12.902857142857142,
+ "grad_norm": 28.861141204833984,
+ "learning_rate": 4.121904761904762e-05,
+ "loss": 0.4468,
+ "step": 2258
+ },
+ {
+ "epoch": 12.90857142857143,
+ "grad_norm": 53.7813606262207,
+ "learning_rate": 4.121269841269841e-05,
+ "loss": 0.5349,
+ "step": 2259
+ },
+ {
+ "epoch": 12.914285714285715,
+ "grad_norm": 93.12281799316406,
+ "learning_rate": 4.120634920634921e-05,
+ "loss": 0.3874,
+ "step": 2260
+ },
+ {
+ "epoch": 12.92,
+ "grad_norm": 72.68656158447266,
+ "learning_rate": 4.12e-05,
+ "loss": 0.4197,
+ "step": 2261
+ },
+ {
+ "epoch": 12.925714285714285,
+ "grad_norm": 48.06651306152344,
+ "learning_rate": 4.11936507936508e-05,
+ "loss": 0.5674,
+ "step": 2262
+ },
+ {
+ "epoch": 12.93142857142857,
+ "grad_norm": 129.9513702392578,
+ "learning_rate": 4.118730158730159e-05,
+ "loss": 0.3995,
+ "step": 2263
+ },
+ {
+ "epoch": 12.937142857142858,
+ "grad_norm": 79.52442932128906,
+ "learning_rate": 4.118095238095238e-05,
+ "loss": 0.4851,
+ "step": 2264
+ },
+ {
+ "epoch": 12.942857142857143,
+ "grad_norm": 27.562313079833984,
+ "learning_rate": 4.1174603174603176e-05,
+ "loss": 0.502,
+ "step": 2265
+ },
+ {
+ "epoch": 12.948571428571428,
+ "grad_norm": 47.59757995605469,
+ "learning_rate": 4.1168253968253975e-05,
+ "loss": 0.4192,
+ "step": 2266
+ },
+ {
+ "epoch": 12.954285714285714,
+ "grad_norm": 251.0091552734375,
+ "learning_rate": 4.116190476190476e-05,
+ "loss": 0.8031,
+ "step": 2267
+ },
+ {
+ "epoch": 12.96,
+ "grad_norm": 35.71852111816406,
+ "learning_rate": 4.115555555555556e-05,
+ "loss": 0.4427,
+ "step": 2268
+ },
+ {
+ "epoch": 12.965714285714286,
+ "grad_norm": 109.42433166503906,
+ "learning_rate": 4.1149206349206346e-05,
+ "loss": 0.5537,
+ "step": 2269
+ },
+ {
+ "epoch": 12.971428571428572,
+ "grad_norm": 69.72895050048828,
+ "learning_rate": 4.1142857142857146e-05,
+ "loss": 0.5632,
+ "step": 2270
+ },
+ {
+ "epoch": 12.977142857142857,
+ "grad_norm": 46.306617736816406,
+ "learning_rate": 4.113650793650794e-05,
+ "loss": 0.2779,
+ "step": 2271
+ },
+ {
+ "epoch": 12.982857142857142,
+ "grad_norm": 55.917381286621094,
+ "learning_rate": 4.113015873015873e-05,
+ "loss": 0.6122,
+ "step": 2272
+ },
+ {
+ "epoch": 12.98857142857143,
+ "grad_norm": 30.848764419555664,
+ "learning_rate": 4.1123809523809524e-05,
+ "loss": 0.4483,
+ "step": 2273
+ },
+ {
+ "epoch": 12.994285714285715,
+ "grad_norm": 43.35200500488281,
+ "learning_rate": 4.111746031746032e-05,
+ "loss": 0.4658,
+ "step": 2274
+ },
+ {
+ "epoch": 13.0,
+ "grad_norm": 48.12958526611328,
+ "learning_rate": 4.111111111111111e-05,
+ "loss": 0.4433,
+ "step": 2275
+ },
+ {
+ "epoch": 13.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6659702658653259,
+ "eval_map": 0.9143,
+ "eval_map_50": 0.9608,
+ "eval_map_75": 0.9477,
+ "eval_map_large": 0.9144,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9143,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7806,
+ "eval_mar_10": 0.9721,
+ "eval_mar_100": 0.9819,
+ "eval_mar_100_per_class": 0.9819,
+ "eval_mar_large": 0.9819,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 15.677,
+ "eval_samples_per_second": 18.754,
+ "eval_steps_per_second": 2.36,
+ "step": 2275
+ },
+ {
+ "epoch": 13.005714285714285,
+ "grad_norm": 85.56483459472656,
+ "learning_rate": 4.110476190476191e-05,
+ "loss": 0.445,
+ "step": 2276
+ },
+ {
+ "epoch": 13.01142857142857,
+ "grad_norm": 27.930274963378906,
+ "learning_rate": 4.10984126984127e-05,
+ "loss": 0.7734,
+ "step": 2277
+ },
+ {
+ "epoch": 13.017142857142858,
+ "grad_norm": 51.549617767333984,
+ "learning_rate": 4.1092063492063494e-05,
+ "loss": 0.4166,
+ "step": 2278
+ },
+ {
+ "epoch": 13.022857142857143,
+ "grad_norm": 81.78390502929688,
+ "learning_rate": 4.1085714285714286e-05,
+ "loss": 0.4473,
+ "step": 2279
+ },
+ {
+ "epoch": 13.028571428571428,
+ "grad_norm": 53.621219635009766,
+ "learning_rate": 4.107936507936508e-05,
+ "loss": 0.4609,
+ "step": 2280
+ },
+ {
+ "epoch": 13.034285714285714,
+ "grad_norm": 101.24864959716797,
+ "learning_rate": 4.107301587301587e-05,
+ "loss": 0.5958,
+ "step": 2281
+ },
+ {
+ "epoch": 13.04,
+ "grad_norm": 45.11786651611328,
+ "learning_rate": 4.106666666666667e-05,
+ "loss": 0.5586,
+ "step": 2282
+ },
+ {
+ "epoch": 13.045714285714286,
+ "grad_norm": 25.740419387817383,
+ "learning_rate": 4.106031746031746e-05,
+ "loss": 0.4583,
+ "step": 2283
+ },
+ {
+ "epoch": 13.051428571428572,
+ "grad_norm": 31.131811141967773,
+ "learning_rate": 4.1053968253968256e-05,
+ "loss": 0.4216,
+ "step": 2284
+ },
+ {
+ "epoch": 13.057142857142857,
+ "grad_norm": 139.30738830566406,
+ "learning_rate": 4.104761904761905e-05,
+ "loss": 0.5814,
+ "step": 2285
+ },
+ {
+ "epoch": 13.062857142857142,
+ "grad_norm": 34.93601608276367,
+ "learning_rate": 4.104126984126984e-05,
+ "loss": 0.4903,
+ "step": 2286
+ },
+ {
+ "epoch": 13.06857142857143,
+ "grad_norm": 57.43114471435547,
+ "learning_rate": 4.1034920634920634e-05,
+ "loss": 0.6388,
+ "step": 2287
+ },
+ {
+ "epoch": 13.074285714285715,
+ "grad_norm": 32.30340576171875,
+ "learning_rate": 4.1028571428571434e-05,
+ "loss": 0.6437,
+ "step": 2288
+ },
+ {
+ "epoch": 13.08,
+ "grad_norm": 28.308734893798828,
+ "learning_rate": 4.1022222222222226e-05,
+ "loss": 0.495,
+ "step": 2289
+ },
+ {
+ "epoch": 13.085714285714285,
+ "grad_norm": 39.46627426147461,
+ "learning_rate": 4.101587301587302e-05,
+ "loss": 0.4642,
+ "step": 2290
+ },
+ {
+ "epoch": 13.09142857142857,
+ "grad_norm": 91.99317932128906,
+ "learning_rate": 4.100952380952381e-05,
+ "loss": 0.5092,
+ "step": 2291
+ },
+ {
+ "epoch": 13.097142857142858,
+ "grad_norm": 85.90660858154297,
+ "learning_rate": 4.1003174603174604e-05,
+ "loss": 0.3799,
+ "step": 2292
+ },
+ {
+ "epoch": 13.102857142857143,
+ "grad_norm": 66.8467788696289,
+ "learning_rate": 4.09968253968254e-05,
+ "loss": 0.2814,
+ "step": 2293
+ },
+ {
+ "epoch": 13.108571428571429,
+ "grad_norm": 30.89203643798828,
+ "learning_rate": 4.099047619047619e-05,
+ "loss": 0.2674,
+ "step": 2294
+ },
+ {
+ "epoch": 13.114285714285714,
+ "grad_norm": 48.26861572265625,
+ "learning_rate": 4.098412698412699e-05,
+ "loss": 0.478,
+ "step": 2295
+ },
+ {
+ "epoch": 13.12,
+ "grad_norm": 72.69939422607422,
+ "learning_rate": 4.097777777777778e-05,
+ "loss": 0.4094,
+ "step": 2296
+ },
+ {
+ "epoch": 13.125714285714286,
+ "grad_norm": 49.83537673950195,
+ "learning_rate": 4.0971428571428574e-05,
+ "loss": 0.4477,
+ "step": 2297
+ },
+ {
+ "epoch": 13.131428571428572,
+ "grad_norm": 30.371856689453125,
+ "learning_rate": 4.096507936507937e-05,
+ "loss": 0.6477,
+ "step": 2298
+ },
+ {
+ "epoch": 13.137142857142857,
+ "grad_norm": 75.71834564208984,
+ "learning_rate": 4.0958730158730166e-05,
+ "loss": 0.4407,
+ "step": 2299
+ },
+ {
+ "epoch": 13.142857142857142,
+ "grad_norm": 42.12094497680664,
+ "learning_rate": 4.095238095238095e-05,
+ "loss": 0.5816,
+ "step": 2300
+ },
+ {
+ "epoch": 13.14857142857143,
+ "grad_norm": 66.28294372558594,
+ "learning_rate": 4.094603174603175e-05,
+ "loss": 0.5022,
+ "step": 2301
+ },
+ {
+ "epoch": 13.154285714285715,
+ "grad_norm": 62.495208740234375,
+ "learning_rate": 4.093968253968254e-05,
+ "loss": 0.5326,
+ "step": 2302
+ },
+ {
+ "epoch": 13.16,
+ "grad_norm": 26.98307228088379,
+ "learning_rate": 4.093333333333334e-05,
+ "loss": 0.3825,
+ "step": 2303
+ },
+ {
+ "epoch": 13.165714285714285,
+ "grad_norm": 52.273460388183594,
+ "learning_rate": 4.092698412698413e-05,
+ "loss": 0.4312,
+ "step": 2304
+ },
+ {
+ "epoch": 13.17142857142857,
+ "grad_norm": 54.64181900024414,
+ "learning_rate": 4.092063492063492e-05,
+ "loss": 0.4302,
+ "step": 2305
+ },
+ {
+ "epoch": 13.177142857142858,
+ "grad_norm": 72.7457046508789,
+ "learning_rate": 4.0914285714285715e-05,
+ "loss": 0.4043,
+ "step": 2306
+ },
+ {
+ "epoch": 13.182857142857143,
+ "grad_norm": 96.02617645263672,
+ "learning_rate": 4.0907936507936514e-05,
+ "loss": 0.3691,
+ "step": 2307
+ },
+ {
+ "epoch": 13.188571428571429,
+ "grad_norm": 30.374874114990234,
+ "learning_rate": 4.09015873015873e-05,
+ "loss": 0.4787,
+ "step": 2308
+ },
+ {
+ "epoch": 13.194285714285714,
+ "grad_norm": 41.272945404052734,
+ "learning_rate": 4.08952380952381e-05,
+ "loss": 0.4312,
+ "step": 2309
+ },
+ {
+ "epoch": 13.2,
+ "grad_norm": 68.13615417480469,
+ "learning_rate": 4.088888888888889e-05,
+ "loss": 0.6399,
+ "step": 2310
+ },
+ {
+ "epoch": 13.205714285714286,
+ "grad_norm": 26.37989044189453,
+ "learning_rate": 4.0882539682539685e-05,
+ "loss": 0.4583,
+ "step": 2311
+ },
+ {
+ "epoch": 13.211428571428572,
+ "grad_norm": 210.23959350585938,
+ "learning_rate": 4.087619047619048e-05,
+ "loss": 0.4212,
+ "step": 2312
+ },
+ {
+ "epoch": 13.217142857142857,
+ "grad_norm": 47.81938934326172,
+ "learning_rate": 4.086984126984127e-05,
+ "loss": 0.5077,
+ "step": 2313
+ },
+ {
+ "epoch": 13.222857142857142,
+ "grad_norm": 27.632001876831055,
+ "learning_rate": 4.086349206349206e-05,
+ "loss": 0.4903,
+ "step": 2314
+ },
+ {
+ "epoch": 13.228571428571428,
+ "grad_norm": 38.60955810546875,
+ "learning_rate": 4.085714285714286e-05,
+ "loss": 0.7757,
+ "step": 2315
+ },
+ {
+ "epoch": 13.234285714285715,
+ "grad_norm": 35.93268966674805,
+ "learning_rate": 4.085079365079365e-05,
+ "loss": 0.8017,
+ "step": 2316
+ },
+ {
+ "epoch": 13.24,
+ "grad_norm": 45.96718215942383,
+ "learning_rate": 4.084444444444445e-05,
+ "loss": 0.599,
+ "step": 2317
+ },
+ {
+ "epoch": 13.245714285714286,
+ "grad_norm": 54.23589324951172,
+ "learning_rate": 4.083809523809524e-05,
+ "loss": 0.3001,
+ "step": 2318
+ },
+ {
+ "epoch": 13.251428571428571,
+ "grad_norm": 63.831703186035156,
+ "learning_rate": 4.083174603174603e-05,
+ "loss": 0.5831,
+ "step": 2319
+ },
+ {
+ "epoch": 13.257142857142856,
+ "grad_norm": 38.45108413696289,
+ "learning_rate": 4.0825396825396825e-05,
+ "loss": 0.3537,
+ "step": 2320
+ },
+ {
+ "epoch": 13.262857142857143,
+ "grad_norm": 40.974998474121094,
+ "learning_rate": 4.0819047619047624e-05,
+ "loss": 0.4593,
+ "step": 2321
+ },
+ {
+ "epoch": 13.268571428571429,
+ "grad_norm": 35.33351135253906,
+ "learning_rate": 4.081269841269841e-05,
+ "loss": 0.3977,
+ "step": 2322
+ },
+ {
+ "epoch": 13.274285714285714,
+ "grad_norm": 28.143640518188477,
+ "learning_rate": 4.080634920634921e-05,
+ "loss": 0.4476,
+ "step": 2323
+ },
+ {
+ "epoch": 13.28,
+ "grad_norm": 50.97539520263672,
+ "learning_rate": 4.08e-05,
+ "loss": 0.4968,
+ "step": 2324
+ },
+ {
+ "epoch": 13.285714285714286,
+ "grad_norm": 33.4579963684082,
+ "learning_rate": 4.0793650793650795e-05,
+ "loss": 0.3779,
+ "step": 2325
+ },
+ {
+ "epoch": 13.291428571428572,
+ "grad_norm": 113.8723373413086,
+ "learning_rate": 4.078730158730159e-05,
+ "loss": 0.6165,
+ "step": 2326
+ },
+ {
+ "epoch": 13.297142857142857,
+ "grad_norm": 80.13993835449219,
+ "learning_rate": 4.078095238095238e-05,
+ "loss": 0.5253,
+ "step": 2327
+ },
+ {
+ "epoch": 13.302857142857142,
+ "grad_norm": 125.358154296875,
+ "learning_rate": 4.077460317460318e-05,
+ "loss": 0.3868,
+ "step": 2328
+ },
+ {
+ "epoch": 13.308571428571428,
+ "grad_norm": 24.944622039794922,
+ "learning_rate": 4.076825396825397e-05,
+ "loss": 0.4771,
+ "step": 2329
+ },
+ {
+ "epoch": 13.314285714285715,
+ "grad_norm": 44.7222900390625,
+ "learning_rate": 4.0761904761904765e-05,
+ "loss": 0.5108,
+ "step": 2330
+ },
+ {
+ "epoch": 13.32,
+ "grad_norm": 68.44735717773438,
+ "learning_rate": 4.075555555555556e-05,
+ "loss": 0.4781,
+ "step": 2331
+ },
+ {
+ "epoch": 13.325714285714286,
+ "grad_norm": 95.55236053466797,
+ "learning_rate": 4.074920634920635e-05,
+ "loss": 0.4965,
+ "step": 2332
+ },
+ {
+ "epoch": 13.331428571428571,
+ "grad_norm": 29.676864624023438,
+ "learning_rate": 4.074285714285714e-05,
+ "loss": 0.5067,
+ "step": 2333
+ },
+ {
+ "epoch": 13.337142857142856,
+ "grad_norm": 54.00707244873047,
+ "learning_rate": 4.073650793650794e-05,
+ "loss": 0.393,
+ "step": 2334
+ },
+ {
+ "epoch": 13.342857142857143,
+ "grad_norm": 79.81952667236328,
+ "learning_rate": 4.073015873015873e-05,
+ "loss": 0.4672,
+ "step": 2335
+ },
+ {
+ "epoch": 13.348571428571429,
+ "grad_norm": 74.93247985839844,
+ "learning_rate": 4.072380952380953e-05,
+ "loss": 0.3498,
+ "step": 2336
+ },
+ {
+ "epoch": 13.354285714285714,
+ "grad_norm": 25.939695358276367,
+ "learning_rate": 4.071746031746032e-05,
+ "loss": 0.4535,
+ "step": 2337
+ },
+ {
+ "epoch": 13.36,
+ "grad_norm": 38.312496185302734,
+ "learning_rate": 4.071111111111111e-05,
+ "loss": 0.4496,
+ "step": 2338
+ },
+ {
+ "epoch": 13.365714285714287,
+ "grad_norm": 63.252864837646484,
+ "learning_rate": 4.0704761904761905e-05,
+ "loss": 0.3789,
+ "step": 2339
+ },
+ {
+ "epoch": 13.371428571428572,
+ "grad_norm": 38.034812927246094,
+ "learning_rate": 4.0698412698412705e-05,
+ "loss": 0.379,
+ "step": 2340
+ },
+ {
+ "epoch": 13.377142857142857,
+ "grad_norm": 107.1825180053711,
+ "learning_rate": 4.069206349206349e-05,
+ "loss": 0.4296,
+ "step": 2341
+ },
+ {
+ "epoch": 13.382857142857143,
+ "grad_norm": 48.01869201660156,
+ "learning_rate": 4.068571428571429e-05,
+ "loss": 0.5187,
+ "step": 2342
+ },
+ {
+ "epoch": 13.388571428571428,
+ "grad_norm": 98.60651397705078,
+ "learning_rate": 4.067936507936508e-05,
+ "loss": 0.7235,
+ "step": 2343
+ },
+ {
+ "epoch": 13.394285714285715,
+ "grad_norm": 38.27113723754883,
+ "learning_rate": 4.0673015873015875e-05,
+ "loss": 0.742,
+ "step": 2344
+ },
+ {
+ "epoch": 13.4,
+ "grad_norm": 24.036109924316406,
+ "learning_rate": 4.066666666666667e-05,
+ "loss": 0.5235,
+ "step": 2345
+ },
+ {
+ "epoch": 13.405714285714286,
+ "grad_norm": 27.38574981689453,
+ "learning_rate": 4.066031746031746e-05,
+ "loss": 0.6086,
+ "step": 2346
+ },
+ {
+ "epoch": 13.411428571428571,
+ "grad_norm": 42.157020568847656,
+ "learning_rate": 4.065396825396825e-05,
+ "loss": 0.3587,
+ "step": 2347
+ },
+ {
+ "epoch": 13.417142857142856,
+ "grad_norm": 46.96993637084961,
+ "learning_rate": 4.064761904761905e-05,
+ "loss": 0.3667,
+ "step": 2348
+ },
+ {
+ "epoch": 13.422857142857143,
+ "grad_norm": 115.64402770996094,
+ "learning_rate": 4.064126984126984e-05,
+ "loss": 0.3629,
+ "step": 2349
+ },
+ {
+ "epoch": 13.428571428571429,
+ "grad_norm": 25.6961727142334,
+ "learning_rate": 4.063492063492064e-05,
+ "loss": 0.5351,
+ "step": 2350
+ },
+ {
+ "epoch": 13.434285714285714,
+ "grad_norm": 31.624475479125977,
+ "learning_rate": 4.062857142857143e-05,
+ "loss": 0.3819,
+ "step": 2351
+ },
+ {
+ "epoch": 13.44,
+ "grad_norm": 90.46236419677734,
+ "learning_rate": 4.062222222222222e-05,
+ "loss": 0.6151,
+ "step": 2352
+ },
+ {
+ "epoch": 13.445714285714285,
+ "grad_norm": 100.17254638671875,
+ "learning_rate": 4.0615873015873016e-05,
+ "loss": 0.3905,
+ "step": 2353
+ },
+ {
+ "epoch": 13.451428571428572,
+ "grad_norm": 219.26023864746094,
+ "learning_rate": 4.0609523809523815e-05,
+ "loss": 0.5057,
+ "step": 2354
+ },
+ {
+ "epoch": 13.457142857142857,
+ "grad_norm": 52.1915283203125,
+ "learning_rate": 4.06031746031746e-05,
+ "loss": 0.3385,
+ "step": 2355
+ },
+ {
+ "epoch": 13.462857142857143,
+ "grad_norm": 30.643356323242188,
+ "learning_rate": 4.05968253968254e-05,
+ "loss": 0.9016,
+ "step": 2356
+ },
+ {
+ "epoch": 13.468571428571428,
+ "grad_norm": 61.73093795776367,
+ "learning_rate": 4.059047619047619e-05,
+ "loss": 0.4592,
+ "step": 2357
+ },
+ {
+ "epoch": 13.474285714285715,
+ "grad_norm": 102.5331039428711,
+ "learning_rate": 4.0584126984126986e-05,
+ "loss": 0.6305,
+ "step": 2358
+ },
+ {
+ "epoch": 13.48,
+ "grad_norm": 35.591346740722656,
+ "learning_rate": 4.057777777777778e-05,
+ "loss": 0.3772,
+ "step": 2359
+ },
+ {
+ "epoch": 13.485714285714286,
+ "grad_norm": 235.7429962158203,
+ "learning_rate": 4.057142857142857e-05,
+ "loss": 0.5206,
+ "step": 2360
+ },
+ {
+ "epoch": 13.491428571428571,
+ "grad_norm": 82.47492218017578,
+ "learning_rate": 4.0565079365079364e-05,
+ "loss": 0.3429,
+ "step": 2361
+ },
+ {
+ "epoch": 13.497142857142856,
+ "grad_norm": 38.43523406982422,
+ "learning_rate": 4.055873015873016e-05,
+ "loss": 0.7014,
+ "step": 2362
+ },
+ {
+ "epoch": 13.502857142857144,
+ "grad_norm": 76.04268646240234,
+ "learning_rate": 4.0552380952380956e-05,
+ "loss": 0.478,
+ "step": 2363
+ },
+ {
+ "epoch": 13.508571428571429,
+ "grad_norm": 33.30377960205078,
+ "learning_rate": 4.054603174603175e-05,
+ "loss": 0.3771,
+ "step": 2364
+ },
+ {
+ "epoch": 13.514285714285714,
+ "grad_norm": 33.07572555541992,
+ "learning_rate": 4.053968253968254e-05,
+ "loss": 0.442,
+ "step": 2365
+ },
+ {
+ "epoch": 13.52,
+ "grad_norm": 47.697723388671875,
+ "learning_rate": 4.0533333333333334e-05,
+ "loss": 0.3184,
+ "step": 2366
+ },
+ {
+ "epoch": 13.525714285714285,
+ "grad_norm": 66.57059478759766,
+ "learning_rate": 4.052698412698413e-05,
+ "loss": 0.3617,
+ "step": 2367
+ },
+ {
+ "epoch": 13.531428571428572,
+ "grad_norm": 17.434370040893555,
+ "learning_rate": 4.052063492063492e-05,
+ "loss": 0.4504,
+ "step": 2368
+ },
+ {
+ "epoch": 13.537142857142857,
+ "grad_norm": 22.381181716918945,
+ "learning_rate": 4.051428571428572e-05,
+ "loss": 0.4599,
+ "step": 2369
+ },
+ {
+ "epoch": 13.542857142857143,
+ "grad_norm": 65.71564483642578,
+ "learning_rate": 4.050793650793651e-05,
+ "loss": 0.2784,
+ "step": 2370
+ },
+ {
+ "epoch": 13.548571428571428,
+ "grad_norm": 64.0141372680664,
+ "learning_rate": 4.0501587301587304e-05,
+ "loss": 0.3171,
+ "step": 2371
+ },
+ {
+ "epoch": 13.554285714285715,
+ "grad_norm": 124.51643371582031,
+ "learning_rate": 4.0495238095238096e-05,
+ "loss": 0.6788,
+ "step": 2372
+ },
+ {
+ "epoch": 13.56,
+ "grad_norm": 30.546184539794922,
+ "learning_rate": 4.0488888888888896e-05,
+ "loss": 0.6532,
+ "step": 2373
+ },
+ {
+ "epoch": 13.565714285714286,
+ "grad_norm": 104.8770523071289,
+ "learning_rate": 4.048253968253968e-05,
+ "loss": 0.3769,
+ "step": 2374
+ },
+ {
+ "epoch": 13.571428571428571,
+ "grad_norm": 52.30247116088867,
+ "learning_rate": 4.047619047619048e-05,
+ "loss": 0.624,
+ "step": 2375
+ },
+ {
+ "epoch": 13.577142857142857,
+ "grad_norm": 37.756839752197266,
+ "learning_rate": 4.0469841269841274e-05,
+ "loss": 0.592,
+ "step": 2376
+ },
+ {
+ "epoch": 13.582857142857144,
+ "grad_norm": 64.60394287109375,
+ "learning_rate": 4.0463492063492066e-05,
+ "loss": 0.4058,
+ "step": 2377
+ },
+ {
+ "epoch": 13.588571428571429,
+ "grad_norm": 53.130348205566406,
+ "learning_rate": 4.045714285714286e-05,
+ "loss": 0.4623,
+ "step": 2378
+ },
+ {
+ "epoch": 13.594285714285714,
+ "grad_norm": 29.630857467651367,
+ "learning_rate": 4.045079365079365e-05,
+ "loss": 0.4196,
+ "step": 2379
+ },
+ {
+ "epoch": 13.6,
+ "grad_norm": 46.71171188354492,
+ "learning_rate": 4.0444444444444444e-05,
+ "loss": 0.5538,
+ "step": 2380
+ },
+ {
+ "epoch": 13.605714285714285,
+ "grad_norm": 38.45140838623047,
+ "learning_rate": 4.0438095238095244e-05,
+ "loss": 0.3889,
+ "step": 2381
+ },
+ {
+ "epoch": 13.611428571428572,
+ "grad_norm": 762.2544555664062,
+ "learning_rate": 4.043174603174603e-05,
+ "loss": 0.345,
+ "step": 2382
+ },
+ {
+ "epoch": 13.617142857142857,
+ "grad_norm": 403.2573547363281,
+ "learning_rate": 4.042539682539683e-05,
+ "loss": 0.3458,
+ "step": 2383
+ },
+ {
+ "epoch": 13.622857142857143,
+ "grad_norm": 40.29979705810547,
+ "learning_rate": 4.041904761904762e-05,
+ "loss": 0.2905,
+ "step": 2384
+ },
+ {
+ "epoch": 13.628571428571428,
+ "grad_norm": 56.68416213989258,
+ "learning_rate": 4.0412698412698414e-05,
+ "loss": 0.4291,
+ "step": 2385
+ },
+ {
+ "epoch": 13.634285714285713,
+ "grad_norm": 25.499067306518555,
+ "learning_rate": 4.040634920634921e-05,
+ "loss": 0.4255,
+ "step": 2386
+ },
+ {
+ "epoch": 13.64,
+ "grad_norm": 34.191986083984375,
+ "learning_rate": 4.0400000000000006e-05,
+ "loss": 0.5086,
+ "step": 2387
+ },
+ {
+ "epoch": 13.645714285714286,
+ "grad_norm": 21.267122268676758,
+ "learning_rate": 4.039365079365079e-05,
+ "loss": 0.5265,
+ "step": 2388
+ },
+ {
+ "epoch": 13.651428571428571,
+ "grad_norm": 33.64370346069336,
+ "learning_rate": 4.038730158730159e-05,
+ "loss": 0.5539,
+ "step": 2389
+ },
+ {
+ "epoch": 13.657142857142857,
+ "grad_norm": 43.58633804321289,
+ "learning_rate": 4.038095238095238e-05,
+ "loss": 0.3569,
+ "step": 2390
+ },
+ {
+ "epoch": 13.662857142857142,
+ "grad_norm": 57.552650451660156,
+ "learning_rate": 4.037460317460318e-05,
+ "loss": 0.331,
+ "step": 2391
+ },
+ {
+ "epoch": 13.668571428571429,
+ "grad_norm": 17.721899032592773,
+ "learning_rate": 4.036825396825397e-05,
+ "loss": 0.5111,
+ "step": 2392
+ },
+ {
+ "epoch": 13.674285714285714,
+ "grad_norm": 49.80946731567383,
+ "learning_rate": 4.036190476190476e-05,
+ "loss": 0.6951,
+ "step": 2393
+ },
+ {
+ "epoch": 13.68,
+ "grad_norm": 64.67133331298828,
+ "learning_rate": 4.0355555555555555e-05,
+ "loss": 0.4337,
+ "step": 2394
+ },
+ {
+ "epoch": 13.685714285714285,
+ "grad_norm": 32.472862243652344,
+ "learning_rate": 4.0349206349206354e-05,
+ "loss": 0.4215,
+ "step": 2395
+ },
+ {
+ "epoch": 13.691428571428572,
+ "grad_norm": 22.962915420532227,
+ "learning_rate": 4.034285714285715e-05,
+ "loss": 0.498,
+ "step": 2396
+ },
+ {
+ "epoch": 13.697142857142858,
+ "grad_norm": 287.5697021484375,
+ "learning_rate": 4.033650793650794e-05,
+ "loss": 0.4389,
+ "step": 2397
+ },
+ {
+ "epoch": 13.702857142857143,
+ "grad_norm": 145.85488891601562,
+ "learning_rate": 4.033015873015873e-05,
+ "loss": 0.3825,
+ "step": 2398
+ },
+ {
+ "epoch": 13.708571428571428,
+ "grad_norm": 22.51424217224121,
+ "learning_rate": 4.0323809523809525e-05,
+ "loss": 0.4401,
+ "step": 2399
+ },
+ {
+ "epoch": 13.714285714285714,
+ "grad_norm": 54.225807189941406,
+ "learning_rate": 4.031746031746032e-05,
+ "loss": 0.4892,
+ "step": 2400
+ },
+ {
+ "epoch": 13.72,
+ "grad_norm": 58.608455657958984,
+ "learning_rate": 4.031111111111111e-05,
+ "loss": 0.6965,
+ "step": 2401
+ },
+ {
+ "epoch": 13.725714285714286,
+ "grad_norm": 97.2522964477539,
+ "learning_rate": 4.030476190476191e-05,
+ "loss": 0.5691,
+ "step": 2402
+ },
+ {
+ "epoch": 13.731428571428571,
+ "grad_norm": 44.312068939208984,
+ "learning_rate": 4.02984126984127e-05,
+ "loss": 0.3953,
+ "step": 2403
+ },
+ {
+ "epoch": 13.737142857142857,
+ "grad_norm": 31.484203338623047,
+ "learning_rate": 4.0292063492063495e-05,
+ "loss": 0.6223,
+ "step": 2404
+ },
+ {
+ "epoch": 13.742857142857144,
+ "grad_norm": 37.49378204345703,
+ "learning_rate": 4.028571428571429e-05,
+ "loss": 0.4558,
+ "step": 2405
+ },
+ {
+ "epoch": 13.748571428571429,
+ "grad_norm": 23.190092086791992,
+ "learning_rate": 4.027936507936509e-05,
+ "loss": 0.4926,
+ "step": 2406
+ },
+ {
+ "epoch": 13.754285714285714,
+ "grad_norm": 51.90544509887695,
+ "learning_rate": 4.027301587301587e-05,
+ "loss": 0.5645,
+ "step": 2407
+ },
+ {
+ "epoch": 13.76,
+ "grad_norm": 56.01433181762695,
+ "learning_rate": 4.026666666666667e-05,
+ "loss": 0.4741,
+ "step": 2408
+ },
+ {
+ "epoch": 13.765714285714285,
+ "grad_norm": 26.729398727416992,
+ "learning_rate": 4.0260317460317465e-05,
+ "loss": 0.3693,
+ "step": 2409
+ },
+ {
+ "epoch": 13.771428571428572,
+ "grad_norm": 66.43302917480469,
+ "learning_rate": 4.025396825396826e-05,
+ "loss": 0.4163,
+ "step": 2410
+ },
+ {
+ "epoch": 13.777142857142858,
+ "grad_norm": 73.76277923583984,
+ "learning_rate": 4.024761904761905e-05,
+ "loss": 0.3351,
+ "step": 2411
+ },
+ {
+ "epoch": 13.782857142857143,
+ "grad_norm": 24.860280990600586,
+ "learning_rate": 4.024126984126984e-05,
+ "loss": 0.37,
+ "step": 2412
+ },
+ {
+ "epoch": 13.788571428571428,
+ "grad_norm": 24.26219367980957,
+ "learning_rate": 4.0234920634920635e-05,
+ "loss": 0.5209,
+ "step": 2413
+ },
+ {
+ "epoch": 13.794285714285714,
+ "grad_norm": 75.13851165771484,
+ "learning_rate": 4.0228571428571434e-05,
+ "loss": 0.4331,
+ "step": 2414
+ },
+ {
+ "epoch": 13.8,
+ "grad_norm": 76.9747314453125,
+ "learning_rate": 4.022222222222222e-05,
+ "loss": 0.6054,
+ "step": 2415
+ },
+ {
+ "epoch": 13.805714285714286,
+ "grad_norm": 67.45930480957031,
+ "learning_rate": 4.021587301587302e-05,
+ "loss": 0.5507,
+ "step": 2416
+ },
+ {
+ "epoch": 13.811428571428571,
+ "grad_norm": 60.373104095458984,
+ "learning_rate": 4.020952380952381e-05,
+ "loss": 0.3903,
+ "step": 2417
+ },
+ {
+ "epoch": 13.817142857142857,
+ "grad_norm": 35.6954345703125,
+ "learning_rate": 4.0203174603174605e-05,
+ "loss": 0.5955,
+ "step": 2418
+ },
+ {
+ "epoch": 13.822857142857142,
+ "grad_norm": 29.355072021484375,
+ "learning_rate": 4.01968253968254e-05,
+ "loss": 0.5763,
+ "step": 2419
+ },
+ {
+ "epoch": 13.82857142857143,
+ "grad_norm": 22.599504470825195,
+ "learning_rate": 4.01904761904762e-05,
+ "loss": 0.6903,
+ "step": 2420
+ },
+ {
+ "epoch": 13.834285714285715,
+ "grad_norm": 44.9334831237793,
+ "learning_rate": 4.018412698412698e-05,
+ "loss": 0.3831,
+ "step": 2421
+ },
+ {
+ "epoch": 13.84,
+ "grad_norm": 24.91976547241211,
+ "learning_rate": 4.017777777777778e-05,
+ "loss": 0.6801,
+ "step": 2422
+ },
+ {
+ "epoch": 13.845714285714285,
+ "grad_norm": 42.77363204956055,
+ "learning_rate": 4.017142857142857e-05,
+ "loss": 0.3493,
+ "step": 2423
+ },
+ {
+ "epoch": 13.85142857142857,
+ "grad_norm": 57.37465286254883,
+ "learning_rate": 4.016507936507937e-05,
+ "loss": 0.437,
+ "step": 2424
+ },
+ {
+ "epoch": 13.857142857142858,
+ "grad_norm": 47.02690887451172,
+ "learning_rate": 4.015873015873016e-05,
+ "loss": 0.427,
+ "step": 2425
+ },
+ {
+ "epoch": 13.862857142857143,
+ "grad_norm": 76.19194793701172,
+ "learning_rate": 4.015238095238095e-05,
+ "loss": 0.558,
+ "step": 2426
+ },
+ {
+ "epoch": 13.868571428571428,
+ "grad_norm": 29.325809478759766,
+ "learning_rate": 4.0146031746031746e-05,
+ "loss": 0.4203,
+ "step": 2427
+ },
+ {
+ "epoch": 13.874285714285714,
+ "grad_norm": 19.02168846130371,
+ "learning_rate": 4.0139682539682545e-05,
+ "loss": 0.4868,
+ "step": 2428
+ },
+ {
+ "epoch": 13.88,
+ "grad_norm": 26.71924591064453,
+ "learning_rate": 4.013333333333333e-05,
+ "loss": 0.5272,
+ "step": 2429
+ },
+ {
+ "epoch": 13.885714285714286,
+ "grad_norm": 34.36274337768555,
+ "learning_rate": 4.012698412698413e-05,
+ "loss": 0.5817,
+ "step": 2430
+ },
+ {
+ "epoch": 13.891428571428571,
+ "grad_norm": 41.352142333984375,
+ "learning_rate": 4.012063492063492e-05,
+ "loss": 0.3324,
+ "step": 2431
+ },
+ {
+ "epoch": 13.897142857142857,
+ "grad_norm": 52.86463165283203,
+ "learning_rate": 4.0114285714285715e-05,
+ "loss": 0.4886,
+ "step": 2432
+ },
+ {
+ "epoch": 13.902857142857142,
+ "grad_norm": 49.43160629272461,
+ "learning_rate": 4.010793650793651e-05,
+ "loss": 0.7166,
+ "step": 2433
+ },
+ {
+ "epoch": 13.90857142857143,
+ "grad_norm": 17.74382209777832,
+ "learning_rate": 4.01015873015873e-05,
+ "loss": 0.3946,
+ "step": 2434
+ },
+ {
+ "epoch": 13.914285714285715,
+ "grad_norm": 68.80399322509766,
+ "learning_rate": 4.00952380952381e-05,
+ "loss": 0.4132,
+ "step": 2435
+ },
+ {
+ "epoch": 13.92,
+ "grad_norm": 280.85858154296875,
+ "learning_rate": 4.008888888888889e-05,
+ "loss": 0.3927,
+ "step": 2436
+ },
+ {
+ "epoch": 13.925714285714285,
+ "grad_norm": 455.3421630859375,
+ "learning_rate": 4.0082539682539685e-05,
+ "loss": 0.418,
+ "step": 2437
+ },
+ {
+ "epoch": 13.93142857142857,
+ "grad_norm": 37.32456970214844,
+ "learning_rate": 4.007619047619048e-05,
+ "loss": 0.411,
+ "step": 2438
+ },
+ {
+ "epoch": 13.937142857142858,
+ "grad_norm": 86.37480926513672,
+ "learning_rate": 4.006984126984127e-05,
+ "loss": 0.4207,
+ "step": 2439
+ },
+ {
+ "epoch": 13.942857142857143,
+ "grad_norm": 148.76866149902344,
+ "learning_rate": 4.006349206349206e-05,
+ "loss": 0.538,
+ "step": 2440
+ },
+ {
+ "epoch": 13.948571428571428,
+ "grad_norm": 67.73564147949219,
+ "learning_rate": 4.005714285714286e-05,
+ "loss": 0.4706,
+ "step": 2441
+ },
+ {
+ "epoch": 13.954285714285714,
+ "grad_norm": 54.38566589355469,
+ "learning_rate": 4.0050793650793655e-05,
+ "loss": 0.7123,
+ "step": 2442
+ },
+ {
+ "epoch": 13.96,
+ "grad_norm": 50.352149963378906,
+ "learning_rate": 4.004444444444445e-05,
+ "loss": 0.6072,
+ "step": 2443
+ },
+ {
+ "epoch": 13.965714285714286,
+ "grad_norm": 234.63272094726562,
+ "learning_rate": 4.003809523809524e-05,
+ "loss": 0.4392,
+ "step": 2444
+ },
+ {
+ "epoch": 13.971428571428572,
+ "grad_norm": 58.83427047729492,
+ "learning_rate": 4.003174603174603e-05,
+ "loss": 0.4459,
+ "step": 2445
+ },
+ {
+ "epoch": 13.977142857142857,
+ "grad_norm": 28.319311141967773,
+ "learning_rate": 4.0025396825396826e-05,
+ "loss": 0.4224,
+ "step": 2446
+ },
+ {
+ "epoch": 13.982857142857142,
+ "grad_norm": 21.747053146362305,
+ "learning_rate": 4.0019047619047625e-05,
+ "loss": 0.675,
+ "step": 2447
+ },
+ {
+ "epoch": 13.98857142857143,
+ "grad_norm": 74.70608520507812,
+ "learning_rate": 4.001269841269841e-05,
+ "loss": 0.5733,
+ "step": 2448
+ },
+ {
+ "epoch": 13.994285714285715,
+ "grad_norm": 71.20796966552734,
+ "learning_rate": 4.000634920634921e-05,
+ "loss": 0.4269,
+ "step": 2449
+ },
+ {
+ "epoch": 14.0,
+ "grad_norm": 24.509675979614258,
+ "learning_rate": 4e-05,
+ "loss": 0.446,
+ "step": 2450
+ },
+ {
+ "epoch": 14.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6337568759918213,
+ "eval_map": 0.9186,
+ "eval_map_50": 0.96,
+ "eval_map_75": 0.9528,
+ "eval_map_large": 0.9186,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9186,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.786,
+ "eval_mar_10": 0.9676,
+ "eval_mar_100": 0.9752,
+ "eval_mar_100_per_class": 0.9752,
+ "eval_mar_large": 0.9752,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 15.4814,
+ "eval_samples_per_second": 18.991,
+ "eval_steps_per_second": 2.39,
+ "step": 2450
+ },
+ {
+ "epoch": 14.005714285714285,
+ "grad_norm": 36.81268310546875,
+ "learning_rate": 3.9993650793650796e-05,
+ "loss": 0.3741,
+ "step": 2451
+ },
+ {
+ "epoch": 14.01142857142857,
+ "grad_norm": 35.905799865722656,
+ "learning_rate": 3.998730158730159e-05,
+ "loss": 0.4541,
+ "step": 2452
+ },
+ {
+ "epoch": 14.017142857142858,
+ "grad_norm": 23.208505630493164,
+ "learning_rate": 3.998095238095239e-05,
+ "loss": 0.3826,
+ "step": 2453
+ },
+ {
+ "epoch": 14.022857142857143,
+ "grad_norm": 22.888587951660156,
+ "learning_rate": 3.9974603174603174e-05,
+ "loss": 0.5514,
+ "step": 2454
+ },
+ {
+ "epoch": 14.028571428571428,
+ "grad_norm": 41.91596603393555,
+ "learning_rate": 3.996825396825397e-05,
+ "loss": 0.3492,
+ "step": 2455
+ },
+ {
+ "epoch": 14.034285714285714,
+ "grad_norm": 34.92245864868164,
+ "learning_rate": 3.996190476190476e-05,
+ "loss": 0.4104,
+ "step": 2456
+ },
+ {
+ "epoch": 14.04,
+ "grad_norm": 29.703365325927734,
+ "learning_rate": 3.995555555555556e-05,
+ "loss": 0.4348,
+ "step": 2457
+ },
+ {
+ "epoch": 14.045714285714286,
+ "grad_norm": 60.400856018066406,
+ "learning_rate": 3.994920634920635e-05,
+ "loss": 0.515,
+ "step": 2458
+ },
+ {
+ "epoch": 14.051428571428572,
+ "grad_norm": 55.89780807495117,
+ "learning_rate": 3.9942857142857144e-05,
+ "loss": 0.5957,
+ "step": 2459
+ },
+ {
+ "epoch": 14.057142857142857,
+ "grad_norm": 40.55846405029297,
+ "learning_rate": 3.9936507936507936e-05,
+ "loss": 0.457,
+ "step": 2460
+ },
+ {
+ "epoch": 14.062857142857142,
+ "grad_norm": 109.13560485839844,
+ "learning_rate": 3.9930158730158736e-05,
+ "loss": 0.3506,
+ "step": 2461
+ },
+ {
+ "epoch": 14.06857142857143,
+ "grad_norm": 79.74500274658203,
+ "learning_rate": 3.992380952380952e-05,
+ "loss": 0.36,
+ "step": 2462
+ },
+ {
+ "epoch": 14.074285714285715,
+ "grad_norm": 20.207691192626953,
+ "learning_rate": 3.991746031746032e-05,
+ "loss": 0.6307,
+ "step": 2463
+ },
+ {
+ "epoch": 14.08,
+ "grad_norm": 68.04793548583984,
+ "learning_rate": 3.9911111111111114e-05,
+ "loss": 0.4574,
+ "step": 2464
+ },
+ {
+ "epoch": 14.085714285714285,
+ "grad_norm": 261.51654052734375,
+ "learning_rate": 3.9904761904761906e-05,
+ "loss": 0.4338,
+ "step": 2465
+ },
+ {
+ "epoch": 14.09142857142857,
+ "grad_norm": 40.91068649291992,
+ "learning_rate": 3.98984126984127e-05,
+ "loss": 0.5558,
+ "step": 2466
+ },
+ {
+ "epoch": 14.097142857142858,
+ "grad_norm": 30.04355239868164,
+ "learning_rate": 3.989206349206349e-05,
+ "loss": 0.3277,
+ "step": 2467
+ },
+ {
+ "epoch": 14.102857142857143,
+ "grad_norm": 37.32410430908203,
+ "learning_rate": 3.9885714285714284e-05,
+ "loss": 0.5457,
+ "step": 2468
+ },
+ {
+ "epoch": 14.108571428571429,
+ "grad_norm": 32.55274963378906,
+ "learning_rate": 3.9879365079365084e-05,
+ "loss": 0.463,
+ "step": 2469
+ },
+ {
+ "epoch": 14.114285714285714,
+ "grad_norm": 19.690839767456055,
+ "learning_rate": 3.9873015873015876e-05,
+ "loss": 0.4994,
+ "step": 2470
+ },
+ {
+ "epoch": 14.12,
+ "grad_norm": 38.527259826660156,
+ "learning_rate": 3.986666666666667e-05,
+ "loss": 0.4435,
+ "step": 2471
+ },
+ {
+ "epoch": 14.125714285714286,
+ "grad_norm": 41.78245544433594,
+ "learning_rate": 3.986031746031746e-05,
+ "loss": 0.465,
+ "step": 2472
+ },
+ {
+ "epoch": 14.131428571428572,
+ "grad_norm": 34.67498016357422,
+ "learning_rate": 3.9853968253968254e-05,
+ "loss": 0.44,
+ "step": 2473
+ },
+ {
+ "epoch": 14.137142857142857,
+ "grad_norm": 27.37482261657715,
+ "learning_rate": 3.9847619047619054e-05,
+ "loss": 0.3372,
+ "step": 2474
+ },
+ {
+ "epoch": 14.142857142857142,
+ "grad_norm": 156.32009887695312,
+ "learning_rate": 3.984126984126984e-05,
+ "loss": 0.5525,
+ "step": 2475
+ },
+ {
+ "epoch": 14.14857142857143,
+ "grad_norm": 45.042030334472656,
+ "learning_rate": 3.983492063492064e-05,
+ "loss": 0.3852,
+ "step": 2476
+ },
+ {
+ "epoch": 14.154285714285715,
+ "grad_norm": 93.20071411132812,
+ "learning_rate": 3.982857142857143e-05,
+ "loss": 0.4532,
+ "step": 2477
+ },
+ {
+ "epoch": 14.16,
+ "grad_norm": 54.58212661743164,
+ "learning_rate": 3.9822222222222224e-05,
+ "loss": 0.3697,
+ "step": 2478
+ },
+ {
+ "epoch": 14.165714285714285,
+ "grad_norm": 74.4084243774414,
+ "learning_rate": 3.981587301587302e-05,
+ "loss": 0.4921,
+ "step": 2479
+ },
+ {
+ "epoch": 14.17142857142857,
+ "grad_norm": 70.5937728881836,
+ "learning_rate": 3.9809523809523816e-05,
+ "loss": 0.7412,
+ "step": 2480
+ },
+ {
+ "epoch": 14.177142857142858,
+ "grad_norm": 85.14163208007812,
+ "learning_rate": 3.98031746031746e-05,
+ "loss": 0.6297,
+ "step": 2481
+ },
+ {
+ "epoch": 14.182857142857143,
+ "grad_norm": 29.404062271118164,
+ "learning_rate": 3.97968253968254e-05,
+ "loss": 0.615,
+ "step": 2482
+ },
+ {
+ "epoch": 14.188571428571429,
+ "grad_norm": 284.3392028808594,
+ "learning_rate": 3.9790476190476194e-05,
+ "loss": 0.4211,
+ "step": 2483
+ },
+ {
+ "epoch": 14.194285714285714,
+ "grad_norm": 46.706485748291016,
+ "learning_rate": 3.978412698412699e-05,
+ "loss": 0.3809,
+ "step": 2484
+ },
+ {
+ "epoch": 14.2,
+ "grad_norm": 40.72541046142578,
+ "learning_rate": 3.977777777777778e-05,
+ "loss": 0.4838,
+ "step": 2485
+ },
+ {
+ "epoch": 14.205714285714286,
+ "grad_norm": 24.260883331298828,
+ "learning_rate": 3.977142857142857e-05,
+ "loss": 0.4957,
+ "step": 2486
+ },
+ {
+ "epoch": 14.211428571428572,
+ "grad_norm": 29.26984977722168,
+ "learning_rate": 3.9765079365079365e-05,
+ "loss": 0.429,
+ "step": 2487
+ },
+ {
+ "epoch": 14.217142857142857,
+ "grad_norm": 101.20543670654297,
+ "learning_rate": 3.9758730158730164e-05,
+ "loss": 0.4955,
+ "step": 2488
+ },
+ {
+ "epoch": 14.222857142857142,
+ "grad_norm": 28.793743133544922,
+ "learning_rate": 3.975238095238095e-05,
+ "loss": 0.5254,
+ "step": 2489
+ },
+ {
+ "epoch": 14.228571428571428,
+ "grad_norm": 84.4249267578125,
+ "learning_rate": 3.974603174603175e-05,
+ "loss": 0.3372,
+ "step": 2490
+ },
+ {
+ "epoch": 14.234285714285715,
+ "grad_norm": 65.11558532714844,
+ "learning_rate": 3.973968253968254e-05,
+ "loss": 0.5795,
+ "step": 2491
+ },
+ {
+ "epoch": 14.24,
+ "grad_norm": 51.24958419799805,
+ "learning_rate": 3.9733333333333335e-05,
+ "loss": 0.5393,
+ "step": 2492
+ },
+ {
+ "epoch": 14.245714285714286,
+ "grad_norm": 37.17159652709961,
+ "learning_rate": 3.972698412698413e-05,
+ "loss": 0.5153,
+ "step": 2493
+ },
+ {
+ "epoch": 14.251428571428571,
+ "grad_norm": 28.747060775756836,
+ "learning_rate": 3.972063492063493e-05,
+ "loss": 0.5044,
+ "step": 2494
+ },
+ {
+ "epoch": 14.257142857142856,
+ "grad_norm": 29.33413314819336,
+ "learning_rate": 3.971428571428571e-05,
+ "loss": 0.602,
+ "step": 2495
+ },
+ {
+ "epoch": 14.262857142857143,
+ "grad_norm": 70.5453872680664,
+ "learning_rate": 3.970793650793651e-05,
+ "loss": 0.5681,
+ "step": 2496
+ },
+ {
+ "epoch": 14.268571428571429,
+ "grad_norm": 44.95606994628906,
+ "learning_rate": 3.97015873015873e-05,
+ "loss": 0.3336,
+ "step": 2497
+ },
+ {
+ "epoch": 14.274285714285714,
+ "grad_norm": 47.51212692260742,
+ "learning_rate": 3.96952380952381e-05,
+ "loss": 0.5591,
+ "step": 2498
+ },
+ {
+ "epoch": 14.28,
+ "grad_norm": 39.193973541259766,
+ "learning_rate": 3.968888888888889e-05,
+ "loss": 0.3975,
+ "step": 2499
+ },
+ {
+ "epoch": 14.285714285714286,
+ "grad_norm": 151.0368194580078,
+ "learning_rate": 3.968253968253968e-05,
+ "loss": 0.4476,
+ "step": 2500
+ },
+ {
+ "epoch": 14.291428571428572,
+ "grad_norm": 27.16161346435547,
+ "learning_rate": 3.9676190476190475e-05,
+ "loss": 0.3779,
+ "step": 2501
+ },
+ {
+ "epoch": 14.297142857142857,
+ "grad_norm": 35.30384826660156,
+ "learning_rate": 3.9669841269841275e-05,
+ "loss": 0.5124,
+ "step": 2502
+ },
+ {
+ "epoch": 14.302857142857142,
+ "grad_norm": 26.686216354370117,
+ "learning_rate": 3.966349206349206e-05,
+ "loss": 0.4631,
+ "step": 2503
+ },
+ {
+ "epoch": 14.308571428571428,
+ "grad_norm": 26.295207977294922,
+ "learning_rate": 3.965714285714286e-05,
+ "loss": 0.4499,
+ "step": 2504
+ },
+ {
+ "epoch": 14.314285714285715,
+ "grad_norm": 33.198848724365234,
+ "learning_rate": 3.965079365079365e-05,
+ "loss": 0.408,
+ "step": 2505
+ },
+ {
+ "epoch": 14.32,
+ "grad_norm": 29.399959564208984,
+ "learning_rate": 3.9644444444444445e-05,
+ "loss": 0.4149,
+ "step": 2506
+ },
+ {
+ "epoch": 14.325714285714286,
+ "grad_norm": 142.16004943847656,
+ "learning_rate": 3.963809523809524e-05,
+ "loss": 0.3737,
+ "step": 2507
+ },
+ {
+ "epoch": 14.331428571428571,
+ "grad_norm": 22.166606903076172,
+ "learning_rate": 3.963174603174603e-05,
+ "loss": 0.5191,
+ "step": 2508
+ },
+ {
+ "epoch": 14.337142857142856,
+ "grad_norm": 45.35384750366211,
+ "learning_rate": 3.962539682539683e-05,
+ "loss": 0.3282,
+ "step": 2509
+ },
+ {
+ "epoch": 14.342857142857143,
+ "grad_norm": 34.01889419555664,
+ "learning_rate": 3.961904761904762e-05,
+ "loss": 0.4813,
+ "step": 2510
+ },
+ {
+ "epoch": 14.348571428571429,
+ "grad_norm": 40.84413146972656,
+ "learning_rate": 3.9612698412698415e-05,
+ "loss": 0.6772,
+ "step": 2511
+ },
+ {
+ "epoch": 14.354285714285714,
+ "grad_norm": 29.83700180053711,
+ "learning_rate": 3.960634920634921e-05,
+ "loss": 0.3248,
+ "step": 2512
+ },
+ {
+ "epoch": 14.36,
+ "grad_norm": 51.366172790527344,
+ "learning_rate": 3.960000000000001e-05,
+ "loss": 0.3728,
+ "step": 2513
+ },
+ {
+ "epoch": 14.365714285714287,
+ "grad_norm": 54.66666793823242,
+ "learning_rate": 3.959365079365079e-05,
+ "loss": 0.319,
+ "step": 2514
+ },
+ {
+ "epoch": 14.371428571428572,
+ "grad_norm": 91.00899505615234,
+ "learning_rate": 3.958730158730159e-05,
+ "loss": 0.5796,
+ "step": 2515
+ },
+ {
+ "epoch": 14.377142857142857,
+ "grad_norm": 48.039817810058594,
+ "learning_rate": 3.9580952380952385e-05,
+ "loss": 0.4446,
+ "step": 2516
+ },
+ {
+ "epoch": 14.382857142857143,
+ "grad_norm": 38.14234924316406,
+ "learning_rate": 3.957460317460318e-05,
+ "loss": 0.5097,
+ "step": 2517
+ },
+ {
+ "epoch": 14.388571428571428,
+ "grad_norm": 43.835147857666016,
+ "learning_rate": 3.956825396825397e-05,
+ "loss": 0.4423,
+ "step": 2518
+ },
+ {
+ "epoch": 14.394285714285715,
+ "grad_norm": 46.32112121582031,
+ "learning_rate": 3.956190476190476e-05,
+ "loss": 0.4711,
+ "step": 2519
+ },
+ {
+ "epoch": 14.4,
+ "grad_norm": 204.66468811035156,
+ "learning_rate": 3.9555555555555556e-05,
+ "loss": 0.6653,
+ "step": 2520
+ },
+ {
+ "epoch": 14.405714285714286,
+ "grad_norm": 56.643836975097656,
+ "learning_rate": 3.9549206349206355e-05,
+ "loss": 0.5713,
+ "step": 2521
+ },
+ {
+ "epoch": 14.411428571428571,
+ "grad_norm": 56.983455657958984,
+ "learning_rate": 3.954285714285714e-05,
+ "loss": 0.4003,
+ "step": 2522
+ },
+ {
+ "epoch": 14.417142857142856,
+ "grad_norm": 100.86315155029297,
+ "learning_rate": 3.953650793650794e-05,
+ "loss": 0.3291,
+ "step": 2523
+ },
+ {
+ "epoch": 14.422857142857143,
+ "grad_norm": 77.1080093383789,
+ "learning_rate": 3.953015873015873e-05,
+ "loss": 0.3666,
+ "step": 2524
+ },
+ {
+ "epoch": 14.428571428571429,
+ "grad_norm": 35.71822738647461,
+ "learning_rate": 3.9523809523809526e-05,
+ "loss": 0.3828,
+ "step": 2525
+ },
+ {
+ "epoch": 14.434285714285714,
+ "grad_norm": 60.82173538208008,
+ "learning_rate": 3.951746031746032e-05,
+ "loss": 0.5301,
+ "step": 2526
+ },
+ {
+ "epoch": 14.44,
+ "grad_norm": 37.55204391479492,
+ "learning_rate": 3.951111111111112e-05,
+ "loss": 0.3292,
+ "step": 2527
+ },
+ {
+ "epoch": 14.445714285714285,
+ "grad_norm": 93.9327621459961,
+ "learning_rate": 3.9504761904761903e-05,
+ "loss": 0.5047,
+ "step": 2528
+ },
+ {
+ "epoch": 14.451428571428572,
+ "grad_norm": 46.62969970703125,
+ "learning_rate": 3.94984126984127e-05,
+ "loss": 0.3249,
+ "step": 2529
+ },
+ {
+ "epoch": 14.457142857142857,
+ "grad_norm": 72.3116683959961,
+ "learning_rate": 3.949206349206349e-05,
+ "loss": 0.4743,
+ "step": 2530
+ },
+ {
+ "epoch": 14.462857142857143,
+ "grad_norm": 55.59286117553711,
+ "learning_rate": 3.948571428571429e-05,
+ "loss": 0.537,
+ "step": 2531
+ },
+ {
+ "epoch": 14.468571428571428,
+ "grad_norm": 44.294151306152344,
+ "learning_rate": 3.947936507936508e-05,
+ "loss": 0.4722,
+ "step": 2532
+ },
+ {
+ "epoch": 14.474285714285715,
+ "grad_norm": 50.04436111450195,
+ "learning_rate": 3.947301587301587e-05,
+ "loss": 0.6354,
+ "step": 2533
+ },
+ {
+ "epoch": 14.48,
+ "grad_norm": 82.75171661376953,
+ "learning_rate": 3.9466666666666666e-05,
+ "loss": 0.4525,
+ "step": 2534
+ },
+ {
+ "epoch": 14.485714285714286,
+ "grad_norm": 65.42272186279297,
+ "learning_rate": 3.9460317460317465e-05,
+ "loss": 0.3971,
+ "step": 2535
+ },
+ {
+ "epoch": 14.491428571428571,
+ "grad_norm": 219.6083984375,
+ "learning_rate": 3.945396825396825e-05,
+ "loss": 0.46,
+ "step": 2536
+ },
+ {
+ "epoch": 14.497142857142856,
+ "grad_norm": 32.19956588745117,
+ "learning_rate": 3.944761904761905e-05,
+ "loss": 0.4132,
+ "step": 2537
+ },
+ {
+ "epoch": 14.502857142857144,
+ "grad_norm": 42.3463134765625,
+ "learning_rate": 3.944126984126984e-05,
+ "loss": 0.4181,
+ "step": 2538
+ },
+ {
+ "epoch": 14.508571428571429,
+ "grad_norm": 28.293188095092773,
+ "learning_rate": 3.9434920634920636e-05,
+ "loss": 0.3657,
+ "step": 2539
+ },
+ {
+ "epoch": 14.514285714285714,
+ "grad_norm": 76.0169906616211,
+ "learning_rate": 3.942857142857143e-05,
+ "loss": 0.4506,
+ "step": 2540
+ },
+ {
+ "epoch": 14.52,
+ "grad_norm": 35.1695442199707,
+ "learning_rate": 3.942222222222222e-05,
+ "loss": 0.4247,
+ "step": 2541
+ },
+ {
+ "epoch": 14.525714285714285,
+ "grad_norm": 75.5692367553711,
+ "learning_rate": 3.9415873015873014e-05,
+ "loss": 0.542,
+ "step": 2542
+ },
+ {
+ "epoch": 14.531428571428572,
+ "grad_norm": 32.895774841308594,
+ "learning_rate": 3.940952380952381e-05,
+ "loss": 0.4416,
+ "step": 2543
+ },
+ {
+ "epoch": 14.537142857142857,
+ "grad_norm": 65.54524230957031,
+ "learning_rate": 3.9403174603174606e-05,
+ "loss": 0.3774,
+ "step": 2544
+ },
+ {
+ "epoch": 14.542857142857143,
+ "grad_norm": 31.3699893951416,
+ "learning_rate": 3.93968253968254e-05,
+ "loss": 0.4871,
+ "step": 2545
+ },
+ {
+ "epoch": 14.548571428571428,
+ "grad_norm": 37.57272720336914,
+ "learning_rate": 3.939047619047619e-05,
+ "loss": 0.4938,
+ "step": 2546
+ },
+ {
+ "epoch": 14.554285714285715,
+ "grad_norm": 81.45382690429688,
+ "learning_rate": 3.9384126984126984e-05,
+ "loss": 0.4315,
+ "step": 2547
+ },
+ {
+ "epoch": 14.56,
+ "grad_norm": 54.47109603881836,
+ "learning_rate": 3.937777777777778e-05,
+ "loss": 0.4103,
+ "step": 2548
+ },
+ {
+ "epoch": 14.565714285714286,
+ "grad_norm": 62.30387496948242,
+ "learning_rate": 3.9371428571428576e-05,
+ "loss": 0.3809,
+ "step": 2549
+ },
+ {
+ "epoch": 14.571428571428571,
+ "grad_norm": 38.18788528442383,
+ "learning_rate": 3.936507936507937e-05,
+ "loss": 0.3779,
+ "step": 2550
+ },
+ {
+ "epoch": 14.577142857142857,
+ "grad_norm": 110.54651641845703,
+ "learning_rate": 3.935873015873016e-05,
+ "loss": 0.4439,
+ "step": 2551
+ },
+ {
+ "epoch": 14.582857142857144,
+ "grad_norm": 64.4066162109375,
+ "learning_rate": 3.9352380952380954e-05,
+ "loss": 0.4098,
+ "step": 2552
+ },
+ {
+ "epoch": 14.588571428571429,
+ "grad_norm": 86.1274185180664,
+ "learning_rate": 3.9346031746031746e-05,
+ "loss": 0.3765,
+ "step": 2553
+ },
+ {
+ "epoch": 14.594285714285714,
+ "grad_norm": 41.547847747802734,
+ "learning_rate": 3.9339682539682546e-05,
+ "loss": 0.4719,
+ "step": 2554
+ },
+ {
+ "epoch": 14.6,
+ "grad_norm": 43.89437484741211,
+ "learning_rate": 3.933333333333333e-05,
+ "loss": 0.3632,
+ "step": 2555
+ },
+ {
+ "epoch": 14.605714285714285,
+ "grad_norm": 64.66832733154297,
+ "learning_rate": 3.932698412698413e-05,
+ "loss": 0.4349,
+ "step": 2556
+ },
+ {
+ "epoch": 14.611428571428572,
+ "grad_norm": 62.122745513916016,
+ "learning_rate": 3.9320634920634924e-05,
+ "loss": 0.3536,
+ "step": 2557
+ },
+ {
+ "epoch": 14.617142857142857,
+ "grad_norm": 51.276641845703125,
+ "learning_rate": 3.9314285714285716e-05,
+ "loss": 0.4425,
+ "step": 2558
+ },
+ {
+ "epoch": 14.622857142857143,
+ "grad_norm": 45.557525634765625,
+ "learning_rate": 3.930793650793651e-05,
+ "loss": 0.4156,
+ "step": 2559
+ },
+ {
+ "epoch": 14.628571428571428,
+ "grad_norm": 48.128719329833984,
+ "learning_rate": 3.930158730158731e-05,
+ "loss": 0.4061,
+ "step": 2560
+ },
+ {
+ "epoch": 14.634285714285713,
+ "grad_norm": 35.2762565612793,
+ "learning_rate": 3.9295238095238094e-05,
+ "loss": 0.4133,
+ "step": 2561
+ },
+ {
+ "epoch": 14.64,
+ "grad_norm": 50.81557846069336,
+ "learning_rate": 3.9288888888888894e-05,
+ "loss": 0.3663,
+ "step": 2562
+ },
+ {
+ "epoch": 14.645714285714286,
+ "grad_norm": 115.70230865478516,
+ "learning_rate": 3.928253968253968e-05,
+ "loss": 0.4082,
+ "step": 2563
+ },
+ {
+ "epoch": 14.651428571428571,
+ "grad_norm": 40.494483947753906,
+ "learning_rate": 3.927619047619048e-05,
+ "loss": 0.396,
+ "step": 2564
+ },
+ {
+ "epoch": 14.657142857142857,
+ "grad_norm": 83.41320037841797,
+ "learning_rate": 3.926984126984127e-05,
+ "loss": 0.6076,
+ "step": 2565
+ },
+ {
+ "epoch": 14.662857142857142,
+ "grad_norm": 57.01734924316406,
+ "learning_rate": 3.9263492063492064e-05,
+ "loss": 0.3043,
+ "step": 2566
+ },
+ {
+ "epoch": 14.668571428571429,
+ "grad_norm": 89.8864974975586,
+ "learning_rate": 3.925714285714286e-05,
+ "loss": 0.3443,
+ "step": 2567
+ },
+ {
+ "epoch": 14.674285714285714,
+ "grad_norm": 39.41927719116211,
+ "learning_rate": 3.9250793650793656e-05,
+ "loss": 0.5277,
+ "step": 2568
+ },
+ {
+ "epoch": 14.68,
+ "grad_norm": 67.12992095947266,
+ "learning_rate": 3.924444444444444e-05,
+ "loss": 0.582,
+ "step": 2569
+ },
+ {
+ "epoch": 14.685714285714285,
+ "grad_norm": 29.401214599609375,
+ "learning_rate": 3.923809523809524e-05,
+ "loss": 0.379,
+ "step": 2570
+ },
+ {
+ "epoch": 14.691428571428572,
+ "grad_norm": 37.86322021484375,
+ "learning_rate": 3.9231746031746034e-05,
+ "loss": 0.5949,
+ "step": 2571
+ },
+ {
+ "epoch": 14.697142857142858,
+ "grad_norm": 20.0744686126709,
+ "learning_rate": 3.922539682539683e-05,
+ "loss": 0.3908,
+ "step": 2572
+ },
+ {
+ "epoch": 14.702857142857143,
+ "grad_norm": 48.950862884521484,
+ "learning_rate": 3.921904761904762e-05,
+ "loss": 0.4258,
+ "step": 2573
+ },
+ {
+ "epoch": 14.708571428571428,
+ "grad_norm": 319.82684326171875,
+ "learning_rate": 3.921269841269841e-05,
+ "loss": 0.4514,
+ "step": 2574
+ },
+ {
+ "epoch": 14.714285714285714,
+ "grad_norm": 31.71880531311035,
+ "learning_rate": 3.9206349206349205e-05,
+ "loss": 0.493,
+ "step": 2575
+ },
+ {
+ "epoch": 14.72,
+ "grad_norm": 59.9763298034668,
+ "learning_rate": 3.9200000000000004e-05,
+ "loss": 0.4182,
+ "step": 2576
+ },
+ {
+ "epoch": 14.725714285714286,
+ "grad_norm": 94.82324981689453,
+ "learning_rate": 3.91936507936508e-05,
+ "loss": 0.3055,
+ "step": 2577
+ },
+ {
+ "epoch": 14.731428571428571,
+ "grad_norm": 37.82186508178711,
+ "learning_rate": 3.918730158730159e-05,
+ "loss": 0.4299,
+ "step": 2578
+ },
+ {
+ "epoch": 14.737142857142857,
+ "grad_norm": 39.15916442871094,
+ "learning_rate": 3.918095238095238e-05,
+ "loss": 0.5593,
+ "step": 2579
+ },
+ {
+ "epoch": 14.742857142857144,
+ "grad_norm": 35.86741638183594,
+ "learning_rate": 3.9174603174603175e-05,
+ "loss": 0.3602,
+ "step": 2580
+ },
+ {
+ "epoch": 14.748571428571429,
+ "grad_norm": 37.261138916015625,
+ "learning_rate": 3.916825396825397e-05,
+ "loss": 0.4795,
+ "step": 2581
+ },
+ {
+ "epoch": 14.754285714285714,
+ "grad_norm": 33.5512809753418,
+ "learning_rate": 3.916190476190477e-05,
+ "loss": 0.3735,
+ "step": 2582
+ },
+ {
+ "epoch": 14.76,
+ "grad_norm": 49.997344970703125,
+ "learning_rate": 3.915555555555556e-05,
+ "loss": 0.3427,
+ "step": 2583
+ },
+ {
+ "epoch": 14.765714285714285,
+ "grad_norm": 29.823055267333984,
+ "learning_rate": 3.914920634920635e-05,
+ "loss": 0.4592,
+ "step": 2584
+ },
+ {
+ "epoch": 14.771428571428572,
+ "grad_norm": 33.76132583618164,
+ "learning_rate": 3.9142857142857145e-05,
+ "loss": 0.3816,
+ "step": 2585
+ },
+ {
+ "epoch": 14.777142857142858,
+ "grad_norm": 24.215843200683594,
+ "learning_rate": 3.913650793650794e-05,
+ "loss": 0.3551,
+ "step": 2586
+ },
+ {
+ "epoch": 14.782857142857143,
+ "grad_norm": 1256.5478515625,
+ "learning_rate": 3.913015873015874e-05,
+ "loss": 0.4059,
+ "step": 2587
+ },
+ {
+ "epoch": 14.788571428571428,
+ "grad_norm": 36.900177001953125,
+ "learning_rate": 3.912380952380952e-05,
+ "loss": 0.4487,
+ "step": 2588
+ },
+ {
+ "epoch": 14.794285714285714,
+ "grad_norm": 633.5703735351562,
+ "learning_rate": 3.911746031746032e-05,
+ "loss": 0.3842,
+ "step": 2589
+ },
+ {
+ "epoch": 14.8,
+ "grad_norm": 92.28624725341797,
+ "learning_rate": 3.9111111111111115e-05,
+ "loss": 0.4321,
+ "step": 2590
+ },
+ {
+ "epoch": 14.805714285714286,
+ "grad_norm": 21.139535903930664,
+ "learning_rate": 3.910476190476191e-05,
+ "loss": 0.3397,
+ "step": 2591
+ },
+ {
+ "epoch": 14.811428571428571,
+ "grad_norm": 33.3880500793457,
+ "learning_rate": 3.90984126984127e-05,
+ "loss": 0.5109,
+ "step": 2592
+ },
+ {
+ "epoch": 14.817142857142857,
+ "grad_norm": 60.31396484375,
+ "learning_rate": 3.90920634920635e-05,
+ "loss": 0.4734,
+ "step": 2593
+ },
+ {
+ "epoch": 14.822857142857142,
+ "grad_norm": 35.14171600341797,
+ "learning_rate": 3.9085714285714285e-05,
+ "loss": 0.4481,
+ "step": 2594
+ },
+ {
+ "epoch": 14.82857142857143,
+ "grad_norm": 40.16318130493164,
+ "learning_rate": 3.9079365079365085e-05,
+ "loss": 0.3135,
+ "step": 2595
+ },
+ {
+ "epoch": 14.834285714285715,
+ "grad_norm": 45.186973571777344,
+ "learning_rate": 3.907301587301587e-05,
+ "loss": 0.468,
+ "step": 2596
+ },
+ {
+ "epoch": 14.84,
+ "grad_norm": 55.854042053222656,
+ "learning_rate": 3.906666666666667e-05,
+ "loss": 0.4176,
+ "step": 2597
+ },
+ {
+ "epoch": 14.845714285714285,
+ "grad_norm": 44.230403900146484,
+ "learning_rate": 3.906031746031746e-05,
+ "loss": 0.2856,
+ "step": 2598
+ },
+ {
+ "epoch": 14.85142857142857,
+ "grad_norm": 43.87007141113281,
+ "learning_rate": 3.9053968253968255e-05,
+ "loss": 0.4488,
+ "step": 2599
+ },
+ {
+ "epoch": 14.857142857142858,
+ "grad_norm": 39.99993896484375,
+ "learning_rate": 3.904761904761905e-05,
+ "loss": 0.4659,
+ "step": 2600
+ },
+ {
+ "epoch": 14.862857142857143,
+ "grad_norm": 38.486351013183594,
+ "learning_rate": 3.904126984126985e-05,
+ "loss": 0.4924,
+ "step": 2601
+ },
+ {
+ "epoch": 14.868571428571428,
+ "grad_norm": 58.42378616333008,
+ "learning_rate": 3.903492063492063e-05,
+ "loss": 0.3247,
+ "step": 2602
+ },
+ {
+ "epoch": 14.874285714285714,
+ "grad_norm": 26.708703994750977,
+ "learning_rate": 3.902857142857143e-05,
+ "loss": 0.3187,
+ "step": 2603
+ },
+ {
+ "epoch": 14.88,
+ "grad_norm": 54.7596549987793,
+ "learning_rate": 3.9022222222222225e-05,
+ "loss": 0.311,
+ "step": 2604
+ },
+ {
+ "epoch": 14.885714285714286,
+ "grad_norm": 53.620750427246094,
+ "learning_rate": 3.901587301587302e-05,
+ "loss": 0.3484,
+ "step": 2605
+ },
+ {
+ "epoch": 14.891428571428571,
+ "grad_norm": 28.88561248779297,
+ "learning_rate": 3.900952380952381e-05,
+ "loss": 0.3119,
+ "step": 2606
+ },
+ {
+ "epoch": 14.897142857142857,
+ "grad_norm": 26.30694580078125,
+ "learning_rate": 3.90031746031746e-05,
+ "loss": 0.3085,
+ "step": 2607
+ },
+ {
+ "epoch": 14.902857142857142,
+ "grad_norm": 37.05280685424805,
+ "learning_rate": 3.8996825396825396e-05,
+ "loss": 0.4501,
+ "step": 2608
+ },
+ {
+ "epoch": 14.90857142857143,
+ "grad_norm": 61.25300598144531,
+ "learning_rate": 3.8990476190476195e-05,
+ "loss": 0.6067,
+ "step": 2609
+ },
+ {
+ "epoch": 14.914285714285715,
+ "grad_norm": 79.2222900390625,
+ "learning_rate": 3.898412698412698e-05,
+ "loss": 0.3847,
+ "step": 2610
+ },
+ {
+ "epoch": 14.92,
+ "grad_norm": 79.72723388671875,
+ "learning_rate": 3.897777777777778e-05,
+ "loss": 0.4162,
+ "step": 2611
+ },
+ {
+ "epoch": 14.925714285714285,
+ "grad_norm": 38.832340240478516,
+ "learning_rate": 3.897142857142857e-05,
+ "loss": 0.4211,
+ "step": 2612
+ },
+ {
+ "epoch": 14.93142857142857,
+ "grad_norm": 69.52618408203125,
+ "learning_rate": 3.8965079365079366e-05,
+ "loss": 0.4996,
+ "step": 2613
+ },
+ {
+ "epoch": 14.937142857142858,
+ "grad_norm": 56.463932037353516,
+ "learning_rate": 3.895873015873016e-05,
+ "loss": 0.4568,
+ "step": 2614
+ },
+ {
+ "epoch": 14.942857142857143,
+ "grad_norm": 30.429393768310547,
+ "learning_rate": 3.895238095238096e-05,
+ "loss": 0.3387,
+ "step": 2615
+ },
+ {
+ "epoch": 14.948571428571428,
+ "grad_norm": 660.6868896484375,
+ "learning_rate": 3.894603174603175e-05,
+ "loss": 0.3617,
+ "step": 2616
+ },
+ {
+ "epoch": 14.954285714285714,
+ "grad_norm": 27.04292106628418,
+ "learning_rate": 3.893968253968254e-05,
+ "loss": 0.6176,
+ "step": 2617
+ },
+ {
+ "epoch": 14.96,
+ "grad_norm": 30.616981506347656,
+ "learning_rate": 3.8933333333333336e-05,
+ "loss": 0.4639,
+ "step": 2618
+ },
+ {
+ "epoch": 14.965714285714286,
+ "grad_norm": 47.954803466796875,
+ "learning_rate": 3.892698412698413e-05,
+ "loss": 0.3035,
+ "step": 2619
+ },
+ {
+ "epoch": 14.971428571428572,
+ "grad_norm": 77.13597106933594,
+ "learning_rate": 3.892063492063492e-05,
+ "loss": 0.3525,
+ "step": 2620
+ },
+ {
+ "epoch": 14.977142857142857,
+ "grad_norm": 94.75045013427734,
+ "learning_rate": 3.8914285714285713e-05,
+ "loss": 0.2897,
+ "step": 2621
+ },
+ {
+ "epoch": 14.982857142857142,
+ "grad_norm": 64.9283218383789,
+ "learning_rate": 3.890793650793651e-05,
+ "loss": 0.4123,
+ "step": 2622
+ },
+ {
+ "epoch": 14.98857142857143,
+ "grad_norm": 51.37266540527344,
+ "learning_rate": 3.8901587301587305e-05,
+ "loss": 0.6112,
+ "step": 2623
+ },
+ {
+ "epoch": 14.994285714285715,
+ "grad_norm": 54.290157318115234,
+ "learning_rate": 3.88952380952381e-05,
+ "loss": 0.2984,
+ "step": 2624
+ },
+ {
+ "epoch": 15.0,
+ "grad_norm": 107.26709747314453,
+ "learning_rate": 3.888888888888889e-05,
+ "loss": 0.3927,
+ "step": 2625
+ },
+ {
+ "epoch": 15.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6049719452857971,
+ "eval_map": 0.9119,
+ "eval_map_50": 0.9623,
+ "eval_map_75": 0.9503,
+ "eval_map_large": 0.912,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9119,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.781,
+ "eval_mar_10": 0.9657,
+ "eval_mar_100": 0.9695,
+ "eval_mar_100_per_class": 0.9695,
+ "eval_mar_large": 0.9695,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.7442,
+ "eval_samples_per_second": 21.391,
+ "eval_steps_per_second": 2.692,
+ "step": 2625
+ },
+ {
+ "epoch": 15.005714285714285,
+ "grad_norm": 75.1425552368164,
+ "learning_rate": 3.888253968253969e-05,
+ "loss": 0.4931,
+ "step": 2626
+ },
+ {
+ "epoch": 15.01142857142857,
+ "grad_norm": 37.94574737548828,
+ "learning_rate": 3.8876190476190476e-05,
+ "loss": 0.2928,
+ "step": 2627
+ },
+ {
+ "epoch": 15.017142857142858,
+ "grad_norm": 24.948440551757812,
+ "learning_rate": 3.8869841269841275e-05,
+ "loss": 0.3102,
+ "step": 2628
+ },
+ {
+ "epoch": 15.022857142857143,
+ "grad_norm": 28.053247451782227,
+ "learning_rate": 3.886349206349206e-05,
+ "loss": 0.3639,
+ "step": 2629
+ },
+ {
+ "epoch": 15.028571428571428,
+ "grad_norm": 34.28338623046875,
+ "learning_rate": 3.885714285714286e-05,
+ "loss": 0.2893,
+ "step": 2630
+ },
+ {
+ "epoch": 15.034285714285714,
+ "grad_norm": 223.83596801757812,
+ "learning_rate": 3.885079365079365e-05,
+ "loss": 0.4726,
+ "step": 2631
+ },
+ {
+ "epoch": 15.04,
+ "grad_norm": 40.889259338378906,
+ "learning_rate": 3.8844444444444446e-05,
+ "loss": 0.485,
+ "step": 2632
+ },
+ {
+ "epoch": 15.045714285714286,
+ "grad_norm": 44.691932678222656,
+ "learning_rate": 3.883809523809524e-05,
+ "loss": 0.3396,
+ "step": 2633
+ },
+ {
+ "epoch": 15.051428571428572,
+ "grad_norm": 35.61066818237305,
+ "learning_rate": 3.883174603174604e-05,
+ "loss": 0.4293,
+ "step": 2634
+ },
+ {
+ "epoch": 15.057142857142857,
+ "grad_norm": 260.98980712890625,
+ "learning_rate": 3.8825396825396824e-05,
+ "loss": 0.3622,
+ "step": 2635
+ },
+ {
+ "epoch": 15.062857142857142,
+ "grad_norm": 49.225555419921875,
+ "learning_rate": 3.881904761904762e-05,
+ "loss": 0.2627,
+ "step": 2636
+ },
+ {
+ "epoch": 15.06857142857143,
+ "grad_norm": 70.27244567871094,
+ "learning_rate": 3.8812698412698416e-05,
+ "loss": 0.4287,
+ "step": 2637
+ },
+ {
+ "epoch": 15.074285714285715,
+ "grad_norm": 42.6537971496582,
+ "learning_rate": 3.880634920634921e-05,
+ "loss": 0.315,
+ "step": 2638
+ },
+ {
+ "epoch": 15.08,
+ "grad_norm": 47.73801040649414,
+ "learning_rate": 3.88e-05,
+ "loss": 0.3974,
+ "step": 2639
+ },
+ {
+ "epoch": 15.085714285714285,
+ "grad_norm": 102.1975326538086,
+ "learning_rate": 3.8793650793650794e-05,
+ "loss": 0.4949,
+ "step": 2640
+ },
+ {
+ "epoch": 15.09142857142857,
+ "grad_norm": 236.9488525390625,
+ "learning_rate": 3.8787301587301586e-05,
+ "loss": 0.322,
+ "step": 2641
+ },
+ {
+ "epoch": 15.097142857142858,
+ "grad_norm": 187.68399047851562,
+ "learning_rate": 3.8780952380952386e-05,
+ "loss": 0.3594,
+ "step": 2642
+ },
+ {
+ "epoch": 15.102857142857143,
+ "grad_norm": 142.1114044189453,
+ "learning_rate": 3.877460317460317e-05,
+ "loss": 0.4132,
+ "step": 2643
+ },
+ {
+ "epoch": 15.108571428571429,
+ "grad_norm": 33.342689514160156,
+ "learning_rate": 3.876825396825397e-05,
+ "loss": 0.5165,
+ "step": 2644
+ },
+ {
+ "epoch": 15.114285714285714,
+ "grad_norm": 34.99799728393555,
+ "learning_rate": 3.8761904761904764e-05,
+ "loss": 0.3033,
+ "step": 2645
+ },
+ {
+ "epoch": 15.12,
+ "grad_norm": 33.82915496826172,
+ "learning_rate": 3.8755555555555556e-05,
+ "loss": 0.4889,
+ "step": 2646
+ },
+ {
+ "epoch": 15.125714285714286,
+ "grad_norm": 30.21843147277832,
+ "learning_rate": 3.874920634920635e-05,
+ "loss": 0.3653,
+ "step": 2647
+ },
+ {
+ "epoch": 15.131428571428572,
+ "grad_norm": 35.87263107299805,
+ "learning_rate": 3.874285714285715e-05,
+ "loss": 0.5984,
+ "step": 2648
+ },
+ {
+ "epoch": 15.137142857142857,
+ "grad_norm": 30.697744369506836,
+ "learning_rate": 3.8736507936507934e-05,
+ "loss": 0.4492,
+ "step": 2649
+ },
+ {
+ "epoch": 15.142857142857142,
+ "grad_norm": 49.59101486206055,
+ "learning_rate": 3.8730158730158734e-05,
+ "loss": 0.5365,
+ "step": 2650
+ },
+ {
+ "epoch": 15.14857142857143,
+ "grad_norm": 34.50474166870117,
+ "learning_rate": 3.8723809523809526e-05,
+ "loss": 0.5607,
+ "step": 2651
+ },
+ {
+ "epoch": 15.154285714285715,
+ "grad_norm": 39.87806701660156,
+ "learning_rate": 3.871746031746032e-05,
+ "loss": 0.5181,
+ "step": 2652
+ },
+ {
+ "epoch": 15.16,
+ "grad_norm": 240.49522399902344,
+ "learning_rate": 3.871111111111111e-05,
+ "loss": 0.4354,
+ "step": 2653
+ },
+ {
+ "epoch": 15.165714285714285,
+ "grad_norm": 39.07673645019531,
+ "learning_rate": 3.8704761904761904e-05,
+ "loss": 0.5012,
+ "step": 2654
+ },
+ {
+ "epoch": 15.17142857142857,
+ "grad_norm": 78.40770721435547,
+ "learning_rate": 3.8698412698412704e-05,
+ "loss": 0.2838,
+ "step": 2655
+ },
+ {
+ "epoch": 15.177142857142858,
+ "grad_norm": 44.3741455078125,
+ "learning_rate": 3.8692063492063496e-05,
+ "loss": 0.5775,
+ "step": 2656
+ },
+ {
+ "epoch": 15.182857142857143,
+ "grad_norm": 24.08373260498047,
+ "learning_rate": 3.868571428571429e-05,
+ "loss": 0.4942,
+ "step": 2657
+ },
+ {
+ "epoch": 15.188571428571429,
+ "grad_norm": 34.060638427734375,
+ "learning_rate": 3.867936507936508e-05,
+ "loss": 0.3849,
+ "step": 2658
+ },
+ {
+ "epoch": 15.194285714285714,
+ "grad_norm": 287.13433837890625,
+ "learning_rate": 3.8673015873015874e-05,
+ "loss": 0.473,
+ "step": 2659
+ },
+ {
+ "epoch": 15.2,
+ "grad_norm": 91.75950622558594,
+ "learning_rate": 3.866666666666667e-05,
+ "loss": 0.4938,
+ "step": 2660
+ },
+ {
+ "epoch": 15.205714285714286,
+ "grad_norm": 40.372276306152344,
+ "learning_rate": 3.8660317460317466e-05,
+ "loss": 0.4256,
+ "step": 2661
+ },
+ {
+ "epoch": 15.211428571428572,
+ "grad_norm": 58.20750045776367,
+ "learning_rate": 3.865396825396825e-05,
+ "loss": 0.4944,
+ "step": 2662
+ },
+ {
+ "epoch": 15.217142857142857,
+ "grad_norm": 48.70400619506836,
+ "learning_rate": 3.864761904761905e-05,
+ "loss": 0.5066,
+ "step": 2663
+ },
+ {
+ "epoch": 15.222857142857142,
+ "grad_norm": 59.96915817260742,
+ "learning_rate": 3.8641269841269844e-05,
+ "loss": 0.4956,
+ "step": 2664
+ },
+ {
+ "epoch": 15.228571428571428,
+ "grad_norm": 37.78099060058594,
+ "learning_rate": 3.863492063492064e-05,
+ "loss": 0.5086,
+ "step": 2665
+ },
+ {
+ "epoch": 15.234285714285715,
+ "grad_norm": 28.727447509765625,
+ "learning_rate": 3.862857142857143e-05,
+ "loss": 0.4821,
+ "step": 2666
+ },
+ {
+ "epoch": 15.24,
+ "grad_norm": 57.22956085205078,
+ "learning_rate": 3.862222222222223e-05,
+ "loss": 0.6978,
+ "step": 2667
+ },
+ {
+ "epoch": 15.245714285714286,
+ "grad_norm": 25.5422420501709,
+ "learning_rate": 3.8615873015873015e-05,
+ "loss": 0.6043,
+ "step": 2668
+ },
+ {
+ "epoch": 15.251428571428571,
+ "grad_norm": 607.6710815429688,
+ "learning_rate": 3.8609523809523814e-05,
+ "loss": 0.4465,
+ "step": 2669
+ },
+ {
+ "epoch": 15.257142857142856,
+ "grad_norm": 31.48773765563965,
+ "learning_rate": 3.860317460317461e-05,
+ "loss": 0.4792,
+ "step": 2670
+ },
+ {
+ "epoch": 15.262857142857143,
+ "grad_norm": 33.4849739074707,
+ "learning_rate": 3.85968253968254e-05,
+ "loss": 0.7286,
+ "step": 2671
+ },
+ {
+ "epoch": 15.268571428571429,
+ "grad_norm": 29.74656105041504,
+ "learning_rate": 3.859047619047619e-05,
+ "loss": 0.4638,
+ "step": 2672
+ },
+ {
+ "epoch": 15.274285714285714,
+ "grad_norm": 43.694854736328125,
+ "learning_rate": 3.8584126984126985e-05,
+ "loss": 0.5248,
+ "step": 2673
+ },
+ {
+ "epoch": 15.28,
+ "grad_norm": 42.586055755615234,
+ "learning_rate": 3.857777777777778e-05,
+ "loss": 0.5272,
+ "step": 2674
+ },
+ {
+ "epoch": 15.285714285714286,
+ "grad_norm": 20.127805709838867,
+ "learning_rate": 3.857142857142858e-05,
+ "loss": 0.4056,
+ "step": 2675
+ },
+ {
+ "epoch": 15.291428571428572,
+ "grad_norm": 37.68088912963867,
+ "learning_rate": 3.856507936507936e-05,
+ "loss": 0.4232,
+ "step": 2676
+ },
+ {
+ "epoch": 15.297142857142857,
+ "grad_norm": 37.87181091308594,
+ "learning_rate": 3.855873015873016e-05,
+ "loss": 0.3896,
+ "step": 2677
+ },
+ {
+ "epoch": 15.302857142857142,
+ "grad_norm": 100.7476806640625,
+ "learning_rate": 3.8552380952380955e-05,
+ "loss": 0.5037,
+ "step": 2678
+ },
+ {
+ "epoch": 15.308571428571428,
+ "grad_norm": 48.78540802001953,
+ "learning_rate": 3.854603174603175e-05,
+ "loss": 0.6045,
+ "step": 2679
+ },
+ {
+ "epoch": 15.314285714285715,
+ "grad_norm": 81.76539611816406,
+ "learning_rate": 3.853968253968254e-05,
+ "loss": 0.6549,
+ "step": 2680
+ },
+ {
+ "epoch": 15.32,
+ "grad_norm": 115.28096771240234,
+ "learning_rate": 3.853333333333334e-05,
+ "loss": 0.5465,
+ "step": 2681
+ },
+ {
+ "epoch": 15.325714285714286,
+ "grad_norm": 52.533226013183594,
+ "learning_rate": 3.8526984126984125e-05,
+ "loss": 0.5013,
+ "step": 2682
+ },
+ {
+ "epoch": 15.331428571428571,
+ "grad_norm": 39.55607604980469,
+ "learning_rate": 3.8520634920634925e-05,
+ "loss": 0.4071,
+ "step": 2683
+ },
+ {
+ "epoch": 15.337142857142856,
+ "grad_norm": 37.944557189941406,
+ "learning_rate": 3.851428571428571e-05,
+ "loss": 0.6956,
+ "step": 2684
+ },
+ {
+ "epoch": 15.342857142857143,
+ "grad_norm": 48.310096740722656,
+ "learning_rate": 3.850793650793651e-05,
+ "loss": 0.538,
+ "step": 2685
+ },
+ {
+ "epoch": 15.348571428571429,
+ "grad_norm": 53.69296646118164,
+ "learning_rate": 3.85015873015873e-05,
+ "loss": 0.5177,
+ "step": 2686
+ },
+ {
+ "epoch": 15.354285714285714,
+ "grad_norm": 57.018463134765625,
+ "learning_rate": 3.8495238095238095e-05,
+ "loss": 0.3286,
+ "step": 2687
+ },
+ {
+ "epoch": 15.36,
+ "grad_norm": 44.31338119506836,
+ "learning_rate": 3.848888888888889e-05,
+ "loss": 0.3722,
+ "step": 2688
+ },
+ {
+ "epoch": 15.365714285714287,
+ "grad_norm": 73.21768188476562,
+ "learning_rate": 3.848253968253969e-05,
+ "loss": 0.4856,
+ "step": 2689
+ },
+ {
+ "epoch": 15.371428571428572,
+ "grad_norm": 47.26458740234375,
+ "learning_rate": 3.847619047619048e-05,
+ "loss": 0.4554,
+ "step": 2690
+ },
+ {
+ "epoch": 15.377142857142857,
+ "grad_norm": 33.08348083496094,
+ "learning_rate": 3.846984126984127e-05,
+ "loss": 0.6845,
+ "step": 2691
+ },
+ {
+ "epoch": 15.382857142857143,
+ "grad_norm": 79.1163558959961,
+ "learning_rate": 3.8463492063492065e-05,
+ "loss": 0.695,
+ "step": 2692
+ },
+ {
+ "epoch": 15.388571428571428,
+ "grad_norm": 40.303707122802734,
+ "learning_rate": 3.845714285714286e-05,
+ "loss": 0.5144,
+ "step": 2693
+ },
+ {
+ "epoch": 15.394285714285715,
+ "grad_norm": 65.41890716552734,
+ "learning_rate": 3.845079365079366e-05,
+ "loss": 0.503,
+ "step": 2694
+ },
+ {
+ "epoch": 15.4,
+ "grad_norm": 20.049118041992188,
+ "learning_rate": 3.844444444444444e-05,
+ "loss": 0.4207,
+ "step": 2695
+ },
+ {
+ "epoch": 15.405714285714286,
+ "grad_norm": 85.63333129882812,
+ "learning_rate": 3.843809523809524e-05,
+ "loss": 0.6394,
+ "step": 2696
+ },
+ {
+ "epoch": 15.411428571428571,
+ "grad_norm": 52.71570587158203,
+ "learning_rate": 3.8431746031746035e-05,
+ "loss": 0.5849,
+ "step": 2697
+ },
+ {
+ "epoch": 15.417142857142856,
+ "grad_norm": 81.16397094726562,
+ "learning_rate": 3.842539682539683e-05,
+ "loss": 0.3905,
+ "step": 2698
+ },
+ {
+ "epoch": 15.422857142857143,
+ "grad_norm": 30.789758682250977,
+ "learning_rate": 3.841904761904762e-05,
+ "loss": 0.4725,
+ "step": 2699
+ },
+ {
+ "epoch": 15.428571428571429,
+ "grad_norm": 36.64466094970703,
+ "learning_rate": 3.841269841269842e-05,
+ "loss": 0.3179,
+ "step": 2700
+ },
+ {
+ "epoch": 15.434285714285714,
+ "grad_norm": 62.083412170410156,
+ "learning_rate": 3.8406349206349206e-05,
+ "loss": 0.3341,
+ "step": 2701
+ },
+ {
+ "epoch": 15.44,
+ "grad_norm": 47.09940719604492,
+ "learning_rate": 3.8400000000000005e-05,
+ "loss": 0.4432,
+ "step": 2702
+ },
+ {
+ "epoch": 15.445714285714285,
+ "grad_norm": 30.16719627380371,
+ "learning_rate": 3.839365079365079e-05,
+ "loss": 0.3751,
+ "step": 2703
+ },
+ {
+ "epoch": 15.451428571428572,
+ "grad_norm": 305.1468505859375,
+ "learning_rate": 3.838730158730159e-05,
+ "loss": 0.3978,
+ "step": 2704
+ },
+ {
+ "epoch": 15.457142857142857,
+ "grad_norm": 33.10639190673828,
+ "learning_rate": 3.838095238095238e-05,
+ "loss": 0.3757,
+ "step": 2705
+ },
+ {
+ "epoch": 15.462857142857143,
+ "grad_norm": 36.49113845825195,
+ "learning_rate": 3.8374603174603176e-05,
+ "loss": 0.2957,
+ "step": 2706
+ },
+ {
+ "epoch": 15.468571428571428,
+ "grad_norm": 61.94491958618164,
+ "learning_rate": 3.836825396825397e-05,
+ "loss": 0.36,
+ "step": 2707
+ },
+ {
+ "epoch": 15.474285714285715,
+ "grad_norm": 372.0150146484375,
+ "learning_rate": 3.836190476190477e-05,
+ "loss": 0.4366,
+ "step": 2708
+ },
+ {
+ "epoch": 15.48,
+ "grad_norm": 517.32861328125,
+ "learning_rate": 3.8355555555555553e-05,
+ "loss": 0.4392,
+ "step": 2709
+ },
+ {
+ "epoch": 15.485714285714286,
+ "grad_norm": 45.069026947021484,
+ "learning_rate": 3.834920634920635e-05,
+ "loss": 0.3724,
+ "step": 2710
+ },
+ {
+ "epoch": 15.491428571428571,
+ "grad_norm": 43.171451568603516,
+ "learning_rate": 3.8342857142857146e-05,
+ "loss": 0.4524,
+ "step": 2711
+ },
+ {
+ "epoch": 15.497142857142856,
+ "grad_norm": 64.76368713378906,
+ "learning_rate": 3.833650793650794e-05,
+ "loss": 0.4767,
+ "step": 2712
+ },
+ {
+ "epoch": 15.502857142857144,
+ "grad_norm": 47.70409393310547,
+ "learning_rate": 3.833015873015873e-05,
+ "loss": 0.3534,
+ "step": 2713
+ },
+ {
+ "epoch": 15.508571428571429,
+ "grad_norm": 62.24884033203125,
+ "learning_rate": 3.8323809523809523e-05,
+ "loss": 0.3876,
+ "step": 2714
+ },
+ {
+ "epoch": 15.514285714285714,
+ "grad_norm": 46.99068069458008,
+ "learning_rate": 3.8317460317460316e-05,
+ "loss": 0.7459,
+ "step": 2715
+ },
+ {
+ "epoch": 15.52,
+ "grad_norm": 192.08668518066406,
+ "learning_rate": 3.8311111111111115e-05,
+ "loss": 0.3793,
+ "step": 2716
+ },
+ {
+ "epoch": 15.525714285714285,
+ "grad_norm": 45.692264556884766,
+ "learning_rate": 3.83047619047619e-05,
+ "loss": 0.2748,
+ "step": 2717
+ },
+ {
+ "epoch": 15.531428571428572,
+ "grad_norm": 48.88400650024414,
+ "learning_rate": 3.82984126984127e-05,
+ "loss": 0.3597,
+ "step": 2718
+ },
+ {
+ "epoch": 15.537142857142857,
+ "grad_norm": 36.55814743041992,
+ "learning_rate": 3.8292063492063493e-05,
+ "loss": 0.4922,
+ "step": 2719
+ },
+ {
+ "epoch": 15.542857142857143,
+ "grad_norm": 28.92718505859375,
+ "learning_rate": 3.8285714285714286e-05,
+ "loss": 0.5391,
+ "step": 2720
+ },
+ {
+ "epoch": 15.548571428571428,
+ "grad_norm": 34.36704635620117,
+ "learning_rate": 3.827936507936508e-05,
+ "loss": 0.4521,
+ "step": 2721
+ },
+ {
+ "epoch": 15.554285714285715,
+ "grad_norm": 563.7392578125,
+ "learning_rate": 3.827301587301588e-05,
+ "loss": 0.4306,
+ "step": 2722
+ },
+ {
+ "epoch": 15.56,
+ "grad_norm": 194.91851806640625,
+ "learning_rate": 3.8266666666666664e-05,
+ "loss": 0.4852,
+ "step": 2723
+ },
+ {
+ "epoch": 15.565714285714286,
+ "grad_norm": 47.08349609375,
+ "learning_rate": 3.826031746031746e-05,
+ "loss": 0.4089,
+ "step": 2724
+ },
+ {
+ "epoch": 15.571428571428571,
+ "grad_norm": 203.97264099121094,
+ "learning_rate": 3.8253968253968256e-05,
+ "loss": 0.3607,
+ "step": 2725
+ },
+ {
+ "epoch": 15.577142857142857,
+ "grad_norm": 21.412168502807617,
+ "learning_rate": 3.824761904761905e-05,
+ "loss": 0.3815,
+ "step": 2726
+ },
+ {
+ "epoch": 15.582857142857144,
+ "grad_norm": 26.144681930541992,
+ "learning_rate": 3.824126984126984e-05,
+ "loss": 0.4553,
+ "step": 2727
+ },
+ {
+ "epoch": 15.588571428571429,
+ "grad_norm": 39.761959075927734,
+ "learning_rate": 3.8234920634920634e-05,
+ "loss": 0.3699,
+ "step": 2728
+ },
+ {
+ "epoch": 15.594285714285714,
+ "grad_norm": 22.443368911743164,
+ "learning_rate": 3.822857142857143e-05,
+ "loss": 0.3112,
+ "step": 2729
+ },
+ {
+ "epoch": 15.6,
+ "grad_norm": 40.048221588134766,
+ "learning_rate": 3.8222222222222226e-05,
+ "loss": 0.3681,
+ "step": 2730
+ },
+ {
+ "epoch": 15.605714285714285,
+ "grad_norm": 37.674468994140625,
+ "learning_rate": 3.821587301587302e-05,
+ "loss": 0.3342,
+ "step": 2731
+ },
+ {
+ "epoch": 15.611428571428572,
+ "grad_norm": 33.92021560668945,
+ "learning_rate": 3.820952380952381e-05,
+ "loss": 0.583,
+ "step": 2732
+ },
+ {
+ "epoch": 15.617142857142857,
+ "grad_norm": 64.13064575195312,
+ "learning_rate": 3.820317460317461e-05,
+ "loss": 0.5907,
+ "step": 2733
+ },
+ {
+ "epoch": 15.622857142857143,
+ "grad_norm": 39.63349151611328,
+ "learning_rate": 3.8196825396825396e-05,
+ "loss": 0.4675,
+ "step": 2734
+ },
+ {
+ "epoch": 15.628571428571428,
+ "grad_norm": 43.180381774902344,
+ "learning_rate": 3.8190476190476196e-05,
+ "loss": 0.3661,
+ "step": 2735
+ },
+ {
+ "epoch": 15.634285714285713,
+ "grad_norm": 31.936859130859375,
+ "learning_rate": 3.818412698412698e-05,
+ "loss": 0.3476,
+ "step": 2736
+ },
+ {
+ "epoch": 15.64,
+ "grad_norm": 55.57992935180664,
+ "learning_rate": 3.817777777777778e-05,
+ "loss": 0.3881,
+ "step": 2737
+ },
+ {
+ "epoch": 15.645714285714286,
+ "grad_norm": 33.439857482910156,
+ "learning_rate": 3.8171428571428574e-05,
+ "loss": 0.3601,
+ "step": 2738
+ },
+ {
+ "epoch": 15.651428571428571,
+ "grad_norm": 30.789459228515625,
+ "learning_rate": 3.8165079365079366e-05,
+ "loss": 0.4528,
+ "step": 2739
+ },
+ {
+ "epoch": 15.657142857142857,
+ "grad_norm": 41.949378967285156,
+ "learning_rate": 3.815873015873016e-05,
+ "loss": 0.5464,
+ "step": 2740
+ },
+ {
+ "epoch": 15.662857142857142,
+ "grad_norm": 28.893184661865234,
+ "learning_rate": 3.815238095238096e-05,
+ "loss": 0.3825,
+ "step": 2741
+ },
+ {
+ "epoch": 15.668571428571429,
+ "grad_norm": 79.07247161865234,
+ "learning_rate": 3.8146031746031744e-05,
+ "loss": 0.617,
+ "step": 2742
+ },
+ {
+ "epoch": 15.674285714285714,
+ "grad_norm": 30.040111541748047,
+ "learning_rate": 3.8139682539682544e-05,
+ "loss": 0.4614,
+ "step": 2743
+ },
+ {
+ "epoch": 15.68,
+ "grad_norm": 71.13114929199219,
+ "learning_rate": 3.8133333333333336e-05,
+ "loss": 0.4146,
+ "step": 2744
+ },
+ {
+ "epoch": 15.685714285714285,
+ "grad_norm": 40.686546325683594,
+ "learning_rate": 3.812698412698413e-05,
+ "loss": 0.3792,
+ "step": 2745
+ },
+ {
+ "epoch": 15.691428571428572,
+ "grad_norm": 62.28633499145508,
+ "learning_rate": 3.812063492063492e-05,
+ "loss": 0.5746,
+ "step": 2746
+ },
+ {
+ "epoch": 15.697142857142858,
+ "grad_norm": 1480.721435546875,
+ "learning_rate": 3.8114285714285714e-05,
+ "loss": 0.4081,
+ "step": 2747
+ },
+ {
+ "epoch": 15.702857142857143,
+ "grad_norm": 30.005130767822266,
+ "learning_rate": 3.810793650793651e-05,
+ "loss": 0.4535,
+ "step": 2748
+ },
+ {
+ "epoch": 15.708571428571428,
+ "grad_norm": 61.335205078125,
+ "learning_rate": 3.8101587301587306e-05,
+ "loss": 0.5528,
+ "step": 2749
+ },
+ {
+ "epoch": 15.714285714285714,
+ "grad_norm": 44.220176696777344,
+ "learning_rate": 3.809523809523809e-05,
+ "loss": 0.4153,
+ "step": 2750
+ },
+ {
+ "epoch": 15.72,
+ "grad_norm": 55.29134750366211,
+ "learning_rate": 3.808888888888889e-05,
+ "loss": 0.4407,
+ "step": 2751
+ },
+ {
+ "epoch": 15.725714285714286,
+ "grad_norm": 19.705604553222656,
+ "learning_rate": 3.8082539682539684e-05,
+ "loss": 0.3805,
+ "step": 2752
+ },
+ {
+ "epoch": 15.731428571428571,
+ "grad_norm": 51.96522521972656,
+ "learning_rate": 3.807619047619048e-05,
+ "loss": 0.432,
+ "step": 2753
+ },
+ {
+ "epoch": 15.737142857142857,
+ "grad_norm": 71.69025421142578,
+ "learning_rate": 3.806984126984127e-05,
+ "loss": 0.3856,
+ "step": 2754
+ },
+ {
+ "epoch": 15.742857142857144,
+ "grad_norm": 34.365291595458984,
+ "learning_rate": 3.806349206349207e-05,
+ "loss": 0.4562,
+ "step": 2755
+ },
+ {
+ "epoch": 15.748571428571429,
+ "grad_norm": 56.59248352050781,
+ "learning_rate": 3.8057142857142855e-05,
+ "loss": 0.3469,
+ "step": 2756
+ },
+ {
+ "epoch": 15.754285714285714,
+ "grad_norm": 44.549007415771484,
+ "learning_rate": 3.8050793650793654e-05,
+ "loss": 0.5243,
+ "step": 2757
+ },
+ {
+ "epoch": 15.76,
+ "grad_norm": 834.0654296875,
+ "learning_rate": 3.804444444444445e-05,
+ "loss": 0.455,
+ "step": 2758
+ },
+ {
+ "epoch": 15.765714285714285,
+ "grad_norm": 38.67654037475586,
+ "learning_rate": 3.803809523809524e-05,
+ "loss": 0.3957,
+ "step": 2759
+ },
+ {
+ "epoch": 15.771428571428572,
+ "grad_norm": 27.433307647705078,
+ "learning_rate": 3.803174603174603e-05,
+ "loss": 0.6249,
+ "step": 2760
+ },
+ {
+ "epoch": 15.777142857142858,
+ "grad_norm": 55.26726531982422,
+ "learning_rate": 3.8025396825396825e-05,
+ "loss": 0.4457,
+ "step": 2761
+ },
+ {
+ "epoch": 15.782857142857143,
+ "grad_norm": 131.99993896484375,
+ "learning_rate": 3.8019047619047624e-05,
+ "loss": 0.3892,
+ "step": 2762
+ },
+ {
+ "epoch": 15.788571428571428,
+ "grad_norm": 24.48535919189453,
+ "learning_rate": 3.801269841269842e-05,
+ "loss": 0.2672,
+ "step": 2763
+ },
+ {
+ "epoch": 15.794285714285714,
+ "grad_norm": 36.06562423706055,
+ "learning_rate": 3.800634920634921e-05,
+ "loss": 0.3691,
+ "step": 2764
+ },
+ {
+ "epoch": 15.8,
+ "grad_norm": 76.08797454833984,
+ "learning_rate": 3.8e-05,
+ "loss": 0.533,
+ "step": 2765
+ },
+ {
+ "epoch": 15.805714285714286,
+ "grad_norm": 72.1341781616211,
+ "learning_rate": 3.7993650793650795e-05,
+ "loss": 0.4349,
+ "step": 2766
+ },
+ {
+ "epoch": 15.811428571428571,
+ "grad_norm": 57.52500534057617,
+ "learning_rate": 3.798730158730159e-05,
+ "loss": 0.5375,
+ "step": 2767
+ },
+ {
+ "epoch": 15.817142857142857,
+ "grad_norm": 250.50469970703125,
+ "learning_rate": 3.798095238095239e-05,
+ "loss": 0.3379,
+ "step": 2768
+ },
+ {
+ "epoch": 15.822857142857142,
+ "grad_norm": 52.38911437988281,
+ "learning_rate": 3.797460317460317e-05,
+ "loss": 0.4382,
+ "step": 2769
+ },
+ {
+ "epoch": 15.82857142857143,
+ "grad_norm": 33.9091911315918,
+ "learning_rate": 3.796825396825397e-05,
+ "loss": 0.4578,
+ "step": 2770
+ },
+ {
+ "epoch": 15.834285714285715,
+ "grad_norm": 37.47408676147461,
+ "learning_rate": 3.7961904761904765e-05,
+ "loss": 0.5218,
+ "step": 2771
+ },
+ {
+ "epoch": 15.84,
+ "grad_norm": 93.31388854980469,
+ "learning_rate": 3.795555555555556e-05,
+ "loss": 0.5025,
+ "step": 2772
+ },
+ {
+ "epoch": 15.845714285714285,
+ "grad_norm": 594.1710205078125,
+ "learning_rate": 3.794920634920635e-05,
+ "loss": 0.3977,
+ "step": 2773
+ },
+ {
+ "epoch": 15.85142857142857,
+ "grad_norm": 420.21630859375,
+ "learning_rate": 3.794285714285715e-05,
+ "loss": 0.4002,
+ "step": 2774
+ },
+ {
+ "epoch": 15.857142857142858,
+ "grad_norm": 56.7486572265625,
+ "learning_rate": 3.7936507936507935e-05,
+ "loss": 0.3438,
+ "step": 2775
+ },
+ {
+ "epoch": 15.862857142857143,
+ "grad_norm": 72.90874481201172,
+ "learning_rate": 3.7930158730158735e-05,
+ "loss": 0.3971,
+ "step": 2776
+ },
+ {
+ "epoch": 15.868571428571428,
+ "grad_norm": 54.37617874145508,
+ "learning_rate": 3.792380952380953e-05,
+ "loss": 0.5038,
+ "step": 2777
+ },
+ {
+ "epoch": 15.874285714285714,
+ "grad_norm": 27.699661254882812,
+ "learning_rate": 3.791746031746032e-05,
+ "loss": 0.7789,
+ "step": 2778
+ },
+ {
+ "epoch": 15.88,
+ "grad_norm": 39.549964904785156,
+ "learning_rate": 3.791111111111111e-05,
+ "loss": 0.3185,
+ "step": 2779
+ },
+ {
+ "epoch": 15.885714285714286,
+ "grad_norm": 66.69050598144531,
+ "learning_rate": 3.7904761904761905e-05,
+ "loss": 0.4035,
+ "step": 2780
+ },
+ {
+ "epoch": 15.891428571428571,
+ "grad_norm": 27.588712692260742,
+ "learning_rate": 3.78984126984127e-05,
+ "loss": 0.3815,
+ "step": 2781
+ },
+ {
+ "epoch": 15.897142857142857,
+ "grad_norm": 66.0377197265625,
+ "learning_rate": 3.78920634920635e-05,
+ "loss": 0.5724,
+ "step": 2782
+ },
+ {
+ "epoch": 15.902857142857142,
+ "grad_norm": 53.48280715942383,
+ "learning_rate": 3.788571428571428e-05,
+ "loss": 0.2976,
+ "step": 2783
+ },
+ {
+ "epoch": 15.90857142857143,
+ "grad_norm": 182.84542846679688,
+ "learning_rate": 3.787936507936508e-05,
+ "loss": 0.483,
+ "step": 2784
+ },
+ {
+ "epoch": 15.914285714285715,
+ "grad_norm": 42.55234146118164,
+ "learning_rate": 3.7873015873015875e-05,
+ "loss": 0.5256,
+ "step": 2785
+ },
+ {
+ "epoch": 15.92,
+ "grad_norm": 54.200740814208984,
+ "learning_rate": 3.786666666666667e-05,
+ "loss": 0.5245,
+ "step": 2786
+ },
+ {
+ "epoch": 15.925714285714285,
+ "grad_norm": 43.21343231201172,
+ "learning_rate": 3.786031746031746e-05,
+ "loss": 0.485,
+ "step": 2787
+ },
+ {
+ "epoch": 15.93142857142857,
+ "grad_norm": 71.44453430175781,
+ "learning_rate": 3.785396825396826e-05,
+ "loss": 0.4702,
+ "step": 2788
+ },
+ {
+ "epoch": 15.937142857142858,
+ "grad_norm": 25.477115631103516,
+ "learning_rate": 3.7847619047619046e-05,
+ "loss": 0.754,
+ "step": 2789
+ },
+ {
+ "epoch": 15.942857142857143,
+ "grad_norm": 35.87270736694336,
+ "learning_rate": 3.7841269841269845e-05,
+ "loss": 0.417,
+ "step": 2790
+ },
+ {
+ "epoch": 15.948571428571428,
+ "grad_norm": 31.33983039855957,
+ "learning_rate": 3.783492063492063e-05,
+ "loss": 0.5738,
+ "step": 2791
+ },
+ {
+ "epoch": 15.954285714285714,
+ "grad_norm": 37.12321853637695,
+ "learning_rate": 3.782857142857143e-05,
+ "loss": 0.5188,
+ "step": 2792
+ },
+ {
+ "epoch": 15.96,
+ "grad_norm": 41.17721939086914,
+ "learning_rate": 3.782222222222222e-05,
+ "loss": 0.433,
+ "step": 2793
+ },
+ {
+ "epoch": 15.965714285714286,
+ "grad_norm": 68.94007110595703,
+ "learning_rate": 3.7815873015873016e-05,
+ "loss": 0.2966,
+ "step": 2794
+ },
+ {
+ "epoch": 15.971428571428572,
+ "grad_norm": 747.8803100585938,
+ "learning_rate": 3.780952380952381e-05,
+ "loss": 0.3319,
+ "step": 2795
+ },
+ {
+ "epoch": 15.977142857142857,
+ "grad_norm": 41.70159149169922,
+ "learning_rate": 3.780317460317461e-05,
+ "loss": 0.584,
+ "step": 2796
+ },
+ {
+ "epoch": 15.982857142857142,
+ "grad_norm": 174.68283081054688,
+ "learning_rate": 3.77968253968254e-05,
+ "loss": 0.4363,
+ "step": 2797
+ },
+ {
+ "epoch": 15.98857142857143,
+ "grad_norm": 32.647674560546875,
+ "learning_rate": 3.779047619047619e-05,
+ "loss": 0.4207,
+ "step": 2798
+ },
+ {
+ "epoch": 15.994285714285715,
+ "grad_norm": 23.21664810180664,
+ "learning_rate": 3.7784126984126986e-05,
+ "loss": 0.3731,
+ "step": 2799
+ },
+ {
+ "epoch": 16.0,
+ "grad_norm": 34.9289436340332,
+ "learning_rate": 3.777777777777778e-05,
+ "loss": 0.5294,
+ "step": 2800
+ },
+ {
+ "epoch": 16.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6346626877784729,
+ "eval_map": 0.9101,
+ "eval_map_50": 0.9563,
+ "eval_map_75": 0.9431,
+ "eval_map_large": 0.9104,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9101,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7705,
+ "eval_mar_10": 0.9676,
+ "eval_mar_100": 0.9784,
+ "eval_mar_100_per_class": 0.9784,
+ "eval_mar_large": 0.9784,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 14.2732,
+ "eval_samples_per_second": 20.598,
+ "eval_steps_per_second": 2.592,
+ "step": 2800
+ },
+ {
+ "epoch": 16.005714285714287,
+ "grad_norm": 70.0713119506836,
+ "learning_rate": 3.777142857142858e-05,
+ "loss": 0.3912,
+ "step": 2801
+ },
+ {
+ "epoch": 16.01142857142857,
+ "grad_norm": 164.6759033203125,
+ "learning_rate": 3.7765079365079364e-05,
+ "loss": 0.4858,
+ "step": 2802
+ },
+ {
+ "epoch": 16.017142857142858,
+ "grad_norm": 25.241962432861328,
+ "learning_rate": 3.775873015873016e-05,
+ "loss": 0.3923,
+ "step": 2803
+ },
+ {
+ "epoch": 16.02285714285714,
+ "grad_norm": 41.69196701049805,
+ "learning_rate": 3.7752380952380956e-05,
+ "loss": 0.4699,
+ "step": 2804
+ },
+ {
+ "epoch": 16.02857142857143,
+ "grad_norm": 58.33809280395508,
+ "learning_rate": 3.774603174603175e-05,
+ "loss": 0.3888,
+ "step": 2805
+ },
+ {
+ "epoch": 16.034285714285716,
+ "grad_norm": 31.243627548217773,
+ "learning_rate": 3.773968253968254e-05,
+ "loss": 0.524,
+ "step": 2806
+ },
+ {
+ "epoch": 16.04,
+ "grad_norm": 87.03840637207031,
+ "learning_rate": 3.773333333333334e-05,
+ "loss": 0.4914,
+ "step": 2807
+ },
+ {
+ "epoch": 16.045714285714286,
+ "grad_norm": 22.526199340820312,
+ "learning_rate": 3.7726984126984126e-05,
+ "loss": 0.3523,
+ "step": 2808
+ },
+ {
+ "epoch": 16.05142857142857,
+ "grad_norm": 45.92203903198242,
+ "learning_rate": 3.7720634920634926e-05,
+ "loss": 0.4451,
+ "step": 2809
+ },
+ {
+ "epoch": 16.057142857142857,
+ "grad_norm": 36.594539642333984,
+ "learning_rate": 3.771428571428572e-05,
+ "loss": 0.4803,
+ "step": 2810
+ },
+ {
+ "epoch": 16.062857142857144,
+ "grad_norm": 28.845773696899414,
+ "learning_rate": 3.770793650793651e-05,
+ "loss": 0.343,
+ "step": 2811
+ },
+ {
+ "epoch": 16.068571428571428,
+ "grad_norm": 58.28946304321289,
+ "learning_rate": 3.7701587301587303e-05,
+ "loss": 0.569,
+ "step": 2812
+ },
+ {
+ "epoch": 16.074285714285715,
+ "grad_norm": 48.23463439941406,
+ "learning_rate": 3.7695238095238096e-05,
+ "loss": 0.4001,
+ "step": 2813
+ },
+ {
+ "epoch": 16.08,
+ "grad_norm": 153.3620147705078,
+ "learning_rate": 3.768888888888889e-05,
+ "loss": 0.4701,
+ "step": 2814
+ },
+ {
+ "epoch": 16.085714285714285,
+ "grad_norm": 393.6240539550781,
+ "learning_rate": 3.768253968253969e-05,
+ "loss": 0.4897,
+ "step": 2815
+ },
+ {
+ "epoch": 16.091428571428573,
+ "grad_norm": 63.71856689453125,
+ "learning_rate": 3.7676190476190474e-05,
+ "loss": 0.6476,
+ "step": 2816
+ },
+ {
+ "epoch": 16.097142857142856,
+ "grad_norm": 61.07762145996094,
+ "learning_rate": 3.766984126984127e-05,
+ "loss": 0.392,
+ "step": 2817
+ },
+ {
+ "epoch": 16.102857142857143,
+ "grad_norm": 38.67390441894531,
+ "learning_rate": 3.7663492063492066e-05,
+ "loss": 0.4281,
+ "step": 2818
+ },
+ {
+ "epoch": 16.10857142857143,
+ "grad_norm": 43.751853942871094,
+ "learning_rate": 3.765714285714286e-05,
+ "loss": 0.3907,
+ "step": 2819
+ },
+ {
+ "epoch": 16.114285714285714,
+ "grad_norm": 43.01502990722656,
+ "learning_rate": 3.765079365079365e-05,
+ "loss": 0.334,
+ "step": 2820
+ },
+ {
+ "epoch": 16.12,
+ "grad_norm": 56.10337829589844,
+ "learning_rate": 3.764444444444445e-05,
+ "loss": 0.3067,
+ "step": 2821
+ },
+ {
+ "epoch": 16.125714285714285,
+ "grad_norm": 19.618566513061523,
+ "learning_rate": 3.7638095238095237e-05,
+ "loss": 0.3388,
+ "step": 2822
+ },
+ {
+ "epoch": 16.13142857142857,
+ "grad_norm": 65.33924865722656,
+ "learning_rate": 3.7631746031746036e-05,
+ "loss": 0.4639,
+ "step": 2823
+ },
+ {
+ "epoch": 16.13714285714286,
+ "grad_norm": 44.41948699951172,
+ "learning_rate": 3.762539682539682e-05,
+ "loss": 0.3743,
+ "step": 2824
+ },
+ {
+ "epoch": 16.142857142857142,
+ "grad_norm": 45.491249084472656,
+ "learning_rate": 3.761904761904762e-05,
+ "loss": 0.3742,
+ "step": 2825
+ },
+ {
+ "epoch": 16.14857142857143,
+ "grad_norm": 17.892271041870117,
+ "learning_rate": 3.7612698412698414e-05,
+ "loss": 0.3034,
+ "step": 2826
+ },
+ {
+ "epoch": 16.154285714285713,
+ "grad_norm": 52.51234436035156,
+ "learning_rate": 3.7606349206349207e-05,
+ "loss": 0.5413,
+ "step": 2827
+ },
+ {
+ "epoch": 16.16,
+ "grad_norm": 36.29309844970703,
+ "learning_rate": 3.76e-05,
+ "loss": 0.5266,
+ "step": 2828
+ },
+ {
+ "epoch": 16.165714285714287,
+ "grad_norm": 369.3990783691406,
+ "learning_rate": 3.75936507936508e-05,
+ "loss": 0.3304,
+ "step": 2829
+ },
+ {
+ "epoch": 16.17142857142857,
+ "grad_norm": 43.918094635009766,
+ "learning_rate": 3.7587301587301584e-05,
+ "loss": 0.3597,
+ "step": 2830
+ },
+ {
+ "epoch": 16.177142857142858,
+ "grad_norm": 42.64336395263672,
+ "learning_rate": 3.7580952380952384e-05,
+ "loss": 0.4644,
+ "step": 2831
+ },
+ {
+ "epoch": 16.18285714285714,
+ "grad_norm": 54.486289978027344,
+ "learning_rate": 3.7574603174603176e-05,
+ "loss": 0.4097,
+ "step": 2832
+ },
+ {
+ "epoch": 16.18857142857143,
+ "grad_norm": 43.048362731933594,
+ "learning_rate": 3.756825396825397e-05,
+ "loss": 0.403,
+ "step": 2833
+ },
+ {
+ "epoch": 16.194285714285716,
+ "grad_norm": 85.92316436767578,
+ "learning_rate": 3.756190476190476e-05,
+ "loss": 0.5137,
+ "step": 2834
+ },
+ {
+ "epoch": 16.2,
+ "grad_norm": 33.11586380004883,
+ "learning_rate": 3.7555555555555554e-05,
+ "loss": 0.284,
+ "step": 2835
+ },
+ {
+ "epoch": 16.205714285714286,
+ "grad_norm": 48.35753631591797,
+ "learning_rate": 3.7549206349206354e-05,
+ "loss": 0.5367,
+ "step": 2836
+ },
+ {
+ "epoch": 16.21142857142857,
+ "grad_norm": 41.10136413574219,
+ "learning_rate": 3.7542857142857146e-05,
+ "loss": 0.6013,
+ "step": 2837
+ },
+ {
+ "epoch": 16.217142857142857,
+ "grad_norm": 30.050140380859375,
+ "learning_rate": 3.753650793650794e-05,
+ "loss": 0.453,
+ "step": 2838
+ },
+ {
+ "epoch": 16.222857142857144,
+ "grad_norm": 70.18745422363281,
+ "learning_rate": 3.753015873015873e-05,
+ "loss": 0.4409,
+ "step": 2839
+ },
+ {
+ "epoch": 16.228571428571428,
+ "grad_norm": 42.98176193237305,
+ "learning_rate": 3.752380952380953e-05,
+ "loss": 0.4177,
+ "step": 2840
+ },
+ {
+ "epoch": 16.234285714285715,
+ "grad_norm": 54.87812042236328,
+ "learning_rate": 3.751746031746032e-05,
+ "loss": 0.4733,
+ "step": 2841
+ },
+ {
+ "epoch": 16.24,
+ "grad_norm": 57.734134674072266,
+ "learning_rate": 3.7511111111111116e-05,
+ "loss": 0.5491,
+ "step": 2842
+ },
+ {
+ "epoch": 16.245714285714286,
+ "grad_norm": 26.532094955444336,
+ "learning_rate": 3.750476190476191e-05,
+ "loss": 0.4879,
+ "step": 2843
+ },
+ {
+ "epoch": 16.251428571428573,
+ "grad_norm": 44.592567443847656,
+ "learning_rate": 3.74984126984127e-05,
+ "loss": 0.4672,
+ "step": 2844
+ },
+ {
+ "epoch": 16.257142857142856,
+ "grad_norm": 16.300241470336914,
+ "learning_rate": 3.7492063492063494e-05,
+ "loss": 0.4049,
+ "step": 2845
+ },
+ {
+ "epoch": 16.262857142857143,
+ "grad_norm": 81.98695373535156,
+ "learning_rate": 3.748571428571429e-05,
+ "loss": 0.4012,
+ "step": 2846
+ },
+ {
+ "epoch": 16.268571428571427,
+ "grad_norm": 209.06448364257812,
+ "learning_rate": 3.747936507936508e-05,
+ "loss": 0.3762,
+ "step": 2847
+ },
+ {
+ "epoch": 16.274285714285714,
+ "grad_norm": 53.82666778564453,
+ "learning_rate": 3.747301587301588e-05,
+ "loss": 0.3892,
+ "step": 2848
+ },
+ {
+ "epoch": 16.28,
+ "grad_norm": 53.240901947021484,
+ "learning_rate": 3.7466666666666665e-05,
+ "loss": 0.3839,
+ "step": 2849
+ },
+ {
+ "epoch": 16.285714285714285,
+ "grad_norm": 571.2046508789062,
+ "learning_rate": 3.7460317460317464e-05,
+ "loss": 0.4161,
+ "step": 2850
+ },
+ {
+ "epoch": 16.291428571428572,
+ "grad_norm": 135.78126525878906,
+ "learning_rate": 3.745396825396826e-05,
+ "loss": 0.46,
+ "step": 2851
+ },
+ {
+ "epoch": 16.29714285714286,
+ "grad_norm": 29.33208656311035,
+ "learning_rate": 3.744761904761905e-05,
+ "loss": 0.5425,
+ "step": 2852
+ },
+ {
+ "epoch": 16.302857142857142,
+ "grad_norm": 84.61248779296875,
+ "learning_rate": 3.744126984126984e-05,
+ "loss": 0.4112,
+ "step": 2853
+ },
+ {
+ "epoch": 16.30857142857143,
+ "grad_norm": 49.917423248291016,
+ "learning_rate": 3.743492063492064e-05,
+ "loss": 0.4645,
+ "step": 2854
+ },
+ {
+ "epoch": 16.314285714285713,
+ "grad_norm": 228.82005310058594,
+ "learning_rate": 3.742857142857143e-05,
+ "loss": 0.9195,
+ "step": 2855
+ },
+ {
+ "epoch": 16.32,
+ "grad_norm": 64.351318359375,
+ "learning_rate": 3.742222222222223e-05,
+ "loss": 0.2927,
+ "step": 2856
+ },
+ {
+ "epoch": 16.325714285714287,
+ "grad_norm": 68.40682220458984,
+ "learning_rate": 3.741587301587301e-05,
+ "loss": 0.4447,
+ "step": 2857
+ },
+ {
+ "epoch": 16.33142857142857,
+ "grad_norm": 30.027559280395508,
+ "learning_rate": 3.740952380952381e-05,
+ "loss": 0.5933,
+ "step": 2858
+ },
+ {
+ "epoch": 16.337142857142858,
+ "grad_norm": 62.45819091796875,
+ "learning_rate": 3.7403174603174605e-05,
+ "loss": 0.6428,
+ "step": 2859
+ },
+ {
+ "epoch": 16.34285714285714,
+ "grad_norm": 43.999488830566406,
+ "learning_rate": 3.73968253968254e-05,
+ "loss": 0.525,
+ "step": 2860
+ },
+ {
+ "epoch": 16.34857142857143,
+ "grad_norm": 43.05434799194336,
+ "learning_rate": 3.739047619047619e-05,
+ "loss": 0.3183,
+ "step": 2861
+ },
+ {
+ "epoch": 16.354285714285716,
+ "grad_norm": 44.31922912597656,
+ "learning_rate": 3.738412698412699e-05,
+ "loss": 0.6758,
+ "step": 2862
+ },
+ {
+ "epoch": 16.36,
+ "grad_norm": 50.412818908691406,
+ "learning_rate": 3.7377777777777775e-05,
+ "loss": 0.8291,
+ "step": 2863
+ },
+ {
+ "epoch": 16.365714285714287,
+ "grad_norm": 29.43809700012207,
+ "learning_rate": 3.7371428571428575e-05,
+ "loss": 0.5251,
+ "step": 2864
+ },
+ {
+ "epoch": 16.37142857142857,
+ "grad_norm": 30.69464874267578,
+ "learning_rate": 3.736507936507937e-05,
+ "loss": 0.3024,
+ "step": 2865
+ },
+ {
+ "epoch": 16.377142857142857,
+ "grad_norm": 77.88601684570312,
+ "learning_rate": 3.735873015873016e-05,
+ "loss": 0.4164,
+ "step": 2866
+ },
+ {
+ "epoch": 16.382857142857144,
+ "grad_norm": 47.551692962646484,
+ "learning_rate": 3.735238095238095e-05,
+ "loss": 0.4054,
+ "step": 2867
+ },
+ {
+ "epoch": 16.388571428571428,
+ "grad_norm": 44.56904602050781,
+ "learning_rate": 3.7346031746031745e-05,
+ "loss": 0.4226,
+ "step": 2868
+ },
+ {
+ "epoch": 16.394285714285715,
+ "grad_norm": 47.03104019165039,
+ "learning_rate": 3.733968253968254e-05,
+ "loss": 0.2749,
+ "step": 2869
+ },
+ {
+ "epoch": 16.4,
+ "grad_norm": 22.48953628540039,
+ "learning_rate": 3.733333333333334e-05,
+ "loss": 0.3491,
+ "step": 2870
+ },
+ {
+ "epoch": 16.405714285714286,
+ "grad_norm": 30.08992576599121,
+ "learning_rate": 3.732698412698413e-05,
+ "loss": 0.3032,
+ "step": 2871
+ },
+ {
+ "epoch": 16.411428571428573,
+ "grad_norm": 45.1522102355957,
+ "learning_rate": 3.732063492063492e-05,
+ "loss": 0.5812,
+ "step": 2872
+ },
+ {
+ "epoch": 16.417142857142856,
+ "grad_norm": 132.7905731201172,
+ "learning_rate": 3.7314285714285715e-05,
+ "loss": 0.4708,
+ "step": 2873
+ },
+ {
+ "epoch": 16.422857142857143,
+ "grad_norm": 49.749820709228516,
+ "learning_rate": 3.730793650793651e-05,
+ "loss": 0.4062,
+ "step": 2874
+ },
+ {
+ "epoch": 16.428571428571427,
+ "grad_norm": 28.60721778869629,
+ "learning_rate": 3.730158730158731e-05,
+ "loss": 0.3039,
+ "step": 2875
+ },
+ {
+ "epoch": 16.434285714285714,
+ "grad_norm": 88.1325912475586,
+ "learning_rate": 3.72952380952381e-05,
+ "loss": 0.4934,
+ "step": 2876
+ },
+ {
+ "epoch": 16.44,
+ "grad_norm": 30.96939468383789,
+ "learning_rate": 3.728888888888889e-05,
+ "loss": 0.4601,
+ "step": 2877
+ },
+ {
+ "epoch": 16.445714285714285,
+ "grad_norm": 28.20155906677246,
+ "learning_rate": 3.7282539682539685e-05,
+ "loss": 0.4153,
+ "step": 2878
+ },
+ {
+ "epoch": 16.451428571428572,
+ "grad_norm": 21.22536277770996,
+ "learning_rate": 3.727619047619048e-05,
+ "loss": 0.3379,
+ "step": 2879
+ },
+ {
+ "epoch": 16.457142857142856,
+ "grad_norm": 54.106266021728516,
+ "learning_rate": 3.726984126984127e-05,
+ "loss": 0.3259,
+ "step": 2880
+ },
+ {
+ "epoch": 16.462857142857143,
+ "grad_norm": 38.92586135864258,
+ "learning_rate": 3.726349206349207e-05,
+ "loss": 0.4354,
+ "step": 2881
+ },
+ {
+ "epoch": 16.46857142857143,
+ "grad_norm": 116.48697662353516,
+ "learning_rate": 3.7257142857142856e-05,
+ "loss": 0.3147,
+ "step": 2882
+ },
+ {
+ "epoch": 16.474285714285713,
+ "grad_norm": 49.13833999633789,
+ "learning_rate": 3.7250793650793655e-05,
+ "loss": 0.3508,
+ "step": 2883
+ },
+ {
+ "epoch": 16.48,
+ "grad_norm": 22.998550415039062,
+ "learning_rate": 3.724444444444445e-05,
+ "loss": 0.4516,
+ "step": 2884
+ },
+ {
+ "epoch": 16.485714285714284,
+ "grad_norm": 29.565940856933594,
+ "learning_rate": 3.723809523809524e-05,
+ "loss": 0.3818,
+ "step": 2885
+ },
+ {
+ "epoch": 16.49142857142857,
+ "grad_norm": 52.65448760986328,
+ "learning_rate": 3.723174603174603e-05,
+ "loss": 0.4555,
+ "step": 2886
+ },
+ {
+ "epoch": 16.497142857142858,
+ "grad_norm": 29.000492095947266,
+ "learning_rate": 3.722539682539683e-05,
+ "loss": 0.6371,
+ "step": 2887
+ },
+ {
+ "epoch": 16.502857142857142,
+ "grad_norm": 35.8780403137207,
+ "learning_rate": 3.721904761904762e-05,
+ "loss": 0.4364,
+ "step": 2888
+ },
+ {
+ "epoch": 16.50857142857143,
+ "grad_norm": 48.73274612426758,
+ "learning_rate": 3.721269841269842e-05,
+ "loss": 0.3026,
+ "step": 2889
+ },
+ {
+ "epoch": 16.514285714285712,
+ "grad_norm": 25.079204559326172,
+ "learning_rate": 3.7206349206349204e-05,
+ "loss": 0.4094,
+ "step": 2890
+ },
+ {
+ "epoch": 16.52,
+ "grad_norm": 126.92987823486328,
+ "learning_rate": 3.72e-05,
+ "loss": 0.3633,
+ "step": 2891
+ },
+ {
+ "epoch": 16.525714285714287,
+ "grad_norm": 37.80255126953125,
+ "learning_rate": 3.7193650793650796e-05,
+ "loss": 0.3065,
+ "step": 2892
+ },
+ {
+ "epoch": 16.53142857142857,
+ "grad_norm": 23.101350784301758,
+ "learning_rate": 3.718730158730159e-05,
+ "loss": 0.5256,
+ "step": 2893
+ },
+ {
+ "epoch": 16.537142857142857,
+ "grad_norm": 15.055671691894531,
+ "learning_rate": 3.718095238095238e-05,
+ "loss": 0.2747,
+ "step": 2894
+ },
+ {
+ "epoch": 16.542857142857144,
+ "grad_norm": 20.80678367614746,
+ "learning_rate": 3.717460317460318e-05,
+ "loss": 0.4808,
+ "step": 2895
+ },
+ {
+ "epoch": 16.548571428571428,
+ "grad_norm": 64.26889038085938,
+ "learning_rate": 3.7168253968253966e-05,
+ "loss": 0.4706,
+ "step": 2896
+ },
+ {
+ "epoch": 16.554285714285715,
+ "grad_norm": 42.92035675048828,
+ "learning_rate": 3.7161904761904766e-05,
+ "loss": 0.4106,
+ "step": 2897
+ },
+ {
+ "epoch": 16.56,
+ "grad_norm": 103.74580383300781,
+ "learning_rate": 3.715555555555555e-05,
+ "loss": 0.4485,
+ "step": 2898
+ },
+ {
+ "epoch": 16.565714285714286,
+ "grad_norm": 32.300621032714844,
+ "learning_rate": 3.714920634920635e-05,
+ "loss": 0.3642,
+ "step": 2899
+ },
+ {
+ "epoch": 16.571428571428573,
+ "grad_norm": 71.21755981445312,
+ "learning_rate": 3.7142857142857143e-05,
+ "loss": 0.3378,
+ "step": 2900
+ },
+ {
+ "epoch": 16.577142857142857,
+ "grad_norm": 58.02399826049805,
+ "learning_rate": 3.7136507936507936e-05,
+ "loss": 0.4404,
+ "step": 2901
+ },
+ {
+ "epoch": 16.582857142857144,
+ "grad_norm": 34.18932342529297,
+ "learning_rate": 3.713015873015873e-05,
+ "loss": 0.289,
+ "step": 2902
+ },
+ {
+ "epoch": 16.588571428571427,
+ "grad_norm": 25.37967300415039,
+ "learning_rate": 3.712380952380953e-05,
+ "loss": 0.3399,
+ "step": 2903
+ },
+ {
+ "epoch": 16.594285714285714,
+ "grad_norm": 31.51453399658203,
+ "learning_rate": 3.711746031746032e-05,
+ "loss": 0.38,
+ "step": 2904
+ },
+ {
+ "epoch": 16.6,
+ "grad_norm": 87.80889892578125,
+ "learning_rate": 3.7111111111111113e-05,
+ "loss": 0.4182,
+ "step": 2905
+ },
+ {
+ "epoch": 16.605714285714285,
+ "grad_norm": 48.856910705566406,
+ "learning_rate": 3.7104761904761906e-05,
+ "loss": 0.5091,
+ "step": 2906
+ },
+ {
+ "epoch": 16.611428571428572,
+ "grad_norm": 47.48170471191406,
+ "learning_rate": 3.70984126984127e-05,
+ "loss": 0.4715,
+ "step": 2907
+ },
+ {
+ "epoch": 16.617142857142856,
+ "grad_norm": 68.18540954589844,
+ "learning_rate": 3.709206349206349e-05,
+ "loss": 0.3001,
+ "step": 2908
+ },
+ {
+ "epoch": 16.622857142857143,
+ "grad_norm": 74.32144165039062,
+ "learning_rate": 3.7085714285714284e-05,
+ "loss": 0.3279,
+ "step": 2909
+ },
+ {
+ "epoch": 16.62857142857143,
+ "grad_norm": 46.929603576660156,
+ "learning_rate": 3.707936507936508e-05,
+ "loss": 0.4057,
+ "step": 2910
+ },
+ {
+ "epoch": 16.634285714285713,
+ "grad_norm": 138.69009399414062,
+ "learning_rate": 3.7073015873015876e-05,
+ "loss": 0.4818,
+ "step": 2911
+ },
+ {
+ "epoch": 16.64,
+ "grad_norm": 26.396936416625977,
+ "learning_rate": 3.706666666666667e-05,
+ "loss": 0.3299,
+ "step": 2912
+ },
+ {
+ "epoch": 16.645714285714284,
+ "grad_norm": 55.15882110595703,
+ "learning_rate": 3.706031746031746e-05,
+ "loss": 0.2829,
+ "step": 2913
+ },
+ {
+ "epoch": 16.65142857142857,
+ "grad_norm": 46.559879302978516,
+ "learning_rate": 3.705396825396826e-05,
+ "loss": 0.2844,
+ "step": 2914
+ },
+ {
+ "epoch": 16.65714285714286,
+ "grad_norm": 48.5512809753418,
+ "learning_rate": 3.7047619047619047e-05,
+ "loss": 0.3257,
+ "step": 2915
+ },
+ {
+ "epoch": 16.662857142857142,
+ "grad_norm": 30.01243019104004,
+ "learning_rate": 3.7041269841269846e-05,
+ "loss": 0.4041,
+ "step": 2916
+ },
+ {
+ "epoch": 16.66857142857143,
+ "grad_norm": 39.398494720458984,
+ "learning_rate": 3.703492063492064e-05,
+ "loss": 0.358,
+ "step": 2917
+ },
+ {
+ "epoch": 16.674285714285713,
+ "grad_norm": 26.072412490844727,
+ "learning_rate": 3.702857142857143e-05,
+ "loss": 0.5225,
+ "step": 2918
+ },
+ {
+ "epoch": 16.68,
+ "grad_norm": 46.15665817260742,
+ "learning_rate": 3.7022222222222224e-05,
+ "loss": 0.3997,
+ "step": 2919
+ },
+ {
+ "epoch": 16.685714285714287,
+ "grad_norm": 34.84221649169922,
+ "learning_rate": 3.7015873015873017e-05,
+ "loss": 0.3285,
+ "step": 2920
+ },
+ {
+ "epoch": 16.69142857142857,
+ "grad_norm": 174.21360778808594,
+ "learning_rate": 3.700952380952381e-05,
+ "loss": 0.3407,
+ "step": 2921
+ },
+ {
+ "epoch": 16.697142857142858,
+ "grad_norm": 75.02750396728516,
+ "learning_rate": 3.700317460317461e-05,
+ "loss": 0.3567,
+ "step": 2922
+ },
+ {
+ "epoch": 16.70285714285714,
+ "grad_norm": 30.636423110961914,
+ "learning_rate": 3.6996825396825394e-05,
+ "loss": 0.3554,
+ "step": 2923
+ },
+ {
+ "epoch": 16.708571428571428,
+ "grad_norm": 56.132301330566406,
+ "learning_rate": 3.6990476190476194e-05,
+ "loss": 0.3686,
+ "step": 2924
+ },
+ {
+ "epoch": 16.714285714285715,
+ "grad_norm": 37.22405242919922,
+ "learning_rate": 3.6984126984126986e-05,
+ "loss": 0.4488,
+ "step": 2925
+ },
+ {
+ "epoch": 16.72,
+ "grad_norm": 35.3524169921875,
+ "learning_rate": 3.697777777777778e-05,
+ "loss": 0.361,
+ "step": 2926
+ },
+ {
+ "epoch": 16.725714285714286,
+ "grad_norm": 39.20980453491211,
+ "learning_rate": 3.697142857142857e-05,
+ "loss": 0.3084,
+ "step": 2927
+ },
+ {
+ "epoch": 16.731428571428573,
+ "grad_norm": 38.22840881347656,
+ "learning_rate": 3.696507936507937e-05,
+ "loss": 0.2693,
+ "step": 2928
+ },
+ {
+ "epoch": 16.737142857142857,
+ "grad_norm": 65.73733520507812,
+ "learning_rate": 3.695873015873016e-05,
+ "loss": 0.4157,
+ "step": 2929
+ },
+ {
+ "epoch": 16.742857142857144,
+ "grad_norm": 18.452590942382812,
+ "learning_rate": 3.6952380952380956e-05,
+ "loss": 0.2219,
+ "step": 2930
+ },
+ {
+ "epoch": 16.748571428571427,
+ "grad_norm": 298.76788330078125,
+ "learning_rate": 3.694603174603174e-05,
+ "loss": 0.3322,
+ "step": 2931
+ },
+ {
+ "epoch": 16.754285714285714,
+ "grad_norm": 30.17223358154297,
+ "learning_rate": 3.693968253968254e-05,
+ "loss": 0.3755,
+ "step": 2932
+ },
+ {
+ "epoch": 16.76,
+ "grad_norm": 49.654518127441406,
+ "learning_rate": 3.6933333333333334e-05,
+ "loss": 0.5506,
+ "step": 2933
+ },
+ {
+ "epoch": 16.765714285714285,
+ "grad_norm": 82.34229278564453,
+ "learning_rate": 3.692698412698413e-05,
+ "loss": 0.3268,
+ "step": 2934
+ },
+ {
+ "epoch": 16.771428571428572,
+ "grad_norm": 23.01658058166504,
+ "learning_rate": 3.692063492063492e-05,
+ "loss": 0.3225,
+ "step": 2935
+ },
+ {
+ "epoch": 16.777142857142856,
+ "grad_norm": 60.740543365478516,
+ "learning_rate": 3.691428571428572e-05,
+ "loss": 0.3819,
+ "step": 2936
+ },
+ {
+ "epoch": 16.782857142857143,
+ "grad_norm": 28.621213912963867,
+ "learning_rate": 3.6907936507936505e-05,
+ "loss": 0.3418,
+ "step": 2937
+ },
+ {
+ "epoch": 16.78857142857143,
+ "grad_norm": 31.923004150390625,
+ "learning_rate": 3.6901587301587304e-05,
+ "loss": 0.4115,
+ "step": 2938
+ },
+ {
+ "epoch": 16.794285714285714,
+ "grad_norm": 33.66228103637695,
+ "learning_rate": 3.68952380952381e-05,
+ "loss": 0.4161,
+ "step": 2939
+ },
+ {
+ "epoch": 16.8,
+ "grad_norm": 142.54344177246094,
+ "learning_rate": 3.688888888888889e-05,
+ "loss": 0.4069,
+ "step": 2940
+ },
+ {
+ "epoch": 16.805714285714284,
+ "grad_norm": 241.73483276367188,
+ "learning_rate": 3.688253968253968e-05,
+ "loss": 0.456,
+ "step": 2941
+ },
+ {
+ "epoch": 16.81142857142857,
+ "grad_norm": 37.379966735839844,
+ "learning_rate": 3.6876190476190475e-05,
+ "loss": 0.4624,
+ "step": 2942
+ },
+ {
+ "epoch": 16.81714285714286,
+ "grad_norm": 56.85732650756836,
+ "learning_rate": 3.6869841269841274e-05,
+ "loss": 0.3562,
+ "step": 2943
+ },
+ {
+ "epoch": 16.822857142857142,
+ "grad_norm": 82.3569107055664,
+ "learning_rate": 3.686349206349207e-05,
+ "loss": 0.3482,
+ "step": 2944
+ },
+ {
+ "epoch": 16.82857142857143,
+ "grad_norm": 79.0669174194336,
+ "learning_rate": 3.685714285714286e-05,
+ "loss": 0.3587,
+ "step": 2945
+ },
+ {
+ "epoch": 16.834285714285713,
+ "grad_norm": 35.16192626953125,
+ "learning_rate": 3.685079365079365e-05,
+ "loss": 0.2756,
+ "step": 2946
+ },
+ {
+ "epoch": 16.84,
+ "grad_norm": 33.91846466064453,
+ "learning_rate": 3.6844444444444445e-05,
+ "loss": 0.3415,
+ "step": 2947
+ },
+ {
+ "epoch": 16.845714285714287,
+ "grad_norm": 41.016937255859375,
+ "learning_rate": 3.683809523809524e-05,
+ "loss": 0.4056,
+ "step": 2948
+ },
+ {
+ "epoch": 16.85142857142857,
+ "grad_norm": 916.173828125,
+ "learning_rate": 3.683174603174604e-05,
+ "loss": 0.2955,
+ "step": 2949
+ },
+ {
+ "epoch": 16.857142857142858,
+ "grad_norm": 78.90193939208984,
+ "learning_rate": 3.682539682539683e-05,
+ "loss": 0.7352,
+ "step": 2950
+ },
+ {
+ "epoch": 16.86285714285714,
+ "grad_norm": 264.2978515625,
+ "learning_rate": 3.681904761904762e-05,
+ "loss": 0.4367,
+ "step": 2951
+ },
+ {
+ "epoch": 16.86857142857143,
+ "grad_norm": 37.197513580322266,
+ "learning_rate": 3.6812698412698415e-05,
+ "loss": 0.3406,
+ "step": 2952
+ },
+ {
+ "epoch": 16.874285714285715,
+ "grad_norm": 70.45872497558594,
+ "learning_rate": 3.680634920634921e-05,
+ "loss": 0.338,
+ "step": 2953
+ },
+ {
+ "epoch": 16.88,
+ "grad_norm": 21.459962844848633,
+ "learning_rate": 3.68e-05,
+ "loss": 0.5137,
+ "step": 2954
+ },
+ {
+ "epoch": 16.885714285714286,
+ "grad_norm": 96.63094329833984,
+ "learning_rate": 3.67936507936508e-05,
+ "loss": 0.4871,
+ "step": 2955
+ },
+ {
+ "epoch": 16.89142857142857,
+ "grad_norm": 36.50151062011719,
+ "learning_rate": 3.6787301587301585e-05,
+ "loss": 0.4339,
+ "step": 2956
+ },
+ {
+ "epoch": 16.897142857142857,
+ "grad_norm": 28.966182708740234,
+ "learning_rate": 3.6780952380952385e-05,
+ "loss": 0.454,
+ "step": 2957
+ },
+ {
+ "epoch": 16.902857142857144,
+ "grad_norm": 37.07241439819336,
+ "learning_rate": 3.677460317460318e-05,
+ "loss": 0.428,
+ "step": 2958
+ },
+ {
+ "epoch": 16.908571428571427,
+ "grad_norm": 297.666015625,
+ "learning_rate": 3.676825396825397e-05,
+ "loss": 0.52,
+ "step": 2959
+ },
+ {
+ "epoch": 16.914285714285715,
+ "grad_norm": 71.79993438720703,
+ "learning_rate": 3.676190476190476e-05,
+ "loss": 0.3673,
+ "step": 2960
+ },
+ {
+ "epoch": 16.92,
+ "grad_norm": 86.70881652832031,
+ "learning_rate": 3.675555555555556e-05,
+ "loss": 0.4297,
+ "step": 2961
+ },
+ {
+ "epoch": 16.925714285714285,
+ "grad_norm": 72.41195678710938,
+ "learning_rate": 3.674920634920635e-05,
+ "loss": 0.4856,
+ "step": 2962
+ },
+ {
+ "epoch": 16.931428571428572,
+ "grad_norm": 50.68089294433594,
+ "learning_rate": 3.674285714285715e-05,
+ "loss": 0.4572,
+ "step": 2963
+ },
+ {
+ "epoch": 16.937142857142856,
+ "grad_norm": 54.560829162597656,
+ "learning_rate": 3.673650793650793e-05,
+ "loss": 0.3642,
+ "step": 2964
+ },
+ {
+ "epoch": 16.942857142857143,
+ "grad_norm": 73.60107421875,
+ "learning_rate": 3.673015873015873e-05,
+ "loss": 0.3029,
+ "step": 2965
+ },
+ {
+ "epoch": 16.94857142857143,
+ "grad_norm": 67.49030303955078,
+ "learning_rate": 3.6723809523809525e-05,
+ "loss": 0.2984,
+ "step": 2966
+ },
+ {
+ "epoch": 16.954285714285714,
+ "grad_norm": 69.36986541748047,
+ "learning_rate": 3.671746031746032e-05,
+ "loss": 0.6765,
+ "step": 2967
+ },
+ {
+ "epoch": 16.96,
+ "grad_norm": 61.46893310546875,
+ "learning_rate": 3.671111111111111e-05,
+ "loss": 0.4871,
+ "step": 2968
+ },
+ {
+ "epoch": 16.965714285714284,
+ "grad_norm": 67.94396209716797,
+ "learning_rate": 3.670476190476191e-05,
+ "loss": 0.4181,
+ "step": 2969
+ },
+ {
+ "epoch": 16.97142857142857,
+ "grad_norm": 54.474754333496094,
+ "learning_rate": 3.6698412698412696e-05,
+ "loss": 0.4505,
+ "step": 2970
+ },
+ {
+ "epoch": 16.97714285714286,
+ "grad_norm": 29.335342407226562,
+ "learning_rate": 3.6692063492063495e-05,
+ "loss": 0.4793,
+ "step": 2971
+ },
+ {
+ "epoch": 16.982857142857142,
+ "grad_norm": 64.14628601074219,
+ "learning_rate": 3.668571428571429e-05,
+ "loss": 0.412,
+ "step": 2972
+ },
+ {
+ "epoch": 16.98857142857143,
+ "grad_norm": 90.38223266601562,
+ "learning_rate": 3.667936507936508e-05,
+ "loss": 0.3173,
+ "step": 2973
+ },
+ {
+ "epoch": 16.994285714285713,
+ "grad_norm": 61.129180908203125,
+ "learning_rate": 3.667301587301587e-05,
+ "loss": 0.2842,
+ "step": 2974
+ },
+ {
+ "epoch": 17.0,
+ "grad_norm": 238.58200073242188,
+ "learning_rate": 3.6666666666666666e-05,
+ "loss": 0.5036,
+ "step": 2975
+ },
+ {
+ "epoch": 17.0,
+ "eval_classes": 0,
+ "eval_loss": 0.624391496181488,
+ "eval_map": 0.917,
+ "eval_map_50": 0.9557,
+ "eval_map_75": 0.9461,
+ "eval_map_large": 0.9173,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.917,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7778,
+ "eval_mar_10": 0.9679,
+ "eval_mar_100": 0.9781,
+ "eval_mar_100_per_class": 0.9781,
+ "eval_mar_large": 0.9781,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 15.4052,
+ "eval_samples_per_second": 19.084,
+ "eval_steps_per_second": 2.402,
+ "step": 2975
+ },
+ {
+ "epoch": 17.005714285714287,
+ "grad_norm": 39.56782531738281,
+ "learning_rate": 3.666031746031746e-05,
+ "loss": 0.5377,
+ "step": 2976
+ },
+ {
+ "epoch": 17.01142857142857,
+ "grad_norm": 73.38939666748047,
+ "learning_rate": 3.665396825396826e-05,
+ "loss": 0.3228,
+ "step": 2977
+ },
+ {
+ "epoch": 17.017142857142858,
+ "grad_norm": 69.7122802734375,
+ "learning_rate": 3.664761904761905e-05,
+ "loss": 0.2551,
+ "step": 2978
+ },
+ {
+ "epoch": 17.02285714285714,
+ "grad_norm": 40.25721740722656,
+ "learning_rate": 3.664126984126984e-05,
+ "loss": 0.452,
+ "step": 2979
+ },
+ {
+ "epoch": 17.02857142857143,
+ "grad_norm": 101.58560943603516,
+ "learning_rate": 3.6634920634920636e-05,
+ "loss": 0.5753,
+ "step": 2980
+ },
+ {
+ "epoch": 17.034285714285716,
+ "grad_norm": 51.15162658691406,
+ "learning_rate": 3.662857142857143e-05,
+ "loss": 0.4272,
+ "step": 2981
+ },
+ {
+ "epoch": 17.04,
+ "grad_norm": 44.9673957824707,
+ "learning_rate": 3.662222222222223e-05,
+ "loss": 0.3446,
+ "step": 2982
+ },
+ {
+ "epoch": 17.045714285714286,
+ "grad_norm": 63.32594299316406,
+ "learning_rate": 3.661587301587302e-05,
+ "loss": 0.2733,
+ "step": 2983
+ },
+ {
+ "epoch": 17.05142857142857,
+ "grad_norm": 79.81805419921875,
+ "learning_rate": 3.660952380952381e-05,
+ "loss": 0.3849,
+ "step": 2984
+ },
+ {
+ "epoch": 17.057142857142857,
+ "grad_norm": 109.49251556396484,
+ "learning_rate": 3.6603174603174606e-05,
+ "loss": 0.6633,
+ "step": 2985
+ },
+ {
+ "epoch": 17.062857142857144,
+ "grad_norm": 55.362274169921875,
+ "learning_rate": 3.65968253968254e-05,
+ "loss": 0.2912,
+ "step": 2986
+ },
+ {
+ "epoch": 17.068571428571428,
+ "grad_norm": 28.252758026123047,
+ "learning_rate": 3.659047619047619e-05,
+ "loss": 0.346,
+ "step": 2987
+ },
+ {
+ "epoch": 17.074285714285715,
+ "grad_norm": 34.26996994018555,
+ "learning_rate": 3.658412698412699e-05,
+ "loss": 0.476,
+ "step": 2988
+ },
+ {
+ "epoch": 17.08,
+ "grad_norm": 28.678089141845703,
+ "learning_rate": 3.6577777777777776e-05,
+ "loss": 0.381,
+ "step": 2989
+ },
+ {
+ "epoch": 17.085714285714285,
+ "grad_norm": 46.957889556884766,
+ "learning_rate": 3.6571428571428576e-05,
+ "loss": 0.3678,
+ "step": 2990
+ },
+ {
+ "epoch": 17.091428571428573,
+ "grad_norm": 25.080982208251953,
+ "learning_rate": 3.656507936507937e-05,
+ "loss": 0.4421,
+ "step": 2991
+ },
+ {
+ "epoch": 17.097142857142856,
+ "grad_norm": 60.31837844848633,
+ "learning_rate": 3.655873015873016e-05,
+ "loss": 0.3178,
+ "step": 2992
+ },
+ {
+ "epoch": 17.102857142857143,
+ "grad_norm": 39.1340446472168,
+ "learning_rate": 3.6552380952380953e-05,
+ "loss": 0.5619,
+ "step": 2993
+ },
+ {
+ "epoch": 17.10857142857143,
+ "grad_norm": 106.81813049316406,
+ "learning_rate": 3.654603174603175e-05,
+ "loss": 0.419,
+ "step": 2994
+ },
+ {
+ "epoch": 17.114285714285714,
+ "grad_norm": 25.078277587890625,
+ "learning_rate": 3.653968253968254e-05,
+ "loss": 0.37,
+ "step": 2995
+ },
+ {
+ "epoch": 17.12,
+ "grad_norm": 26.536161422729492,
+ "learning_rate": 3.653333333333334e-05,
+ "loss": 0.4933,
+ "step": 2996
+ },
+ {
+ "epoch": 17.125714285714285,
+ "grad_norm": 32.90211868286133,
+ "learning_rate": 3.6526984126984124e-05,
+ "loss": 0.4095,
+ "step": 2997
+ },
+ {
+ "epoch": 17.13142857142857,
+ "grad_norm": 39.533451080322266,
+ "learning_rate": 3.6520634920634923e-05,
+ "loss": 0.4326,
+ "step": 2998
+ },
+ {
+ "epoch": 17.13714285714286,
+ "grad_norm": 23.58121109008789,
+ "learning_rate": 3.6514285714285716e-05,
+ "loss": 0.3752,
+ "step": 2999
+ },
+ {
+ "epoch": 17.142857142857142,
+ "grad_norm": 50.7238883972168,
+ "learning_rate": 3.650793650793651e-05,
+ "loss": 0.3307,
+ "step": 3000
+ },
+ {
+ "epoch": 17.14857142857143,
+ "grad_norm": 34.07426452636719,
+ "learning_rate": 3.65015873015873e-05,
+ "loss": 0.3281,
+ "step": 3001
+ },
+ {
+ "epoch": 17.154285714285713,
+ "grad_norm": 35.5304069519043,
+ "learning_rate": 3.64952380952381e-05,
+ "loss": 0.3887,
+ "step": 3002
+ },
+ {
+ "epoch": 17.16,
+ "grad_norm": 88.01673126220703,
+ "learning_rate": 3.648888888888889e-05,
+ "loss": 0.4569,
+ "step": 3003
+ },
+ {
+ "epoch": 17.165714285714287,
+ "grad_norm": 63.651283264160156,
+ "learning_rate": 3.6482539682539686e-05,
+ "loss": 0.4374,
+ "step": 3004
+ },
+ {
+ "epoch": 17.17142857142857,
+ "grad_norm": 25.01502799987793,
+ "learning_rate": 3.647619047619048e-05,
+ "loss": 0.6202,
+ "step": 3005
+ },
+ {
+ "epoch": 17.177142857142858,
+ "grad_norm": 75.9010009765625,
+ "learning_rate": 3.646984126984127e-05,
+ "loss": 0.5473,
+ "step": 3006
+ },
+ {
+ "epoch": 17.18285714285714,
+ "grad_norm": 276.6267395019531,
+ "learning_rate": 3.6463492063492064e-05,
+ "loss": 0.3612,
+ "step": 3007
+ },
+ {
+ "epoch": 17.18857142857143,
+ "grad_norm": 23.316967010498047,
+ "learning_rate": 3.6457142857142857e-05,
+ "loss": 0.3967,
+ "step": 3008
+ },
+ {
+ "epoch": 17.194285714285716,
+ "grad_norm": 81.98340606689453,
+ "learning_rate": 3.645079365079365e-05,
+ "loss": 0.4192,
+ "step": 3009
+ },
+ {
+ "epoch": 17.2,
+ "grad_norm": 33.65291976928711,
+ "learning_rate": 3.644444444444445e-05,
+ "loss": 0.3537,
+ "step": 3010
+ },
+ {
+ "epoch": 17.205714285714286,
+ "grad_norm": 35.97007751464844,
+ "learning_rate": 3.6438095238095234e-05,
+ "loss": 0.5064,
+ "step": 3011
+ },
+ {
+ "epoch": 17.21142857142857,
+ "grad_norm": 56.14579391479492,
+ "learning_rate": 3.6431746031746034e-05,
+ "loss": 0.3929,
+ "step": 3012
+ },
+ {
+ "epoch": 17.217142857142857,
+ "grad_norm": 278.3437194824219,
+ "learning_rate": 3.6425396825396827e-05,
+ "loss": 0.6538,
+ "step": 3013
+ },
+ {
+ "epoch": 17.222857142857144,
+ "grad_norm": 61.36874771118164,
+ "learning_rate": 3.641904761904762e-05,
+ "loss": 0.4674,
+ "step": 3014
+ },
+ {
+ "epoch": 17.228571428571428,
+ "grad_norm": 97.59931182861328,
+ "learning_rate": 3.641269841269841e-05,
+ "loss": 0.4182,
+ "step": 3015
+ },
+ {
+ "epoch": 17.234285714285715,
+ "grad_norm": 35.03517532348633,
+ "learning_rate": 3.640634920634921e-05,
+ "loss": 0.3226,
+ "step": 3016
+ },
+ {
+ "epoch": 17.24,
+ "grad_norm": 21.447011947631836,
+ "learning_rate": 3.6400000000000004e-05,
+ "loss": 0.411,
+ "step": 3017
+ },
+ {
+ "epoch": 17.245714285714286,
+ "grad_norm": 66.64205169677734,
+ "learning_rate": 3.6393650793650796e-05,
+ "loss": 0.3091,
+ "step": 3018
+ },
+ {
+ "epoch": 17.251428571428573,
+ "grad_norm": 53.64439392089844,
+ "learning_rate": 3.638730158730159e-05,
+ "loss": 0.591,
+ "step": 3019
+ },
+ {
+ "epoch": 17.257142857142856,
+ "grad_norm": 48.212242126464844,
+ "learning_rate": 3.638095238095238e-05,
+ "loss": 0.3899,
+ "step": 3020
+ },
+ {
+ "epoch": 17.262857142857143,
+ "grad_norm": 30.058061599731445,
+ "learning_rate": 3.637460317460318e-05,
+ "loss": 0.242,
+ "step": 3021
+ },
+ {
+ "epoch": 17.268571428571427,
+ "grad_norm": 31.290916442871094,
+ "learning_rate": 3.636825396825397e-05,
+ "loss": 0.3487,
+ "step": 3022
+ },
+ {
+ "epoch": 17.274285714285714,
+ "grad_norm": 67.60273742675781,
+ "learning_rate": 3.6361904761904766e-05,
+ "loss": 0.3153,
+ "step": 3023
+ },
+ {
+ "epoch": 17.28,
+ "grad_norm": 43.177005767822266,
+ "learning_rate": 3.635555555555556e-05,
+ "loss": 0.2997,
+ "step": 3024
+ },
+ {
+ "epoch": 17.285714285714285,
+ "grad_norm": 66.38561248779297,
+ "learning_rate": 3.634920634920635e-05,
+ "loss": 0.2976,
+ "step": 3025
+ },
+ {
+ "epoch": 17.291428571428572,
+ "grad_norm": 34.7242317199707,
+ "learning_rate": 3.6342857142857144e-05,
+ "loss": 0.2728,
+ "step": 3026
+ },
+ {
+ "epoch": 17.29714285714286,
+ "grad_norm": 22.105117797851562,
+ "learning_rate": 3.6336507936507944e-05,
+ "loss": 0.4381,
+ "step": 3027
+ },
+ {
+ "epoch": 17.302857142857142,
+ "grad_norm": 49.966773986816406,
+ "learning_rate": 3.633015873015873e-05,
+ "loss": 0.3749,
+ "step": 3028
+ },
+ {
+ "epoch": 17.30857142857143,
+ "grad_norm": 64.73253631591797,
+ "learning_rate": 3.632380952380953e-05,
+ "loss": 0.5557,
+ "step": 3029
+ },
+ {
+ "epoch": 17.314285714285713,
+ "grad_norm": 35.297607421875,
+ "learning_rate": 3.6317460317460315e-05,
+ "loss": 0.4947,
+ "step": 3030
+ },
+ {
+ "epoch": 17.32,
+ "grad_norm": 30.194656372070312,
+ "learning_rate": 3.6311111111111114e-05,
+ "loss": 0.3203,
+ "step": 3031
+ },
+ {
+ "epoch": 17.325714285714287,
+ "grad_norm": 125.94969177246094,
+ "learning_rate": 3.630476190476191e-05,
+ "loss": 0.6281,
+ "step": 3032
+ },
+ {
+ "epoch": 17.33142857142857,
+ "grad_norm": 671.4624633789062,
+ "learning_rate": 3.62984126984127e-05,
+ "loss": 0.2862,
+ "step": 3033
+ },
+ {
+ "epoch": 17.337142857142858,
+ "grad_norm": 33.021636962890625,
+ "learning_rate": 3.629206349206349e-05,
+ "loss": 0.3952,
+ "step": 3034
+ },
+ {
+ "epoch": 17.34285714285714,
+ "grad_norm": 56.70850372314453,
+ "learning_rate": 3.628571428571429e-05,
+ "loss": 0.4269,
+ "step": 3035
+ },
+ {
+ "epoch": 17.34857142857143,
+ "grad_norm": 35.1391487121582,
+ "learning_rate": 3.627936507936508e-05,
+ "loss": 0.3951,
+ "step": 3036
+ },
+ {
+ "epoch": 17.354285714285716,
+ "grad_norm": 46.28184509277344,
+ "learning_rate": 3.627301587301588e-05,
+ "loss": 0.3088,
+ "step": 3037
+ },
+ {
+ "epoch": 17.36,
+ "grad_norm": 26.50900650024414,
+ "learning_rate": 3.626666666666667e-05,
+ "loss": 0.3955,
+ "step": 3038
+ },
+ {
+ "epoch": 17.365714285714287,
+ "grad_norm": 29.32590103149414,
+ "learning_rate": 3.626031746031746e-05,
+ "loss": 0.3719,
+ "step": 3039
+ },
+ {
+ "epoch": 17.37142857142857,
+ "grad_norm": 50.76521301269531,
+ "learning_rate": 3.6253968253968255e-05,
+ "loss": 0.4154,
+ "step": 3040
+ },
+ {
+ "epoch": 17.377142857142857,
+ "grad_norm": 64.29402160644531,
+ "learning_rate": 3.624761904761905e-05,
+ "loss": 0.3531,
+ "step": 3041
+ },
+ {
+ "epoch": 17.382857142857144,
+ "grad_norm": 60.52572250366211,
+ "learning_rate": 3.624126984126984e-05,
+ "loss": 0.4497,
+ "step": 3042
+ },
+ {
+ "epoch": 17.388571428571428,
+ "grad_norm": 24.278873443603516,
+ "learning_rate": 3.623492063492064e-05,
+ "loss": 0.3828,
+ "step": 3043
+ },
+ {
+ "epoch": 17.394285714285715,
+ "grad_norm": 70.13561248779297,
+ "learning_rate": 3.6228571428571425e-05,
+ "loss": 0.3123,
+ "step": 3044
+ },
+ {
+ "epoch": 17.4,
+ "grad_norm": 63.94584274291992,
+ "learning_rate": 3.6222222222222225e-05,
+ "loss": 0.4073,
+ "step": 3045
+ },
+ {
+ "epoch": 17.405714285714286,
+ "grad_norm": 15.398472785949707,
+ "learning_rate": 3.621587301587302e-05,
+ "loss": 0.2719,
+ "step": 3046
+ },
+ {
+ "epoch": 17.411428571428573,
+ "grad_norm": 62.43785858154297,
+ "learning_rate": 3.620952380952381e-05,
+ "loss": 0.309,
+ "step": 3047
+ },
+ {
+ "epoch": 17.417142857142856,
+ "grad_norm": 69.08655548095703,
+ "learning_rate": 3.62031746031746e-05,
+ "loss": 0.2742,
+ "step": 3048
+ },
+ {
+ "epoch": 17.422857142857143,
+ "grad_norm": 45.39681625366211,
+ "learning_rate": 3.61968253968254e-05,
+ "loss": 0.3325,
+ "step": 3049
+ },
+ {
+ "epoch": 17.428571428571427,
+ "grad_norm": 39.39765930175781,
+ "learning_rate": 3.619047619047619e-05,
+ "loss": 0.5171,
+ "step": 3050
+ },
+ {
+ "epoch": 17.434285714285714,
+ "grad_norm": 92.37077331542969,
+ "learning_rate": 3.618412698412699e-05,
+ "loss": 0.5371,
+ "step": 3051
+ },
+ {
+ "epoch": 17.44,
+ "grad_norm": 32.013282775878906,
+ "learning_rate": 3.617777777777778e-05,
+ "loss": 0.3515,
+ "step": 3052
+ },
+ {
+ "epoch": 17.445714285714285,
+ "grad_norm": 71.93909454345703,
+ "learning_rate": 3.617142857142857e-05,
+ "loss": 0.3569,
+ "step": 3053
+ },
+ {
+ "epoch": 17.451428571428572,
+ "grad_norm": 30.76221466064453,
+ "learning_rate": 3.6165079365079365e-05,
+ "loss": 0.4061,
+ "step": 3054
+ },
+ {
+ "epoch": 17.457142857142856,
+ "grad_norm": 48.4053955078125,
+ "learning_rate": 3.615873015873016e-05,
+ "loss": 0.3483,
+ "step": 3055
+ },
+ {
+ "epoch": 17.462857142857143,
+ "grad_norm": 77.21479797363281,
+ "learning_rate": 3.615238095238096e-05,
+ "loss": 0.2637,
+ "step": 3056
+ },
+ {
+ "epoch": 17.46857142857143,
+ "grad_norm": 82.1701889038086,
+ "learning_rate": 3.614603174603175e-05,
+ "loss": 0.2644,
+ "step": 3057
+ },
+ {
+ "epoch": 17.474285714285713,
+ "grad_norm": 29.19261360168457,
+ "learning_rate": 3.613968253968254e-05,
+ "loss": 0.3375,
+ "step": 3058
+ },
+ {
+ "epoch": 17.48,
+ "grad_norm": 44.2556037902832,
+ "learning_rate": 3.6133333333333335e-05,
+ "loss": 0.3602,
+ "step": 3059
+ },
+ {
+ "epoch": 17.485714285714284,
+ "grad_norm": 47.6033935546875,
+ "learning_rate": 3.6126984126984135e-05,
+ "loss": 0.2414,
+ "step": 3060
+ },
+ {
+ "epoch": 17.49142857142857,
+ "grad_norm": 87.08003997802734,
+ "learning_rate": 3.612063492063492e-05,
+ "loss": 0.5281,
+ "step": 3061
+ },
+ {
+ "epoch": 17.497142857142858,
+ "grad_norm": 33.60626983642578,
+ "learning_rate": 3.611428571428572e-05,
+ "loss": 0.452,
+ "step": 3062
+ },
+ {
+ "epoch": 17.502857142857142,
+ "grad_norm": 40.42241287231445,
+ "learning_rate": 3.6107936507936506e-05,
+ "loss": 0.3873,
+ "step": 3063
+ },
+ {
+ "epoch": 17.50857142857143,
+ "grad_norm": 61.18794250488281,
+ "learning_rate": 3.6101587301587305e-05,
+ "loss": 0.3682,
+ "step": 3064
+ },
+ {
+ "epoch": 17.514285714285712,
+ "grad_norm": 39.720245361328125,
+ "learning_rate": 3.60952380952381e-05,
+ "loss": 0.3718,
+ "step": 3065
+ },
+ {
+ "epoch": 17.52,
+ "grad_norm": 39.689727783203125,
+ "learning_rate": 3.608888888888889e-05,
+ "loss": 0.5894,
+ "step": 3066
+ },
+ {
+ "epoch": 17.525714285714287,
+ "grad_norm": 39.23014831542969,
+ "learning_rate": 3.608253968253968e-05,
+ "loss": 0.4398,
+ "step": 3067
+ },
+ {
+ "epoch": 17.53142857142857,
+ "grad_norm": 40.88357162475586,
+ "learning_rate": 3.607619047619048e-05,
+ "loss": 0.3682,
+ "step": 3068
+ },
+ {
+ "epoch": 17.537142857142857,
+ "grad_norm": 73.21632385253906,
+ "learning_rate": 3.606984126984127e-05,
+ "loss": 0.2962,
+ "step": 3069
+ },
+ {
+ "epoch": 17.542857142857144,
+ "grad_norm": 42.4008674621582,
+ "learning_rate": 3.606349206349207e-05,
+ "loss": 0.4491,
+ "step": 3070
+ },
+ {
+ "epoch": 17.548571428571428,
+ "grad_norm": 92.43096923828125,
+ "learning_rate": 3.605714285714286e-05,
+ "loss": 0.34,
+ "step": 3071
+ },
+ {
+ "epoch": 17.554285714285715,
+ "grad_norm": 30.038530349731445,
+ "learning_rate": 3.605079365079365e-05,
+ "loss": 0.4729,
+ "step": 3072
+ },
+ {
+ "epoch": 17.56,
+ "grad_norm": 28.09479522705078,
+ "learning_rate": 3.6044444444444446e-05,
+ "loss": 0.3499,
+ "step": 3073
+ },
+ {
+ "epoch": 17.565714285714286,
+ "grad_norm": 115.12698364257812,
+ "learning_rate": 3.603809523809524e-05,
+ "loss": 0.3443,
+ "step": 3074
+ },
+ {
+ "epoch": 17.571428571428573,
+ "grad_norm": 40.825111389160156,
+ "learning_rate": 3.603174603174603e-05,
+ "loss": 0.6257,
+ "step": 3075
+ },
+ {
+ "epoch": 17.577142857142857,
+ "grad_norm": 28.286985397338867,
+ "learning_rate": 3.602539682539683e-05,
+ "loss": 0.484,
+ "step": 3076
+ },
+ {
+ "epoch": 17.582857142857144,
+ "grad_norm": 38.43437957763672,
+ "learning_rate": 3.6019047619047616e-05,
+ "loss": 0.2579,
+ "step": 3077
+ },
+ {
+ "epoch": 17.588571428571427,
+ "grad_norm": 123.68315887451172,
+ "learning_rate": 3.6012698412698416e-05,
+ "loss": 0.3633,
+ "step": 3078
+ },
+ {
+ "epoch": 17.594285714285714,
+ "grad_norm": 47.84682083129883,
+ "learning_rate": 3.600634920634921e-05,
+ "loss": 0.5472,
+ "step": 3079
+ },
+ {
+ "epoch": 17.6,
+ "grad_norm": 36.36436462402344,
+ "learning_rate": 3.6e-05,
+ "loss": 0.3362,
+ "step": 3080
+ },
+ {
+ "epoch": 17.605714285714285,
+ "grad_norm": 56.942665100097656,
+ "learning_rate": 3.5993650793650794e-05,
+ "loss": 0.5156,
+ "step": 3081
+ },
+ {
+ "epoch": 17.611428571428572,
+ "grad_norm": 396.4652099609375,
+ "learning_rate": 3.598730158730159e-05,
+ "loss": 0.4402,
+ "step": 3082
+ },
+ {
+ "epoch": 17.617142857142856,
+ "grad_norm": 25.519779205322266,
+ "learning_rate": 3.598095238095238e-05,
+ "loss": 0.3491,
+ "step": 3083
+ },
+ {
+ "epoch": 17.622857142857143,
+ "grad_norm": 29.500160217285156,
+ "learning_rate": 3.597460317460318e-05,
+ "loss": 0.4003,
+ "step": 3084
+ },
+ {
+ "epoch": 17.62857142857143,
+ "grad_norm": 65.59475708007812,
+ "learning_rate": 3.596825396825397e-05,
+ "loss": 0.4087,
+ "step": 3085
+ },
+ {
+ "epoch": 17.634285714285713,
+ "grad_norm": 90.88926696777344,
+ "learning_rate": 3.5961904761904764e-05,
+ "loss": 0.3308,
+ "step": 3086
+ },
+ {
+ "epoch": 17.64,
+ "grad_norm": 31.910295486450195,
+ "learning_rate": 3.5955555555555556e-05,
+ "loss": 0.3877,
+ "step": 3087
+ },
+ {
+ "epoch": 17.645714285714284,
+ "grad_norm": 40.765869140625,
+ "learning_rate": 3.594920634920635e-05,
+ "loss": 0.396,
+ "step": 3088
+ },
+ {
+ "epoch": 17.65142857142857,
+ "grad_norm": 22.41646385192871,
+ "learning_rate": 3.594285714285714e-05,
+ "loss": 0.3837,
+ "step": 3089
+ },
+ {
+ "epoch": 17.65714285714286,
+ "grad_norm": 77.16261291503906,
+ "learning_rate": 3.593650793650794e-05,
+ "loss": 0.3443,
+ "step": 3090
+ },
+ {
+ "epoch": 17.662857142857142,
+ "grad_norm": 33.615787506103516,
+ "learning_rate": 3.5930158730158733e-05,
+ "loss": 0.2343,
+ "step": 3091
+ },
+ {
+ "epoch": 17.66857142857143,
+ "grad_norm": 26.944686889648438,
+ "learning_rate": 3.5923809523809526e-05,
+ "loss": 0.429,
+ "step": 3092
+ },
+ {
+ "epoch": 17.674285714285713,
+ "grad_norm": 90.06888580322266,
+ "learning_rate": 3.591746031746032e-05,
+ "loss": 0.4934,
+ "step": 3093
+ },
+ {
+ "epoch": 17.68,
+ "grad_norm": 46.003074645996094,
+ "learning_rate": 3.591111111111111e-05,
+ "loss": 0.525,
+ "step": 3094
+ },
+ {
+ "epoch": 17.685714285714287,
+ "grad_norm": 67.316162109375,
+ "learning_rate": 3.590476190476191e-05,
+ "loss": 0.3631,
+ "step": 3095
+ },
+ {
+ "epoch": 17.69142857142857,
+ "grad_norm": 50.84532165527344,
+ "learning_rate": 3.58984126984127e-05,
+ "loss": 0.3075,
+ "step": 3096
+ },
+ {
+ "epoch": 17.697142857142858,
+ "grad_norm": 50.38624572753906,
+ "learning_rate": 3.5892063492063496e-05,
+ "loss": 0.4122,
+ "step": 3097
+ },
+ {
+ "epoch": 17.70285714285714,
+ "grad_norm": 49.8840217590332,
+ "learning_rate": 3.588571428571429e-05,
+ "loss": 0.3559,
+ "step": 3098
+ },
+ {
+ "epoch": 17.708571428571428,
+ "grad_norm": 48.15559005737305,
+ "learning_rate": 3.587936507936508e-05,
+ "loss": 0.5752,
+ "step": 3099
+ },
+ {
+ "epoch": 17.714285714285715,
+ "grad_norm": 62.33614730834961,
+ "learning_rate": 3.5873015873015874e-05,
+ "loss": 0.3423,
+ "step": 3100
+ },
+ {
+ "epoch": 17.72,
+ "grad_norm": 35.41064453125,
+ "learning_rate": 3.586666666666667e-05,
+ "loss": 0.3472,
+ "step": 3101
+ },
+ {
+ "epoch": 17.725714285714286,
+ "grad_norm": 53.6688346862793,
+ "learning_rate": 3.586031746031746e-05,
+ "loss": 0.4619,
+ "step": 3102
+ },
+ {
+ "epoch": 17.731428571428573,
+ "grad_norm": 36.99428939819336,
+ "learning_rate": 3.585396825396826e-05,
+ "loss": 0.3232,
+ "step": 3103
+ },
+ {
+ "epoch": 17.737142857142857,
+ "grad_norm": 377.96697998046875,
+ "learning_rate": 3.584761904761905e-05,
+ "loss": 0.4811,
+ "step": 3104
+ },
+ {
+ "epoch": 17.742857142857144,
+ "grad_norm": 34.98124694824219,
+ "learning_rate": 3.5841269841269844e-05,
+ "loss": 0.5067,
+ "step": 3105
+ },
+ {
+ "epoch": 17.748571428571427,
+ "grad_norm": 53.51387405395508,
+ "learning_rate": 3.5834920634920637e-05,
+ "loss": 0.4038,
+ "step": 3106
+ },
+ {
+ "epoch": 17.754285714285714,
+ "grad_norm": 63.63230514526367,
+ "learning_rate": 3.582857142857143e-05,
+ "loss": 0.3999,
+ "step": 3107
+ },
+ {
+ "epoch": 17.76,
+ "grad_norm": 85.73343658447266,
+ "learning_rate": 3.582222222222222e-05,
+ "loss": 0.3618,
+ "step": 3108
+ },
+ {
+ "epoch": 17.765714285714285,
+ "grad_norm": 45.70502853393555,
+ "learning_rate": 3.581587301587302e-05,
+ "loss": 0.2773,
+ "step": 3109
+ },
+ {
+ "epoch": 17.771428571428572,
+ "grad_norm": 46.77682113647461,
+ "learning_rate": 3.580952380952381e-05,
+ "loss": 0.4154,
+ "step": 3110
+ },
+ {
+ "epoch": 17.777142857142856,
+ "grad_norm": 87.49949645996094,
+ "learning_rate": 3.5803174603174607e-05,
+ "loss": 0.4066,
+ "step": 3111
+ },
+ {
+ "epoch": 17.782857142857143,
+ "grad_norm": 44.131649017333984,
+ "learning_rate": 3.57968253968254e-05,
+ "loss": 0.3703,
+ "step": 3112
+ },
+ {
+ "epoch": 17.78857142857143,
+ "grad_norm": 430.80548095703125,
+ "learning_rate": 3.579047619047619e-05,
+ "loss": 0.4413,
+ "step": 3113
+ },
+ {
+ "epoch": 17.794285714285714,
+ "grad_norm": 68.55734252929688,
+ "learning_rate": 3.5784126984126984e-05,
+ "loss": 0.3507,
+ "step": 3114
+ },
+ {
+ "epoch": 17.8,
+ "grad_norm": 55.162235260009766,
+ "learning_rate": 3.577777777777778e-05,
+ "loss": 0.3646,
+ "step": 3115
+ },
+ {
+ "epoch": 17.805714285714284,
+ "grad_norm": 76.46490478515625,
+ "learning_rate": 3.577142857142857e-05,
+ "loss": 0.4046,
+ "step": 3116
+ },
+ {
+ "epoch": 17.81142857142857,
+ "grad_norm": 30.54099464416504,
+ "learning_rate": 3.576507936507937e-05,
+ "loss": 0.4883,
+ "step": 3117
+ },
+ {
+ "epoch": 17.81714285714286,
+ "grad_norm": 94.63453674316406,
+ "learning_rate": 3.5758730158730155e-05,
+ "loss": 0.4853,
+ "step": 3118
+ },
+ {
+ "epoch": 17.822857142857142,
+ "grad_norm": 52.40849304199219,
+ "learning_rate": 3.5752380952380954e-05,
+ "loss": 0.3569,
+ "step": 3119
+ },
+ {
+ "epoch": 17.82857142857143,
+ "grad_norm": 77.03597259521484,
+ "learning_rate": 3.574603174603175e-05,
+ "loss": 0.3342,
+ "step": 3120
+ },
+ {
+ "epoch": 17.834285714285713,
+ "grad_norm": 31.358612060546875,
+ "learning_rate": 3.573968253968254e-05,
+ "loss": 0.3779,
+ "step": 3121
+ },
+ {
+ "epoch": 17.84,
+ "grad_norm": 82.40419006347656,
+ "learning_rate": 3.573333333333333e-05,
+ "loss": 0.364,
+ "step": 3122
+ },
+ {
+ "epoch": 17.845714285714287,
+ "grad_norm": 142.53746032714844,
+ "learning_rate": 3.572698412698413e-05,
+ "loss": 0.4036,
+ "step": 3123
+ },
+ {
+ "epoch": 17.85142857142857,
+ "grad_norm": 70.76148986816406,
+ "learning_rate": 3.5720634920634924e-05,
+ "loss": 0.3868,
+ "step": 3124
+ },
+ {
+ "epoch": 17.857142857142858,
+ "grad_norm": 38.553592681884766,
+ "learning_rate": 3.571428571428572e-05,
+ "loss": 0.4233,
+ "step": 3125
+ },
+ {
+ "epoch": 17.86285714285714,
+ "grad_norm": 95.4551773071289,
+ "learning_rate": 3.570793650793651e-05,
+ "loss": 0.336,
+ "step": 3126
+ },
+ {
+ "epoch": 17.86857142857143,
+ "grad_norm": 46.99102783203125,
+ "learning_rate": 3.57015873015873e-05,
+ "loss": 0.3251,
+ "step": 3127
+ },
+ {
+ "epoch": 17.874285714285715,
+ "grad_norm": 39.050758361816406,
+ "learning_rate": 3.5695238095238095e-05,
+ "loss": 0.405,
+ "step": 3128
+ },
+ {
+ "epoch": 17.88,
+ "grad_norm": 113.9549331665039,
+ "learning_rate": 3.568888888888889e-05,
+ "loss": 0.3941,
+ "step": 3129
+ },
+ {
+ "epoch": 17.885714285714286,
+ "grad_norm": 65.36485290527344,
+ "learning_rate": 3.568253968253969e-05,
+ "loss": 0.3726,
+ "step": 3130
+ },
+ {
+ "epoch": 17.89142857142857,
+ "grad_norm": 39.975582122802734,
+ "learning_rate": 3.567619047619048e-05,
+ "loss": 0.2399,
+ "step": 3131
+ },
+ {
+ "epoch": 17.897142857142857,
+ "grad_norm": 42.00217819213867,
+ "learning_rate": 3.566984126984127e-05,
+ "loss": 0.4181,
+ "step": 3132
+ },
+ {
+ "epoch": 17.902857142857144,
+ "grad_norm": 54.97571563720703,
+ "learning_rate": 3.5663492063492065e-05,
+ "loss": 0.5467,
+ "step": 3133
+ },
+ {
+ "epoch": 17.908571428571427,
+ "grad_norm": 67.74969482421875,
+ "learning_rate": 3.5657142857142864e-05,
+ "loss": 0.7542,
+ "step": 3134
+ },
+ {
+ "epoch": 17.914285714285715,
+ "grad_norm": 98.58827209472656,
+ "learning_rate": 3.565079365079365e-05,
+ "loss": 0.5048,
+ "step": 3135
+ },
+ {
+ "epoch": 17.92,
+ "grad_norm": 47.780155181884766,
+ "learning_rate": 3.564444444444445e-05,
+ "loss": 0.3109,
+ "step": 3136
+ },
+ {
+ "epoch": 17.925714285714285,
+ "grad_norm": 43.75344467163086,
+ "learning_rate": 3.5638095238095235e-05,
+ "loss": 0.3467,
+ "step": 3137
+ },
+ {
+ "epoch": 17.931428571428572,
+ "grad_norm": 66.09368896484375,
+ "learning_rate": 3.5631746031746035e-05,
+ "loss": 0.3271,
+ "step": 3138
+ },
+ {
+ "epoch": 17.937142857142856,
+ "grad_norm": 25.249197006225586,
+ "learning_rate": 3.562539682539683e-05,
+ "loss": 0.5889,
+ "step": 3139
+ },
+ {
+ "epoch": 17.942857142857143,
+ "grad_norm": 43.17115783691406,
+ "learning_rate": 3.561904761904762e-05,
+ "loss": 0.3516,
+ "step": 3140
+ },
+ {
+ "epoch": 17.94857142857143,
+ "grad_norm": 54.95415496826172,
+ "learning_rate": 3.561269841269841e-05,
+ "loss": 0.4138,
+ "step": 3141
+ },
+ {
+ "epoch": 17.954285714285714,
+ "grad_norm": 52.29851531982422,
+ "learning_rate": 3.560634920634921e-05,
+ "loss": 0.3052,
+ "step": 3142
+ },
+ {
+ "epoch": 17.96,
+ "grad_norm": 51.46090316772461,
+ "learning_rate": 3.56e-05,
+ "loss": 0.5461,
+ "step": 3143
+ },
+ {
+ "epoch": 17.965714285714284,
+ "grad_norm": 76.5407943725586,
+ "learning_rate": 3.55936507936508e-05,
+ "loss": 0.3495,
+ "step": 3144
+ },
+ {
+ "epoch": 17.97142857142857,
+ "grad_norm": 69.67864990234375,
+ "learning_rate": 3.558730158730159e-05,
+ "loss": 0.5133,
+ "step": 3145
+ },
+ {
+ "epoch": 17.97714285714286,
+ "grad_norm": 61.10658645629883,
+ "learning_rate": 3.558095238095238e-05,
+ "loss": 0.44,
+ "step": 3146
+ },
+ {
+ "epoch": 17.982857142857142,
+ "grad_norm": 25.05204963684082,
+ "learning_rate": 3.5574603174603175e-05,
+ "loss": 0.6404,
+ "step": 3147
+ },
+ {
+ "epoch": 17.98857142857143,
+ "grad_norm": 786.7589111328125,
+ "learning_rate": 3.556825396825397e-05,
+ "loss": 0.5873,
+ "step": 3148
+ },
+ {
+ "epoch": 17.994285714285713,
+ "grad_norm": 46.797298431396484,
+ "learning_rate": 3.556190476190476e-05,
+ "loss": 0.3733,
+ "step": 3149
+ },
+ {
+ "epoch": 18.0,
+ "grad_norm": 103.56895446777344,
+ "learning_rate": 3.555555555555556e-05,
+ "loss": 0.4865,
+ "step": 3150
+ },
+ {
+ "epoch": 18.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6253594160079956,
+ "eval_map": 0.9198,
+ "eval_map_50": 0.9628,
+ "eval_map_75": 0.9522,
+ "eval_map_large": 0.9199,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9198,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7806,
+ "eval_mar_10": 0.966,
+ "eval_mar_100": 0.9756,
+ "eval_mar_100_per_class": 0.9756,
+ "eval_mar_large": 0.9756,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 14.5229,
+ "eval_samples_per_second": 20.244,
+ "eval_steps_per_second": 2.548,
+ "step": 3150
+ },
+ {
+ "epoch": 18.005714285714287,
+ "grad_norm": 43.199241638183594,
+ "learning_rate": 3.5549206349206346e-05,
+ "loss": 0.3592,
+ "step": 3151
+ },
+ {
+ "epoch": 18.01142857142857,
+ "grad_norm": 55.78414535522461,
+ "learning_rate": 3.5542857142857145e-05,
+ "loss": 0.3606,
+ "step": 3152
+ },
+ {
+ "epoch": 18.017142857142858,
+ "grad_norm": 30.153759002685547,
+ "learning_rate": 3.553650793650794e-05,
+ "loss": 0.4209,
+ "step": 3153
+ },
+ {
+ "epoch": 18.02285714285714,
+ "grad_norm": 36.56952667236328,
+ "learning_rate": 3.553015873015873e-05,
+ "loss": 0.3696,
+ "step": 3154
+ },
+ {
+ "epoch": 18.02857142857143,
+ "grad_norm": 40.697784423828125,
+ "learning_rate": 3.552380952380952e-05,
+ "loss": 0.2977,
+ "step": 3155
+ },
+ {
+ "epoch": 18.034285714285716,
+ "grad_norm": 23.288288116455078,
+ "learning_rate": 3.551746031746032e-05,
+ "loss": 0.3205,
+ "step": 3156
+ },
+ {
+ "epoch": 18.04,
+ "grad_norm": 40.85267639160156,
+ "learning_rate": 3.551111111111111e-05,
+ "loss": 0.4282,
+ "step": 3157
+ },
+ {
+ "epoch": 18.045714285714286,
+ "grad_norm": 94.27670288085938,
+ "learning_rate": 3.550476190476191e-05,
+ "loss": 0.3684,
+ "step": 3158
+ },
+ {
+ "epoch": 18.05142857142857,
+ "grad_norm": 76.08488464355469,
+ "learning_rate": 3.54984126984127e-05,
+ "loss": 0.3418,
+ "step": 3159
+ },
+ {
+ "epoch": 18.057142857142857,
+ "grad_norm": 83.23991394042969,
+ "learning_rate": 3.549206349206349e-05,
+ "loss": 0.3182,
+ "step": 3160
+ },
+ {
+ "epoch": 18.062857142857144,
+ "grad_norm": 44.40380859375,
+ "learning_rate": 3.5485714285714286e-05,
+ "loss": 0.3782,
+ "step": 3161
+ },
+ {
+ "epoch": 18.068571428571428,
+ "grad_norm": 41.11808395385742,
+ "learning_rate": 3.547936507936508e-05,
+ "loss": 0.2367,
+ "step": 3162
+ },
+ {
+ "epoch": 18.074285714285715,
+ "grad_norm": 57.094913482666016,
+ "learning_rate": 3.547301587301588e-05,
+ "loss": 0.2702,
+ "step": 3163
+ },
+ {
+ "epoch": 18.08,
+ "grad_norm": 63.90068054199219,
+ "learning_rate": 3.546666666666667e-05,
+ "loss": 0.3866,
+ "step": 3164
+ },
+ {
+ "epoch": 18.085714285714285,
+ "grad_norm": 52.84870529174805,
+ "learning_rate": 3.546031746031746e-05,
+ "loss": 0.2268,
+ "step": 3165
+ },
+ {
+ "epoch": 18.091428571428573,
+ "grad_norm": 161.87777709960938,
+ "learning_rate": 3.5453968253968256e-05,
+ "loss": 0.3524,
+ "step": 3166
+ },
+ {
+ "epoch": 18.097142857142856,
+ "grad_norm": 65.514404296875,
+ "learning_rate": 3.5447619047619055e-05,
+ "loss": 0.4595,
+ "step": 3167
+ },
+ {
+ "epoch": 18.102857142857143,
+ "grad_norm": 33.12297821044922,
+ "learning_rate": 3.544126984126984e-05,
+ "loss": 0.377,
+ "step": 3168
+ },
+ {
+ "epoch": 18.10857142857143,
+ "grad_norm": 65.4037857055664,
+ "learning_rate": 3.543492063492064e-05,
+ "loss": 0.3687,
+ "step": 3169
+ },
+ {
+ "epoch": 18.114285714285714,
+ "grad_norm": 62.527587890625,
+ "learning_rate": 3.5428571428571426e-05,
+ "loss": 0.2815,
+ "step": 3170
+ },
+ {
+ "epoch": 18.12,
+ "grad_norm": 57.75078582763672,
+ "learning_rate": 3.5422222222222226e-05,
+ "loss": 0.346,
+ "step": 3171
+ },
+ {
+ "epoch": 18.125714285714285,
+ "grad_norm": 39.5456657409668,
+ "learning_rate": 3.541587301587302e-05,
+ "loss": 0.4617,
+ "step": 3172
+ },
+ {
+ "epoch": 18.13142857142857,
+ "grad_norm": 51.52202606201172,
+ "learning_rate": 3.540952380952381e-05,
+ "loss": 0.2961,
+ "step": 3173
+ },
+ {
+ "epoch": 18.13714285714286,
+ "grad_norm": 43.48557662963867,
+ "learning_rate": 3.5403174603174604e-05,
+ "loss": 0.4519,
+ "step": 3174
+ },
+ {
+ "epoch": 18.142857142857142,
+ "grad_norm": 61.564796447753906,
+ "learning_rate": 3.53968253968254e-05,
+ "loss": 0.2732,
+ "step": 3175
+ },
+ {
+ "epoch": 18.14857142857143,
+ "grad_norm": 454.7781677246094,
+ "learning_rate": 3.539047619047619e-05,
+ "loss": 0.4123,
+ "step": 3176
+ },
+ {
+ "epoch": 18.154285714285713,
+ "grad_norm": 29.593908309936523,
+ "learning_rate": 3.538412698412699e-05,
+ "loss": 0.5679,
+ "step": 3177
+ },
+ {
+ "epoch": 18.16,
+ "grad_norm": 85.99415588378906,
+ "learning_rate": 3.537777777777778e-05,
+ "loss": 0.4035,
+ "step": 3178
+ },
+ {
+ "epoch": 18.165714285714287,
+ "grad_norm": 95.85099029541016,
+ "learning_rate": 3.5371428571428574e-05,
+ "loss": 0.3839,
+ "step": 3179
+ },
+ {
+ "epoch": 18.17142857142857,
+ "grad_norm": 49.89384460449219,
+ "learning_rate": 3.5365079365079366e-05,
+ "loss": 0.4968,
+ "step": 3180
+ },
+ {
+ "epoch": 18.177142857142858,
+ "grad_norm": 118.2928466796875,
+ "learning_rate": 3.535873015873016e-05,
+ "loss": 0.3729,
+ "step": 3181
+ },
+ {
+ "epoch": 18.18285714285714,
+ "grad_norm": 87.427734375,
+ "learning_rate": 3.535238095238095e-05,
+ "loss": 0.4553,
+ "step": 3182
+ },
+ {
+ "epoch": 18.18857142857143,
+ "grad_norm": 71.05023956298828,
+ "learning_rate": 3.534603174603175e-05,
+ "loss": 0.3504,
+ "step": 3183
+ },
+ {
+ "epoch": 18.194285714285716,
+ "grad_norm": 76.40508270263672,
+ "learning_rate": 3.533968253968254e-05,
+ "loss": 0.245,
+ "step": 3184
+ },
+ {
+ "epoch": 18.2,
+ "grad_norm": 61.27660369873047,
+ "learning_rate": 3.5333333333333336e-05,
+ "loss": 0.2832,
+ "step": 3185
+ },
+ {
+ "epoch": 18.205714285714286,
+ "grad_norm": 38.750450134277344,
+ "learning_rate": 3.532698412698413e-05,
+ "loss": 0.4819,
+ "step": 3186
+ },
+ {
+ "epoch": 18.21142857142857,
+ "grad_norm": 38.24382400512695,
+ "learning_rate": 3.532063492063492e-05,
+ "loss": 0.2887,
+ "step": 3187
+ },
+ {
+ "epoch": 18.217142857142857,
+ "grad_norm": 72.64383697509766,
+ "learning_rate": 3.5314285714285714e-05,
+ "loss": 0.3745,
+ "step": 3188
+ },
+ {
+ "epoch": 18.222857142857144,
+ "grad_norm": 42.5111083984375,
+ "learning_rate": 3.5307936507936513e-05,
+ "loss": 0.3943,
+ "step": 3189
+ },
+ {
+ "epoch": 18.228571428571428,
+ "grad_norm": 54.62807083129883,
+ "learning_rate": 3.53015873015873e-05,
+ "loss": 0.4434,
+ "step": 3190
+ },
+ {
+ "epoch": 18.234285714285715,
+ "grad_norm": 63.11427688598633,
+ "learning_rate": 3.52952380952381e-05,
+ "loss": 0.4309,
+ "step": 3191
+ },
+ {
+ "epoch": 18.24,
+ "grad_norm": 42.79474639892578,
+ "learning_rate": 3.528888888888889e-05,
+ "loss": 0.3559,
+ "step": 3192
+ },
+ {
+ "epoch": 18.245714285714286,
+ "grad_norm": 48.369224548339844,
+ "learning_rate": 3.5282539682539684e-05,
+ "loss": 0.3314,
+ "step": 3193
+ },
+ {
+ "epoch": 18.251428571428573,
+ "grad_norm": 74.17704010009766,
+ "learning_rate": 3.5276190476190477e-05,
+ "loss": 0.4334,
+ "step": 3194
+ },
+ {
+ "epoch": 18.257142857142856,
+ "grad_norm": 67.38375091552734,
+ "learning_rate": 3.526984126984127e-05,
+ "loss": 0.5354,
+ "step": 3195
+ },
+ {
+ "epoch": 18.262857142857143,
+ "grad_norm": 30.76678466796875,
+ "learning_rate": 3.526349206349206e-05,
+ "loss": 0.3759,
+ "step": 3196
+ },
+ {
+ "epoch": 18.268571428571427,
+ "grad_norm": 36.84651184082031,
+ "learning_rate": 3.525714285714286e-05,
+ "loss": 0.3854,
+ "step": 3197
+ },
+ {
+ "epoch": 18.274285714285714,
+ "grad_norm": 37.59387969970703,
+ "learning_rate": 3.5250793650793654e-05,
+ "loss": 0.3735,
+ "step": 3198
+ },
+ {
+ "epoch": 18.28,
+ "grad_norm": 64.6196517944336,
+ "learning_rate": 3.5244444444444447e-05,
+ "loss": 0.3306,
+ "step": 3199
+ },
+ {
+ "epoch": 18.285714285714285,
+ "grad_norm": 59.37558364868164,
+ "learning_rate": 3.523809523809524e-05,
+ "loss": 0.3057,
+ "step": 3200
+ },
+ {
+ "epoch": 18.291428571428572,
+ "grad_norm": 113.92902374267578,
+ "learning_rate": 3.523174603174603e-05,
+ "loss": 0.4922,
+ "step": 3201
+ },
+ {
+ "epoch": 18.29714285714286,
+ "grad_norm": 85.95907592773438,
+ "learning_rate": 3.522539682539683e-05,
+ "loss": 0.4439,
+ "step": 3202
+ },
+ {
+ "epoch": 18.302857142857142,
+ "grad_norm": 74.69219207763672,
+ "learning_rate": 3.521904761904762e-05,
+ "loss": 0.3237,
+ "step": 3203
+ },
+ {
+ "epoch": 18.30857142857143,
+ "grad_norm": 101.25186920166016,
+ "learning_rate": 3.5212698412698417e-05,
+ "loss": 0.3955,
+ "step": 3204
+ },
+ {
+ "epoch": 18.314285714285713,
+ "grad_norm": 84.63744354248047,
+ "learning_rate": 3.520634920634921e-05,
+ "loss": 0.3627,
+ "step": 3205
+ },
+ {
+ "epoch": 18.32,
+ "grad_norm": 84.56731414794922,
+ "learning_rate": 3.52e-05,
+ "loss": 0.3336,
+ "step": 3206
+ },
+ {
+ "epoch": 18.325714285714287,
+ "grad_norm": 44.616302490234375,
+ "learning_rate": 3.5193650793650794e-05,
+ "loss": 0.3344,
+ "step": 3207
+ },
+ {
+ "epoch": 18.33142857142857,
+ "grad_norm": 69.16703033447266,
+ "learning_rate": 3.5187301587301594e-05,
+ "loss": 0.3907,
+ "step": 3208
+ },
+ {
+ "epoch": 18.337142857142858,
+ "grad_norm": 79.36182403564453,
+ "learning_rate": 3.518095238095238e-05,
+ "loss": 0.3139,
+ "step": 3209
+ },
+ {
+ "epoch": 18.34285714285714,
+ "grad_norm": 85.03105926513672,
+ "learning_rate": 3.517460317460318e-05,
+ "loss": 0.2982,
+ "step": 3210
+ },
+ {
+ "epoch": 18.34857142857143,
+ "grad_norm": 39.43722915649414,
+ "learning_rate": 3.516825396825397e-05,
+ "loss": 0.3983,
+ "step": 3211
+ },
+ {
+ "epoch": 18.354285714285716,
+ "grad_norm": 41.102115631103516,
+ "learning_rate": 3.5161904761904764e-05,
+ "loss": 0.5258,
+ "step": 3212
+ },
+ {
+ "epoch": 18.36,
+ "grad_norm": 36.52622985839844,
+ "learning_rate": 3.515555555555556e-05,
+ "loss": 0.3115,
+ "step": 3213
+ },
+ {
+ "epoch": 18.365714285714287,
+ "grad_norm": 76.81031036376953,
+ "learning_rate": 3.514920634920635e-05,
+ "loss": 0.4972,
+ "step": 3214
+ },
+ {
+ "epoch": 18.37142857142857,
+ "grad_norm": 54.84734344482422,
+ "learning_rate": 3.514285714285714e-05,
+ "loss": 0.3642,
+ "step": 3215
+ },
+ {
+ "epoch": 18.377142857142857,
+ "grad_norm": 100.14500427246094,
+ "learning_rate": 3.513650793650794e-05,
+ "loss": 0.6879,
+ "step": 3216
+ },
+ {
+ "epoch": 18.382857142857144,
+ "grad_norm": 36.46712112426758,
+ "learning_rate": 3.513015873015873e-05,
+ "loss": 0.2939,
+ "step": 3217
+ },
+ {
+ "epoch": 18.388571428571428,
+ "grad_norm": 62.769412994384766,
+ "learning_rate": 3.512380952380953e-05,
+ "loss": 0.9316,
+ "step": 3218
+ },
+ {
+ "epoch": 18.394285714285715,
+ "grad_norm": 39.455169677734375,
+ "learning_rate": 3.511746031746032e-05,
+ "loss": 0.3678,
+ "step": 3219
+ },
+ {
+ "epoch": 18.4,
+ "grad_norm": 325.67950439453125,
+ "learning_rate": 3.511111111111111e-05,
+ "loss": 0.3816,
+ "step": 3220
+ },
+ {
+ "epoch": 18.405714285714286,
+ "grad_norm": 79.74323272705078,
+ "learning_rate": 3.5104761904761905e-05,
+ "loss": 0.5354,
+ "step": 3221
+ },
+ {
+ "epoch": 18.411428571428573,
+ "grad_norm": 197.36605834960938,
+ "learning_rate": 3.5098412698412704e-05,
+ "loss": 0.3999,
+ "step": 3222
+ },
+ {
+ "epoch": 18.417142857142856,
+ "grad_norm": 85.20283508300781,
+ "learning_rate": 3.509206349206349e-05,
+ "loss": 0.4736,
+ "step": 3223
+ },
+ {
+ "epoch": 18.422857142857143,
+ "grad_norm": 212.142578125,
+ "learning_rate": 3.508571428571429e-05,
+ "loss": 0.5339,
+ "step": 3224
+ },
+ {
+ "epoch": 18.428571428571427,
+ "grad_norm": 83.710205078125,
+ "learning_rate": 3.5079365079365075e-05,
+ "loss": 0.3566,
+ "step": 3225
+ },
+ {
+ "epoch": 18.434285714285714,
+ "grad_norm": 27.423030853271484,
+ "learning_rate": 3.5073015873015875e-05,
+ "loss": 0.3837,
+ "step": 3226
+ },
+ {
+ "epoch": 18.44,
+ "grad_norm": 50.2109260559082,
+ "learning_rate": 3.506666666666667e-05,
+ "loss": 0.3335,
+ "step": 3227
+ },
+ {
+ "epoch": 18.445714285714285,
+ "grad_norm": 28.182552337646484,
+ "learning_rate": 3.506031746031746e-05,
+ "loss": 0.4677,
+ "step": 3228
+ },
+ {
+ "epoch": 18.451428571428572,
+ "grad_norm": 36.24056625366211,
+ "learning_rate": 3.505396825396825e-05,
+ "loss": 0.2841,
+ "step": 3229
+ },
+ {
+ "epoch": 18.457142857142856,
+ "grad_norm": 28.210988998413086,
+ "learning_rate": 3.504761904761905e-05,
+ "loss": 0.3922,
+ "step": 3230
+ },
+ {
+ "epoch": 18.462857142857143,
+ "grad_norm": 57.17612075805664,
+ "learning_rate": 3.5041269841269845e-05,
+ "loss": 0.3097,
+ "step": 3231
+ },
+ {
+ "epoch": 18.46857142857143,
+ "grad_norm": 27.290563583374023,
+ "learning_rate": 3.503492063492064e-05,
+ "loss": 0.3782,
+ "step": 3232
+ },
+ {
+ "epoch": 18.474285714285713,
+ "grad_norm": 63.29109191894531,
+ "learning_rate": 3.502857142857143e-05,
+ "loss": 0.3363,
+ "step": 3233
+ },
+ {
+ "epoch": 18.48,
+ "grad_norm": 43.08891296386719,
+ "learning_rate": 3.502222222222222e-05,
+ "loss": 0.509,
+ "step": 3234
+ },
+ {
+ "epoch": 18.485714285714284,
+ "grad_norm": 57.84274673461914,
+ "learning_rate": 3.5015873015873015e-05,
+ "loss": 0.2903,
+ "step": 3235
+ },
+ {
+ "epoch": 18.49142857142857,
+ "grad_norm": 29.947025299072266,
+ "learning_rate": 3.500952380952381e-05,
+ "loss": 0.5381,
+ "step": 3236
+ },
+ {
+ "epoch": 18.497142857142858,
+ "grad_norm": 27.890987396240234,
+ "learning_rate": 3.500317460317461e-05,
+ "loss": 0.2969,
+ "step": 3237
+ },
+ {
+ "epoch": 18.502857142857142,
+ "grad_norm": 52.41931915283203,
+ "learning_rate": 3.49968253968254e-05,
+ "loss": 0.3769,
+ "step": 3238
+ },
+ {
+ "epoch": 18.50857142857143,
+ "grad_norm": 40.1724967956543,
+ "learning_rate": 3.499047619047619e-05,
+ "loss": 0.4841,
+ "step": 3239
+ },
+ {
+ "epoch": 18.514285714285712,
+ "grad_norm": 89.99263763427734,
+ "learning_rate": 3.4984126984126985e-05,
+ "loss": 0.5822,
+ "step": 3240
+ },
+ {
+ "epoch": 18.52,
+ "grad_norm": 36.47848129272461,
+ "learning_rate": 3.4977777777777785e-05,
+ "loss": 0.3401,
+ "step": 3241
+ },
+ {
+ "epoch": 18.525714285714287,
+ "grad_norm": 53.10023498535156,
+ "learning_rate": 3.497142857142857e-05,
+ "loss": 0.3334,
+ "step": 3242
+ },
+ {
+ "epoch": 18.53142857142857,
+ "grad_norm": 25.754384994506836,
+ "learning_rate": 3.496507936507937e-05,
+ "loss": 0.3938,
+ "step": 3243
+ },
+ {
+ "epoch": 18.537142857142857,
+ "grad_norm": 220.01368713378906,
+ "learning_rate": 3.495873015873016e-05,
+ "loss": 0.6147,
+ "step": 3244
+ },
+ {
+ "epoch": 18.542857142857144,
+ "grad_norm": 32.224937438964844,
+ "learning_rate": 3.4952380952380955e-05,
+ "loss": 0.3356,
+ "step": 3245
+ },
+ {
+ "epoch": 18.548571428571428,
+ "grad_norm": 66.4201431274414,
+ "learning_rate": 3.494603174603175e-05,
+ "loss": 0.4077,
+ "step": 3246
+ },
+ {
+ "epoch": 18.554285714285715,
+ "grad_norm": 112.10049438476562,
+ "learning_rate": 3.493968253968254e-05,
+ "loss": 0.3764,
+ "step": 3247
+ },
+ {
+ "epoch": 18.56,
+ "grad_norm": 44.7986946105957,
+ "learning_rate": 3.493333333333333e-05,
+ "loss": 0.329,
+ "step": 3248
+ },
+ {
+ "epoch": 18.565714285714286,
+ "grad_norm": 93.60568237304688,
+ "learning_rate": 3.492698412698413e-05,
+ "loss": 0.3607,
+ "step": 3249
+ },
+ {
+ "epoch": 18.571428571428573,
+ "grad_norm": 65.51329040527344,
+ "learning_rate": 3.492063492063492e-05,
+ "loss": 0.3553,
+ "step": 3250
+ },
+ {
+ "epoch": 18.577142857142857,
+ "grad_norm": 76.55573272705078,
+ "learning_rate": 3.491428571428572e-05,
+ "loss": 0.4236,
+ "step": 3251
+ },
+ {
+ "epoch": 18.582857142857144,
+ "grad_norm": 28.30959701538086,
+ "learning_rate": 3.490793650793651e-05,
+ "loss": 0.5552,
+ "step": 3252
+ },
+ {
+ "epoch": 18.588571428571427,
+ "grad_norm": 537.5946044921875,
+ "learning_rate": 3.49015873015873e-05,
+ "loss": 0.3013,
+ "step": 3253
+ },
+ {
+ "epoch": 18.594285714285714,
+ "grad_norm": 37.939945220947266,
+ "learning_rate": 3.4895238095238096e-05,
+ "loss": 0.3303,
+ "step": 3254
+ },
+ {
+ "epoch": 18.6,
+ "grad_norm": 21.929025650024414,
+ "learning_rate": 3.4888888888888895e-05,
+ "loss": 0.378,
+ "step": 3255
+ },
+ {
+ "epoch": 18.605714285714285,
+ "grad_norm": 62.76546859741211,
+ "learning_rate": 3.488253968253968e-05,
+ "loss": 0.3737,
+ "step": 3256
+ },
+ {
+ "epoch": 18.611428571428572,
+ "grad_norm": 43.64086151123047,
+ "learning_rate": 3.487619047619048e-05,
+ "loss": 0.375,
+ "step": 3257
+ },
+ {
+ "epoch": 18.617142857142856,
+ "grad_norm": 49.89305114746094,
+ "learning_rate": 3.4869841269841266e-05,
+ "loss": 0.409,
+ "step": 3258
+ },
+ {
+ "epoch": 18.622857142857143,
+ "grad_norm": 41.23617935180664,
+ "learning_rate": 3.4863492063492066e-05,
+ "loss": 0.3682,
+ "step": 3259
+ },
+ {
+ "epoch": 18.62857142857143,
+ "grad_norm": 61.988040924072266,
+ "learning_rate": 3.485714285714286e-05,
+ "loss": 0.432,
+ "step": 3260
+ },
+ {
+ "epoch": 18.634285714285713,
+ "grad_norm": 148.89205932617188,
+ "learning_rate": 3.485079365079365e-05,
+ "loss": 0.4126,
+ "step": 3261
+ },
+ {
+ "epoch": 18.64,
+ "grad_norm": 56.83814239501953,
+ "learning_rate": 3.4844444444444444e-05,
+ "loss": 0.3521,
+ "step": 3262
+ },
+ {
+ "epoch": 18.645714285714284,
+ "grad_norm": 31.013830184936523,
+ "learning_rate": 3.483809523809524e-05,
+ "loss": 0.4353,
+ "step": 3263
+ },
+ {
+ "epoch": 18.65142857142857,
+ "grad_norm": 61.98045349121094,
+ "learning_rate": 3.483174603174603e-05,
+ "loss": 0.35,
+ "step": 3264
+ },
+ {
+ "epoch": 18.65714285714286,
+ "grad_norm": 18.87017250061035,
+ "learning_rate": 3.482539682539683e-05,
+ "loss": 0.2831,
+ "step": 3265
+ },
+ {
+ "epoch": 18.662857142857142,
+ "grad_norm": 22.309398651123047,
+ "learning_rate": 3.481904761904762e-05,
+ "loss": 0.3345,
+ "step": 3266
+ },
+ {
+ "epoch": 18.66857142857143,
+ "grad_norm": 74.41686248779297,
+ "learning_rate": 3.4812698412698414e-05,
+ "loss": 0.4046,
+ "step": 3267
+ },
+ {
+ "epoch": 18.674285714285713,
+ "grad_norm": 55.20325469970703,
+ "learning_rate": 3.4806349206349206e-05,
+ "loss": 0.3444,
+ "step": 3268
+ },
+ {
+ "epoch": 18.68,
+ "grad_norm": 41.73289489746094,
+ "learning_rate": 3.48e-05,
+ "loss": 0.4791,
+ "step": 3269
+ },
+ {
+ "epoch": 18.685714285714287,
+ "grad_norm": 52.391178131103516,
+ "learning_rate": 3.47936507936508e-05,
+ "loss": 0.488,
+ "step": 3270
+ },
+ {
+ "epoch": 18.69142857142857,
+ "grad_norm": 41.188087463378906,
+ "learning_rate": 3.478730158730159e-05,
+ "loss": 0.3235,
+ "step": 3271
+ },
+ {
+ "epoch": 18.697142857142858,
+ "grad_norm": 88.90873718261719,
+ "learning_rate": 3.4780952380952384e-05,
+ "loss": 0.3831,
+ "step": 3272
+ },
+ {
+ "epoch": 18.70285714285714,
+ "grad_norm": 46.88115692138672,
+ "learning_rate": 3.4774603174603176e-05,
+ "loss": 0.3078,
+ "step": 3273
+ },
+ {
+ "epoch": 18.708571428571428,
+ "grad_norm": 18.235933303833008,
+ "learning_rate": 3.476825396825397e-05,
+ "loss": 0.464,
+ "step": 3274
+ },
+ {
+ "epoch": 18.714285714285715,
+ "grad_norm": 53.559288024902344,
+ "learning_rate": 3.476190476190476e-05,
+ "loss": 0.3916,
+ "step": 3275
+ },
+ {
+ "epoch": 18.72,
+ "grad_norm": 23.915830612182617,
+ "learning_rate": 3.475555555555556e-05,
+ "loss": 0.3189,
+ "step": 3276
+ },
+ {
+ "epoch": 18.725714285714286,
+ "grad_norm": 38.997554779052734,
+ "learning_rate": 3.4749206349206353e-05,
+ "loss": 0.3874,
+ "step": 3277
+ },
+ {
+ "epoch": 18.731428571428573,
+ "grad_norm": 32.495235443115234,
+ "learning_rate": 3.4742857142857146e-05,
+ "loss": 0.2955,
+ "step": 3278
+ },
+ {
+ "epoch": 18.737142857142857,
+ "grad_norm": 26.708459854125977,
+ "learning_rate": 3.473650793650794e-05,
+ "loss": 0.4058,
+ "step": 3279
+ },
+ {
+ "epoch": 18.742857142857144,
+ "grad_norm": 31.728487014770508,
+ "learning_rate": 3.473015873015873e-05,
+ "loss": 0.4113,
+ "step": 3280
+ },
+ {
+ "epoch": 18.748571428571427,
+ "grad_norm": 63.8240852355957,
+ "learning_rate": 3.4723809523809524e-05,
+ "loss": 0.2827,
+ "step": 3281
+ },
+ {
+ "epoch": 18.754285714285714,
+ "grad_norm": 54.470394134521484,
+ "learning_rate": 3.4717460317460323e-05,
+ "loss": 0.3469,
+ "step": 3282
+ },
+ {
+ "epoch": 18.76,
+ "grad_norm": 65.894775390625,
+ "learning_rate": 3.471111111111111e-05,
+ "loss": 0.358,
+ "step": 3283
+ },
+ {
+ "epoch": 18.765714285714285,
+ "grad_norm": 56.5354118347168,
+ "learning_rate": 3.470476190476191e-05,
+ "loss": 0.2944,
+ "step": 3284
+ },
+ {
+ "epoch": 18.771428571428572,
+ "grad_norm": 42.3811149597168,
+ "learning_rate": 3.46984126984127e-05,
+ "loss": 0.4474,
+ "step": 3285
+ },
+ {
+ "epoch": 18.777142857142856,
+ "grad_norm": 67.40564727783203,
+ "learning_rate": 3.4692063492063494e-05,
+ "loss": 0.3332,
+ "step": 3286
+ },
+ {
+ "epoch": 18.782857142857143,
+ "grad_norm": 30.77213478088379,
+ "learning_rate": 3.468571428571429e-05,
+ "loss": 0.4628,
+ "step": 3287
+ },
+ {
+ "epoch": 18.78857142857143,
+ "grad_norm": 15.608197212219238,
+ "learning_rate": 3.4679365079365086e-05,
+ "loss": 0.3094,
+ "step": 3288
+ },
+ {
+ "epoch": 18.794285714285714,
+ "grad_norm": 15.401975631713867,
+ "learning_rate": 3.467301587301587e-05,
+ "loss": 0.3165,
+ "step": 3289
+ },
+ {
+ "epoch": 18.8,
+ "grad_norm": 32.28103256225586,
+ "learning_rate": 3.466666666666667e-05,
+ "loss": 0.2876,
+ "step": 3290
+ },
+ {
+ "epoch": 18.805714285714284,
+ "grad_norm": 84.35736846923828,
+ "learning_rate": 3.466031746031746e-05,
+ "loss": 0.2955,
+ "step": 3291
+ },
+ {
+ "epoch": 18.81142857142857,
+ "grad_norm": 44.69303512573242,
+ "learning_rate": 3.4653968253968257e-05,
+ "loss": 0.3536,
+ "step": 3292
+ },
+ {
+ "epoch": 18.81714285714286,
+ "grad_norm": 48.58481979370117,
+ "learning_rate": 3.464761904761905e-05,
+ "loss": 0.6354,
+ "step": 3293
+ },
+ {
+ "epoch": 18.822857142857142,
+ "grad_norm": 35.53000259399414,
+ "learning_rate": 3.464126984126984e-05,
+ "loss": 0.4207,
+ "step": 3294
+ },
+ {
+ "epoch": 18.82857142857143,
+ "grad_norm": 34.199031829833984,
+ "learning_rate": 3.4634920634920634e-05,
+ "loss": 0.5652,
+ "step": 3295
+ },
+ {
+ "epoch": 18.834285714285713,
+ "grad_norm": 98.38899230957031,
+ "learning_rate": 3.4628571428571434e-05,
+ "loss": 0.4791,
+ "step": 3296
+ },
+ {
+ "epoch": 18.84,
+ "grad_norm": 27.646554946899414,
+ "learning_rate": 3.462222222222222e-05,
+ "loss": 0.3193,
+ "step": 3297
+ },
+ {
+ "epoch": 18.845714285714287,
+ "grad_norm": 21.69123649597168,
+ "learning_rate": 3.461587301587302e-05,
+ "loss": 0.3577,
+ "step": 3298
+ },
+ {
+ "epoch": 18.85142857142857,
+ "grad_norm": 32.46466827392578,
+ "learning_rate": 3.460952380952381e-05,
+ "loss": 0.3855,
+ "step": 3299
+ },
+ {
+ "epoch": 18.857142857142858,
+ "grad_norm": 26.237293243408203,
+ "learning_rate": 3.4603174603174604e-05,
+ "loss": 0.2561,
+ "step": 3300
+ },
+ {
+ "epoch": 18.86285714285714,
+ "grad_norm": 21.17353057861328,
+ "learning_rate": 3.45968253968254e-05,
+ "loss": 0.2684,
+ "step": 3301
+ },
+ {
+ "epoch": 18.86857142857143,
+ "grad_norm": 30.09574317932129,
+ "learning_rate": 3.459047619047619e-05,
+ "loss": 0.2668,
+ "step": 3302
+ },
+ {
+ "epoch": 18.874285714285715,
+ "grad_norm": 32.279048919677734,
+ "learning_rate": 3.458412698412698e-05,
+ "loss": 0.4253,
+ "step": 3303
+ },
+ {
+ "epoch": 18.88,
+ "grad_norm": 30.678268432617188,
+ "learning_rate": 3.457777777777778e-05,
+ "loss": 0.2842,
+ "step": 3304
+ },
+ {
+ "epoch": 18.885714285714286,
+ "grad_norm": 48.52307891845703,
+ "learning_rate": 3.4571428571428574e-05,
+ "loss": 0.4292,
+ "step": 3305
+ },
+ {
+ "epoch": 18.89142857142857,
+ "grad_norm": 25.623342514038086,
+ "learning_rate": 3.456507936507937e-05,
+ "loss": 0.533,
+ "step": 3306
+ },
+ {
+ "epoch": 18.897142857142857,
+ "grad_norm": 54.10114288330078,
+ "learning_rate": 3.455873015873016e-05,
+ "loss": 0.4586,
+ "step": 3307
+ },
+ {
+ "epoch": 18.902857142857144,
+ "grad_norm": 869.677001953125,
+ "learning_rate": 3.455238095238095e-05,
+ "loss": 0.2632,
+ "step": 3308
+ },
+ {
+ "epoch": 18.908571428571427,
+ "grad_norm": 61.15915298461914,
+ "learning_rate": 3.454603174603175e-05,
+ "loss": 0.3718,
+ "step": 3309
+ },
+ {
+ "epoch": 18.914285714285715,
+ "grad_norm": 67.51522064208984,
+ "learning_rate": 3.4539682539682544e-05,
+ "loss": 0.3563,
+ "step": 3310
+ },
+ {
+ "epoch": 18.92,
+ "grad_norm": 91.92138671875,
+ "learning_rate": 3.453333333333334e-05,
+ "loss": 0.2882,
+ "step": 3311
+ },
+ {
+ "epoch": 18.925714285714285,
+ "grad_norm": 154.09152221679688,
+ "learning_rate": 3.452698412698413e-05,
+ "loss": 0.392,
+ "step": 3312
+ },
+ {
+ "epoch": 18.931428571428572,
+ "grad_norm": 58.27983856201172,
+ "learning_rate": 3.452063492063492e-05,
+ "loss": 0.3814,
+ "step": 3313
+ },
+ {
+ "epoch": 18.937142857142856,
+ "grad_norm": 27.329254150390625,
+ "learning_rate": 3.4514285714285715e-05,
+ "loss": 0.4226,
+ "step": 3314
+ },
+ {
+ "epoch": 18.942857142857143,
+ "grad_norm": 36.9119873046875,
+ "learning_rate": 3.4507936507936514e-05,
+ "loss": 0.3041,
+ "step": 3315
+ },
+ {
+ "epoch": 18.94857142857143,
+ "grad_norm": 98.28682708740234,
+ "learning_rate": 3.45015873015873e-05,
+ "loss": 0.4285,
+ "step": 3316
+ },
+ {
+ "epoch": 18.954285714285714,
+ "grad_norm": 51.9772834777832,
+ "learning_rate": 3.44952380952381e-05,
+ "loss": 0.3947,
+ "step": 3317
+ },
+ {
+ "epoch": 18.96,
+ "grad_norm": 50.20566940307617,
+ "learning_rate": 3.448888888888889e-05,
+ "loss": 0.4644,
+ "step": 3318
+ },
+ {
+ "epoch": 18.965714285714284,
+ "grad_norm": 58.896080017089844,
+ "learning_rate": 3.4482539682539685e-05,
+ "loss": 0.3752,
+ "step": 3319
+ },
+ {
+ "epoch": 18.97142857142857,
+ "grad_norm": 102.45153045654297,
+ "learning_rate": 3.447619047619048e-05,
+ "loss": 0.5225,
+ "step": 3320
+ },
+ {
+ "epoch": 18.97714285714286,
+ "grad_norm": 19.400266647338867,
+ "learning_rate": 3.446984126984128e-05,
+ "loss": 0.3767,
+ "step": 3321
+ },
+ {
+ "epoch": 18.982857142857142,
+ "grad_norm": 28.068359375,
+ "learning_rate": 3.446349206349206e-05,
+ "loss": 0.3446,
+ "step": 3322
+ },
+ {
+ "epoch": 18.98857142857143,
+ "grad_norm": 33.36658477783203,
+ "learning_rate": 3.445714285714286e-05,
+ "loss": 0.3872,
+ "step": 3323
+ },
+ {
+ "epoch": 18.994285714285713,
+ "grad_norm": 273.208740234375,
+ "learning_rate": 3.445079365079365e-05,
+ "loss": 0.3829,
+ "step": 3324
+ },
+ {
+ "epoch": 19.0,
+ "grad_norm": 49.097652435302734,
+ "learning_rate": 3.444444444444445e-05,
+ "loss": 0.3178,
+ "step": 3325
+ },
+ {
+ "epoch": 19.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6071962714195251,
+ "eval_map": 0.918,
+ "eval_map_50": 0.9633,
+ "eval_map_75": 0.9507,
+ "eval_map_large": 0.9182,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.918,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7803,
+ "eval_mar_10": 0.9686,
+ "eval_mar_100": 0.9743,
+ "eval_mar_100_per_class": 0.9743,
+ "eval_mar_large": 0.9743,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.8546,
+ "eval_samples_per_second": 21.22,
+ "eval_steps_per_second": 2.671,
+ "step": 3325
+ },
+ {
+ "epoch": 19.005714285714287,
+ "grad_norm": 108.3809585571289,
+ "learning_rate": 3.443809523809524e-05,
+ "loss": 0.4658,
+ "step": 3326
+ },
+ {
+ "epoch": 19.01142857142857,
+ "grad_norm": 81.60541534423828,
+ "learning_rate": 3.443174603174603e-05,
+ "loss": 0.3664,
+ "step": 3327
+ },
+ {
+ "epoch": 19.017142857142858,
+ "grad_norm": 70.19352722167969,
+ "learning_rate": 3.4425396825396825e-05,
+ "loss": 0.4201,
+ "step": 3328
+ },
+ {
+ "epoch": 19.02285714285714,
+ "grad_norm": 83.45301818847656,
+ "learning_rate": 3.4419047619047625e-05,
+ "loss": 0.4855,
+ "step": 3329
+ },
+ {
+ "epoch": 19.02857142857143,
+ "grad_norm": 75.37036895751953,
+ "learning_rate": 3.441269841269841e-05,
+ "loss": 0.4015,
+ "step": 3330
+ },
+ {
+ "epoch": 19.034285714285716,
+ "grad_norm": 61.17378616333008,
+ "learning_rate": 3.440634920634921e-05,
+ "loss": 0.402,
+ "step": 3331
+ },
+ {
+ "epoch": 19.04,
+ "grad_norm": 107.7389907836914,
+ "learning_rate": 3.4399999999999996e-05,
+ "loss": 0.5198,
+ "step": 3332
+ },
+ {
+ "epoch": 19.045714285714286,
+ "grad_norm": 27.87543296813965,
+ "learning_rate": 3.4393650793650795e-05,
+ "loss": 0.2983,
+ "step": 3333
+ },
+ {
+ "epoch": 19.05142857142857,
+ "grad_norm": 63.95698928833008,
+ "learning_rate": 3.438730158730159e-05,
+ "loss": 0.4856,
+ "step": 3334
+ },
+ {
+ "epoch": 19.057142857142857,
+ "grad_norm": 59.24372100830078,
+ "learning_rate": 3.438095238095238e-05,
+ "loss": 0.4489,
+ "step": 3335
+ },
+ {
+ "epoch": 19.062857142857144,
+ "grad_norm": 49.524620056152344,
+ "learning_rate": 3.437460317460317e-05,
+ "loss": 0.346,
+ "step": 3336
+ },
+ {
+ "epoch": 19.068571428571428,
+ "grad_norm": 22.447019577026367,
+ "learning_rate": 3.436825396825397e-05,
+ "loss": 0.3913,
+ "step": 3337
+ },
+ {
+ "epoch": 19.074285714285715,
+ "grad_norm": 53.7479362487793,
+ "learning_rate": 3.436190476190476e-05,
+ "loss": 0.3361,
+ "step": 3338
+ },
+ {
+ "epoch": 19.08,
+ "grad_norm": 43.6589241027832,
+ "learning_rate": 3.435555555555556e-05,
+ "loss": 0.4637,
+ "step": 3339
+ },
+ {
+ "epoch": 19.085714285714285,
+ "grad_norm": 28.344255447387695,
+ "learning_rate": 3.434920634920635e-05,
+ "loss": 0.3446,
+ "step": 3340
+ },
+ {
+ "epoch": 19.091428571428573,
+ "grad_norm": 26.627819061279297,
+ "learning_rate": 3.434285714285714e-05,
+ "loss": 0.3049,
+ "step": 3341
+ },
+ {
+ "epoch": 19.097142857142856,
+ "grad_norm": 41.315155029296875,
+ "learning_rate": 3.4336507936507936e-05,
+ "loss": 0.3491,
+ "step": 3342
+ },
+ {
+ "epoch": 19.102857142857143,
+ "grad_norm": 25.992656707763672,
+ "learning_rate": 3.433015873015873e-05,
+ "loss": 0.4037,
+ "step": 3343
+ },
+ {
+ "epoch": 19.10857142857143,
+ "grad_norm": 24.71420669555664,
+ "learning_rate": 3.432380952380953e-05,
+ "loss": 0.3653,
+ "step": 3344
+ },
+ {
+ "epoch": 19.114285714285714,
+ "grad_norm": 57.442039489746094,
+ "learning_rate": 3.431746031746032e-05,
+ "loss": 0.4772,
+ "step": 3345
+ },
+ {
+ "epoch": 19.12,
+ "grad_norm": 36.09226608276367,
+ "learning_rate": 3.431111111111111e-05,
+ "loss": 0.3446,
+ "step": 3346
+ },
+ {
+ "epoch": 19.125714285714285,
+ "grad_norm": 81.4795150756836,
+ "learning_rate": 3.4304761904761906e-05,
+ "loss": 0.3171,
+ "step": 3347
+ },
+ {
+ "epoch": 19.13142857142857,
+ "grad_norm": 18.19621467590332,
+ "learning_rate": 3.4298412698412705e-05,
+ "loss": 0.3355,
+ "step": 3348
+ },
+ {
+ "epoch": 19.13714285714286,
+ "grad_norm": 49.13719940185547,
+ "learning_rate": 3.429206349206349e-05,
+ "loss": 0.3004,
+ "step": 3349
+ },
+ {
+ "epoch": 19.142857142857142,
+ "grad_norm": 39.006492614746094,
+ "learning_rate": 3.428571428571429e-05,
+ "loss": 0.3202,
+ "step": 3350
+ },
+ {
+ "epoch": 19.14857142857143,
+ "grad_norm": 49.013946533203125,
+ "learning_rate": 3.427936507936508e-05,
+ "loss": 0.297,
+ "step": 3351
+ },
+ {
+ "epoch": 19.154285714285713,
+ "grad_norm": 63.2196044921875,
+ "learning_rate": 3.4273015873015876e-05,
+ "loss": 0.4038,
+ "step": 3352
+ },
+ {
+ "epoch": 19.16,
+ "grad_norm": 46.97873306274414,
+ "learning_rate": 3.426666666666667e-05,
+ "loss": 0.331,
+ "step": 3353
+ },
+ {
+ "epoch": 19.165714285714287,
+ "grad_norm": 37.16547393798828,
+ "learning_rate": 3.426031746031746e-05,
+ "loss": 0.3239,
+ "step": 3354
+ },
+ {
+ "epoch": 19.17142857142857,
+ "grad_norm": 400.9812316894531,
+ "learning_rate": 3.4253968253968254e-05,
+ "loss": 0.5407,
+ "step": 3355
+ },
+ {
+ "epoch": 19.177142857142858,
+ "grad_norm": 82.57059478759766,
+ "learning_rate": 3.424761904761905e-05,
+ "loss": 0.3921,
+ "step": 3356
+ },
+ {
+ "epoch": 19.18285714285714,
+ "grad_norm": 25.264549255371094,
+ "learning_rate": 3.424126984126984e-05,
+ "loss": 0.3145,
+ "step": 3357
+ },
+ {
+ "epoch": 19.18857142857143,
+ "grad_norm": 221.32171630859375,
+ "learning_rate": 3.423492063492064e-05,
+ "loss": 0.3765,
+ "step": 3358
+ },
+ {
+ "epoch": 19.194285714285716,
+ "grad_norm": 20.174264907836914,
+ "learning_rate": 3.422857142857143e-05,
+ "loss": 0.3129,
+ "step": 3359
+ },
+ {
+ "epoch": 19.2,
+ "grad_norm": 30.072860717773438,
+ "learning_rate": 3.4222222222222224e-05,
+ "loss": 0.2448,
+ "step": 3360
+ },
+ {
+ "epoch": 19.205714285714286,
+ "grad_norm": 48.83212661743164,
+ "learning_rate": 3.4215873015873016e-05,
+ "loss": 0.3296,
+ "step": 3361
+ },
+ {
+ "epoch": 19.21142857142857,
+ "grad_norm": 55.796146392822266,
+ "learning_rate": 3.4209523809523816e-05,
+ "loss": 0.3444,
+ "step": 3362
+ },
+ {
+ "epoch": 19.217142857142857,
+ "grad_norm": 22.57940101623535,
+ "learning_rate": 3.42031746031746e-05,
+ "loss": 0.322,
+ "step": 3363
+ },
+ {
+ "epoch": 19.222857142857144,
+ "grad_norm": 147.75096130371094,
+ "learning_rate": 3.41968253968254e-05,
+ "loss": 0.4688,
+ "step": 3364
+ },
+ {
+ "epoch": 19.228571428571428,
+ "grad_norm": 31.902299880981445,
+ "learning_rate": 3.419047619047619e-05,
+ "loss": 0.3138,
+ "step": 3365
+ },
+ {
+ "epoch": 19.234285714285715,
+ "grad_norm": 91.63981628417969,
+ "learning_rate": 3.4184126984126986e-05,
+ "loss": 0.398,
+ "step": 3366
+ },
+ {
+ "epoch": 19.24,
+ "grad_norm": 60.106632232666016,
+ "learning_rate": 3.417777777777778e-05,
+ "loss": 0.3671,
+ "step": 3367
+ },
+ {
+ "epoch": 19.245714285714286,
+ "grad_norm": 145.38572692871094,
+ "learning_rate": 3.417142857142857e-05,
+ "loss": 0.4003,
+ "step": 3368
+ },
+ {
+ "epoch": 19.251428571428573,
+ "grad_norm": 39.69725036621094,
+ "learning_rate": 3.4165079365079364e-05,
+ "loss": 0.4837,
+ "step": 3369
+ },
+ {
+ "epoch": 19.257142857142856,
+ "grad_norm": 29.655527114868164,
+ "learning_rate": 3.4158730158730164e-05,
+ "loss": 0.5045,
+ "step": 3370
+ },
+ {
+ "epoch": 19.262857142857143,
+ "grad_norm": 28.100563049316406,
+ "learning_rate": 3.415238095238095e-05,
+ "loss": 0.37,
+ "step": 3371
+ },
+ {
+ "epoch": 19.268571428571427,
+ "grad_norm": 94.77354431152344,
+ "learning_rate": 3.414603174603175e-05,
+ "loss": 0.3595,
+ "step": 3372
+ },
+ {
+ "epoch": 19.274285714285714,
+ "grad_norm": 39.54533386230469,
+ "learning_rate": 3.413968253968254e-05,
+ "loss": 0.3759,
+ "step": 3373
+ },
+ {
+ "epoch": 19.28,
+ "grad_norm": 36.4666862487793,
+ "learning_rate": 3.4133333333333334e-05,
+ "loss": 0.2876,
+ "step": 3374
+ },
+ {
+ "epoch": 19.285714285714285,
+ "grad_norm": 59.548675537109375,
+ "learning_rate": 3.412698412698413e-05,
+ "loss": 0.3374,
+ "step": 3375
+ },
+ {
+ "epoch": 19.291428571428572,
+ "grad_norm": 69.64942932128906,
+ "learning_rate": 3.412063492063492e-05,
+ "loss": 0.3064,
+ "step": 3376
+ },
+ {
+ "epoch": 19.29714285714286,
+ "grad_norm": 58.498817443847656,
+ "learning_rate": 3.411428571428571e-05,
+ "loss": 0.4424,
+ "step": 3377
+ },
+ {
+ "epoch": 19.302857142857142,
+ "grad_norm": 27.89930534362793,
+ "learning_rate": 3.410793650793651e-05,
+ "loss": 0.451,
+ "step": 3378
+ },
+ {
+ "epoch": 19.30857142857143,
+ "grad_norm": 63.122528076171875,
+ "learning_rate": 3.4101587301587304e-05,
+ "loss": 0.4518,
+ "step": 3379
+ },
+ {
+ "epoch": 19.314285714285713,
+ "grad_norm": 29.976390838623047,
+ "learning_rate": 3.40952380952381e-05,
+ "loss": 0.3307,
+ "step": 3380
+ },
+ {
+ "epoch": 19.32,
+ "grad_norm": 30.81755828857422,
+ "learning_rate": 3.408888888888889e-05,
+ "loss": 0.3633,
+ "step": 3381
+ },
+ {
+ "epoch": 19.325714285714287,
+ "grad_norm": 50.29838562011719,
+ "learning_rate": 3.408253968253968e-05,
+ "loss": 0.4438,
+ "step": 3382
+ },
+ {
+ "epoch": 19.33142857142857,
+ "grad_norm": 82.86028289794922,
+ "learning_rate": 3.407619047619048e-05,
+ "loss": 0.2431,
+ "step": 3383
+ },
+ {
+ "epoch": 19.337142857142858,
+ "grad_norm": 70.57789611816406,
+ "learning_rate": 3.4069841269841274e-05,
+ "loss": 0.6577,
+ "step": 3384
+ },
+ {
+ "epoch": 19.34285714285714,
+ "grad_norm": 49.527103424072266,
+ "learning_rate": 3.4063492063492067e-05,
+ "loss": 0.3687,
+ "step": 3385
+ },
+ {
+ "epoch": 19.34857142857143,
+ "grad_norm": 35.476287841796875,
+ "learning_rate": 3.405714285714286e-05,
+ "loss": 0.3299,
+ "step": 3386
+ },
+ {
+ "epoch": 19.354285714285716,
+ "grad_norm": 43.367225646972656,
+ "learning_rate": 3.405079365079365e-05,
+ "loss": 0.2333,
+ "step": 3387
+ },
+ {
+ "epoch": 19.36,
+ "grad_norm": 30.95724868774414,
+ "learning_rate": 3.4044444444444445e-05,
+ "loss": 0.2513,
+ "step": 3388
+ },
+ {
+ "epoch": 19.365714285714287,
+ "grad_norm": 127.33224487304688,
+ "learning_rate": 3.4038095238095244e-05,
+ "loss": 0.2649,
+ "step": 3389
+ },
+ {
+ "epoch": 19.37142857142857,
+ "grad_norm": 41.26674270629883,
+ "learning_rate": 3.403174603174603e-05,
+ "loss": 0.4281,
+ "step": 3390
+ },
+ {
+ "epoch": 19.377142857142857,
+ "grad_norm": 386.9308166503906,
+ "learning_rate": 3.402539682539683e-05,
+ "loss": 0.2776,
+ "step": 3391
+ },
+ {
+ "epoch": 19.382857142857144,
+ "grad_norm": 24.09364891052246,
+ "learning_rate": 3.401904761904762e-05,
+ "loss": 0.3776,
+ "step": 3392
+ },
+ {
+ "epoch": 19.388571428571428,
+ "grad_norm": 61.508121490478516,
+ "learning_rate": 3.4012698412698414e-05,
+ "loss": 0.3687,
+ "step": 3393
+ },
+ {
+ "epoch": 19.394285714285715,
+ "grad_norm": 96.15426635742188,
+ "learning_rate": 3.400634920634921e-05,
+ "loss": 0.3404,
+ "step": 3394
+ },
+ {
+ "epoch": 19.4,
+ "grad_norm": 83.6374740600586,
+ "learning_rate": 3.4000000000000007e-05,
+ "loss": 0.3019,
+ "step": 3395
+ },
+ {
+ "epoch": 19.405714285714286,
+ "grad_norm": 55.773799896240234,
+ "learning_rate": 3.399365079365079e-05,
+ "loss": 0.3834,
+ "step": 3396
+ },
+ {
+ "epoch": 19.411428571428573,
+ "grad_norm": 68.75739288330078,
+ "learning_rate": 3.398730158730159e-05,
+ "loss": 0.3088,
+ "step": 3397
+ },
+ {
+ "epoch": 19.417142857142856,
+ "grad_norm": 148.99221801757812,
+ "learning_rate": 3.398095238095238e-05,
+ "loss": 0.3929,
+ "step": 3398
+ },
+ {
+ "epoch": 19.422857142857143,
+ "grad_norm": 106.68118286132812,
+ "learning_rate": 3.397460317460318e-05,
+ "loss": 0.3633,
+ "step": 3399
+ },
+ {
+ "epoch": 19.428571428571427,
+ "grad_norm": 122.96265411376953,
+ "learning_rate": 3.396825396825397e-05,
+ "loss": 0.2715,
+ "step": 3400
+ },
+ {
+ "epoch": 19.434285714285714,
+ "grad_norm": 187.85357666015625,
+ "learning_rate": 3.396190476190476e-05,
+ "loss": 0.4533,
+ "step": 3401
+ },
+ {
+ "epoch": 19.44,
+ "grad_norm": 48.39687728881836,
+ "learning_rate": 3.3955555555555555e-05,
+ "loss": 0.4207,
+ "step": 3402
+ },
+ {
+ "epoch": 19.445714285714285,
+ "grad_norm": 25.29762840270996,
+ "learning_rate": 3.3949206349206354e-05,
+ "loss": 0.344,
+ "step": 3403
+ },
+ {
+ "epoch": 19.451428571428572,
+ "grad_norm": 45.91965866088867,
+ "learning_rate": 3.394285714285714e-05,
+ "loss": 0.4517,
+ "step": 3404
+ },
+ {
+ "epoch": 19.457142857142856,
+ "grad_norm": 549.9448852539062,
+ "learning_rate": 3.393650793650794e-05,
+ "loss": 0.5016,
+ "step": 3405
+ },
+ {
+ "epoch": 19.462857142857143,
+ "grad_norm": 304.6712646484375,
+ "learning_rate": 3.393015873015873e-05,
+ "loss": 0.3779,
+ "step": 3406
+ },
+ {
+ "epoch": 19.46857142857143,
+ "grad_norm": 56.09303665161133,
+ "learning_rate": 3.3923809523809525e-05,
+ "loss": 0.4077,
+ "step": 3407
+ },
+ {
+ "epoch": 19.474285714285713,
+ "grad_norm": 45.298744201660156,
+ "learning_rate": 3.391746031746032e-05,
+ "loss": 0.406,
+ "step": 3408
+ },
+ {
+ "epoch": 19.48,
+ "grad_norm": 60.030433654785156,
+ "learning_rate": 3.391111111111111e-05,
+ "loss": 0.321,
+ "step": 3409
+ },
+ {
+ "epoch": 19.485714285714284,
+ "grad_norm": 71.22777557373047,
+ "learning_rate": 3.39047619047619e-05,
+ "loss": 0.5016,
+ "step": 3410
+ },
+ {
+ "epoch": 19.49142857142857,
+ "grad_norm": 81.2491226196289,
+ "learning_rate": 3.38984126984127e-05,
+ "loss": 0.4921,
+ "step": 3411
+ },
+ {
+ "epoch": 19.497142857142858,
+ "grad_norm": 27.062021255493164,
+ "learning_rate": 3.3892063492063495e-05,
+ "loss": 0.3775,
+ "step": 3412
+ },
+ {
+ "epoch": 19.502857142857142,
+ "grad_norm": 17.73770523071289,
+ "learning_rate": 3.388571428571429e-05,
+ "loss": 0.4258,
+ "step": 3413
+ },
+ {
+ "epoch": 19.50857142857143,
+ "grad_norm": 26.88576316833496,
+ "learning_rate": 3.387936507936508e-05,
+ "loss": 0.452,
+ "step": 3414
+ },
+ {
+ "epoch": 19.514285714285712,
+ "grad_norm": 104.78714752197266,
+ "learning_rate": 3.387301587301587e-05,
+ "loss": 0.4192,
+ "step": 3415
+ },
+ {
+ "epoch": 19.52,
+ "grad_norm": 40.716007232666016,
+ "learning_rate": 3.3866666666666665e-05,
+ "loss": 0.3494,
+ "step": 3416
+ },
+ {
+ "epoch": 19.525714285714287,
+ "grad_norm": 38.237709045410156,
+ "learning_rate": 3.3860317460317465e-05,
+ "loss": 0.3199,
+ "step": 3417
+ },
+ {
+ "epoch": 19.53142857142857,
+ "grad_norm": 29.721233367919922,
+ "learning_rate": 3.385396825396826e-05,
+ "loss": 0.3223,
+ "step": 3418
+ },
+ {
+ "epoch": 19.537142857142857,
+ "grad_norm": 73.37924194335938,
+ "learning_rate": 3.384761904761905e-05,
+ "loss": 0.3554,
+ "step": 3419
+ },
+ {
+ "epoch": 19.542857142857144,
+ "grad_norm": 68.08438110351562,
+ "learning_rate": 3.384126984126984e-05,
+ "loss": 0.3283,
+ "step": 3420
+ },
+ {
+ "epoch": 19.548571428571428,
+ "grad_norm": 197.18992614746094,
+ "learning_rate": 3.3834920634920635e-05,
+ "loss": 0.3387,
+ "step": 3421
+ },
+ {
+ "epoch": 19.554285714285715,
+ "grad_norm": 85.63236999511719,
+ "learning_rate": 3.3828571428571435e-05,
+ "loss": 0.3336,
+ "step": 3422
+ },
+ {
+ "epoch": 19.56,
+ "grad_norm": 62.67841720581055,
+ "learning_rate": 3.382222222222222e-05,
+ "loss": 0.4093,
+ "step": 3423
+ },
+ {
+ "epoch": 19.565714285714286,
+ "grad_norm": 73.47346496582031,
+ "learning_rate": 3.381587301587302e-05,
+ "loss": 0.3136,
+ "step": 3424
+ },
+ {
+ "epoch": 19.571428571428573,
+ "grad_norm": 73.36341857910156,
+ "learning_rate": 3.380952380952381e-05,
+ "loss": 0.393,
+ "step": 3425
+ },
+ {
+ "epoch": 19.577142857142857,
+ "grad_norm": 29.94477653503418,
+ "learning_rate": 3.3803174603174605e-05,
+ "loss": 0.3345,
+ "step": 3426
+ },
+ {
+ "epoch": 19.582857142857144,
+ "grad_norm": 35.631168365478516,
+ "learning_rate": 3.37968253968254e-05,
+ "loss": 0.3381,
+ "step": 3427
+ },
+ {
+ "epoch": 19.588571428571427,
+ "grad_norm": 38.06371307373047,
+ "learning_rate": 3.37904761904762e-05,
+ "loss": 0.3678,
+ "step": 3428
+ },
+ {
+ "epoch": 19.594285714285714,
+ "grad_norm": 62.71897506713867,
+ "learning_rate": 3.378412698412698e-05,
+ "loss": 0.5052,
+ "step": 3429
+ },
+ {
+ "epoch": 19.6,
+ "grad_norm": 340.963623046875,
+ "learning_rate": 3.377777777777778e-05,
+ "loss": 0.3526,
+ "step": 3430
+ },
+ {
+ "epoch": 19.605714285714285,
+ "grad_norm": 93.26659393310547,
+ "learning_rate": 3.377142857142857e-05,
+ "loss": 0.5047,
+ "step": 3431
+ },
+ {
+ "epoch": 19.611428571428572,
+ "grad_norm": 30.196279525756836,
+ "learning_rate": 3.376507936507937e-05,
+ "loss": 0.4641,
+ "step": 3432
+ },
+ {
+ "epoch": 19.617142857142856,
+ "grad_norm": 97.52349853515625,
+ "learning_rate": 3.375873015873016e-05,
+ "loss": 0.4104,
+ "step": 3433
+ },
+ {
+ "epoch": 19.622857142857143,
+ "grad_norm": 67.79190063476562,
+ "learning_rate": 3.375238095238095e-05,
+ "loss": 0.5344,
+ "step": 3434
+ },
+ {
+ "epoch": 19.62857142857143,
+ "grad_norm": 71.02275848388672,
+ "learning_rate": 3.3746031746031746e-05,
+ "loss": 0.3563,
+ "step": 3435
+ },
+ {
+ "epoch": 19.634285714285713,
+ "grad_norm": 60.292869567871094,
+ "learning_rate": 3.3739682539682545e-05,
+ "loss": 0.3805,
+ "step": 3436
+ },
+ {
+ "epoch": 19.64,
+ "grad_norm": 24.881738662719727,
+ "learning_rate": 3.373333333333333e-05,
+ "loss": 0.5486,
+ "step": 3437
+ },
+ {
+ "epoch": 19.645714285714284,
+ "grad_norm": 72.77564239501953,
+ "learning_rate": 3.372698412698413e-05,
+ "loss": 0.3257,
+ "step": 3438
+ },
+ {
+ "epoch": 19.65142857142857,
+ "grad_norm": 31.99180030822754,
+ "learning_rate": 3.372063492063492e-05,
+ "loss": 0.2574,
+ "step": 3439
+ },
+ {
+ "epoch": 19.65714285714286,
+ "grad_norm": 23.13508415222168,
+ "learning_rate": 3.3714285714285716e-05,
+ "loss": 0.2581,
+ "step": 3440
+ },
+ {
+ "epoch": 19.662857142857142,
+ "grad_norm": 41.29374313354492,
+ "learning_rate": 3.370793650793651e-05,
+ "loss": 0.3201,
+ "step": 3441
+ },
+ {
+ "epoch": 19.66857142857143,
+ "grad_norm": 204.74082946777344,
+ "learning_rate": 3.37015873015873e-05,
+ "loss": 0.2919,
+ "step": 3442
+ },
+ {
+ "epoch": 19.674285714285713,
+ "grad_norm": 77.87431335449219,
+ "learning_rate": 3.3695238095238094e-05,
+ "loss": 0.3872,
+ "step": 3443
+ },
+ {
+ "epoch": 19.68,
+ "grad_norm": 34.35457229614258,
+ "learning_rate": 3.368888888888889e-05,
+ "loss": 0.2946,
+ "step": 3444
+ },
+ {
+ "epoch": 19.685714285714287,
+ "grad_norm": 53.063560485839844,
+ "learning_rate": 3.368253968253968e-05,
+ "loss": 0.3896,
+ "step": 3445
+ },
+ {
+ "epoch": 19.69142857142857,
+ "grad_norm": 17.917118072509766,
+ "learning_rate": 3.367619047619048e-05,
+ "loss": 0.339,
+ "step": 3446
+ },
+ {
+ "epoch": 19.697142857142858,
+ "grad_norm": 62.1878776550293,
+ "learning_rate": 3.366984126984127e-05,
+ "loss": 0.2799,
+ "step": 3447
+ },
+ {
+ "epoch": 19.70285714285714,
+ "grad_norm": 33.710147857666016,
+ "learning_rate": 3.3663492063492064e-05,
+ "loss": 0.2277,
+ "step": 3448
+ },
+ {
+ "epoch": 19.708571428571428,
+ "grad_norm": 76.45654296875,
+ "learning_rate": 3.3657142857142856e-05,
+ "loss": 0.3065,
+ "step": 3449
+ },
+ {
+ "epoch": 19.714285714285715,
+ "grad_norm": 22.282197952270508,
+ "learning_rate": 3.3650793650793656e-05,
+ "loss": 0.3891,
+ "step": 3450
+ },
+ {
+ "epoch": 19.72,
+ "grad_norm": 844.6537475585938,
+ "learning_rate": 3.364444444444445e-05,
+ "loss": 0.4347,
+ "step": 3451
+ },
+ {
+ "epoch": 19.725714285714286,
+ "grad_norm": 37.986270904541016,
+ "learning_rate": 3.363809523809524e-05,
+ "loss": 0.3042,
+ "step": 3452
+ },
+ {
+ "epoch": 19.731428571428573,
+ "grad_norm": 47.54478073120117,
+ "learning_rate": 3.3631746031746034e-05,
+ "loss": 0.2708,
+ "step": 3453
+ },
+ {
+ "epoch": 19.737142857142857,
+ "grad_norm": 21.841947555541992,
+ "learning_rate": 3.3625396825396826e-05,
+ "loss": 0.2471,
+ "step": 3454
+ },
+ {
+ "epoch": 19.742857142857144,
+ "grad_norm": 120.77538299560547,
+ "learning_rate": 3.361904761904762e-05,
+ "loss": 0.3965,
+ "step": 3455
+ },
+ {
+ "epoch": 19.748571428571427,
+ "grad_norm": 102.79607391357422,
+ "learning_rate": 3.361269841269841e-05,
+ "loss": 0.3502,
+ "step": 3456
+ },
+ {
+ "epoch": 19.754285714285714,
+ "grad_norm": 91.01953125,
+ "learning_rate": 3.360634920634921e-05,
+ "loss": 0.2933,
+ "step": 3457
+ },
+ {
+ "epoch": 19.76,
+ "grad_norm": 192.01560974121094,
+ "learning_rate": 3.3600000000000004e-05,
+ "loss": 0.4807,
+ "step": 3458
+ },
+ {
+ "epoch": 19.765714285714285,
+ "grad_norm": 32.285369873046875,
+ "learning_rate": 3.3593650793650796e-05,
+ "loss": 0.3571,
+ "step": 3459
+ },
+ {
+ "epoch": 19.771428571428572,
+ "grad_norm": 89.04906463623047,
+ "learning_rate": 3.358730158730159e-05,
+ "loss": 0.3545,
+ "step": 3460
+ },
+ {
+ "epoch": 19.777142857142856,
+ "grad_norm": 22.91415786743164,
+ "learning_rate": 3.358095238095239e-05,
+ "loss": 0.3823,
+ "step": 3461
+ },
+ {
+ "epoch": 19.782857142857143,
+ "grad_norm": 77.87922668457031,
+ "learning_rate": 3.3574603174603174e-05,
+ "loss": 0.3593,
+ "step": 3462
+ },
+ {
+ "epoch": 19.78857142857143,
+ "grad_norm": 21.24201774597168,
+ "learning_rate": 3.3568253968253974e-05,
+ "loss": 0.4239,
+ "step": 3463
+ },
+ {
+ "epoch": 19.794285714285714,
+ "grad_norm": 48.2424430847168,
+ "learning_rate": 3.356190476190476e-05,
+ "loss": 0.3687,
+ "step": 3464
+ },
+ {
+ "epoch": 19.8,
+ "grad_norm": 47.62773895263672,
+ "learning_rate": 3.355555555555556e-05,
+ "loss": 0.348,
+ "step": 3465
+ },
+ {
+ "epoch": 19.805714285714284,
+ "grad_norm": 71.29972076416016,
+ "learning_rate": 3.354920634920635e-05,
+ "loss": 0.4168,
+ "step": 3466
+ },
+ {
+ "epoch": 19.81142857142857,
+ "grad_norm": 56.72834396362305,
+ "learning_rate": 3.3542857142857144e-05,
+ "loss": 0.2963,
+ "step": 3467
+ },
+ {
+ "epoch": 19.81714285714286,
+ "grad_norm": 41.31034469604492,
+ "learning_rate": 3.353650793650794e-05,
+ "loss": 0.3007,
+ "step": 3468
+ },
+ {
+ "epoch": 19.822857142857142,
+ "grad_norm": 90.26205444335938,
+ "learning_rate": 3.3530158730158736e-05,
+ "loss": 0.2514,
+ "step": 3469
+ },
+ {
+ "epoch": 19.82857142857143,
+ "grad_norm": 47.04402542114258,
+ "learning_rate": 3.352380952380952e-05,
+ "loss": 0.5244,
+ "step": 3470
+ },
+ {
+ "epoch": 19.834285714285713,
+ "grad_norm": 37.07173156738281,
+ "learning_rate": 3.351746031746032e-05,
+ "loss": 0.3592,
+ "step": 3471
+ },
+ {
+ "epoch": 19.84,
+ "grad_norm": 28.39764404296875,
+ "learning_rate": 3.3511111111111114e-05,
+ "loss": 0.361,
+ "step": 3472
+ },
+ {
+ "epoch": 19.845714285714287,
+ "grad_norm": 73.49800109863281,
+ "learning_rate": 3.350476190476191e-05,
+ "loss": 0.4602,
+ "step": 3473
+ },
+ {
+ "epoch": 19.85142857142857,
+ "grad_norm": 40.269344329833984,
+ "learning_rate": 3.34984126984127e-05,
+ "loss": 0.3875,
+ "step": 3474
+ },
+ {
+ "epoch": 19.857142857142858,
+ "grad_norm": 42.47099685668945,
+ "learning_rate": 3.349206349206349e-05,
+ "loss": 0.4689,
+ "step": 3475
+ },
+ {
+ "epoch": 19.86285714285714,
+ "grad_norm": 35.60416030883789,
+ "learning_rate": 3.3485714285714285e-05,
+ "loss": 0.3837,
+ "step": 3476
+ },
+ {
+ "epoch": 19.86857142857143,
+ "grad_norm": 49.231346130371094,
+ "learning_rate": 3.3479365079365084e-05,
+ "loss": 0.4882,
+ "step": 3477
+ },
+ {
+ "epoch": 19.874285714285715,
+ "grad_norm": 25.547861099243164,
+ "learning_rate": 3.347301587301587e-05,
+ "loss": 0.5016,
+ "step": 3478
+ },
+ {
+ "epoch": 19.88,
+ "grad_norm": 40.764549255371094,
+ "learning_rate": 3.346666666666667e-05,
+ "loss": 0.4769,
+ "step": 3479
+ },
+ {
+ "epoch": 19.885714285714286,
+ "grad_norm": 562.2260131835938,
+ "learning_rate": 3.346031746031746e-05,
+ "loss": 0.3894,
+ "step": 3480
+ },
+ {
+ "epoch": 19.89142857142857,
+ "grad_norm": 37.60805130004883,
+ "learning_rate": 3.3453968253968255e-05,
+ "loss": 0.3554,
+ "step": 3481
+ },
+ {
+ "epoch": 19.897142857142857,
+ "grad_norm": 42.82327651977539,
+ "learning_rate": 3.344761904761905e-05,
+ "loss": 0.3977,
+ "step": 3482
+ },
+ {
+ "epoch": 19.902857142857144,
+ "grad_norm": 35.4509162902832,
+ "learning_rate": 3.3441269841269847e-05,
+ "loss": 0.4073,
+ "step": 3483
+ },
+ {
+ "epoch": 19.908571428571427,
+ "grad_norm": 48.23896408081055,
+ "learning_rate": 3.343492063492063e-05,
+ "loss": 0.2832,
+ "step": 3484
+ },
+ {
+ "epoch": 19.914285714285715,
+ "grad_norm": 573.278564453125,
+ "learning_rate": 3.342857142857143e-05,
+ "loss": 0.5797,
+ "step": 3485
+ },
+ {
+ "epoch": 19.92,
+ "grad_norm": 224.73423767089844,
+ "learning_rate": 3.3422222222222224e-05,
+ "loss": 0.4679,
+ "step": 3486
+ },
+ {
+ "epoch": 19.925714285714285,
+ "grad_norm": 257.82159423828125,
+ "learning_rate": 3.341587301587302e-05,
+ "loss": 0.4137,
+ "step": 3487
+ },
+ {
+ "epoch": 19.931428571428572,
+ "grad_norm": 37.30079650878906,
+ "learning_rate": 3.340952380952381e-05,
+ "loss": 0.4874,
+ "step": 3488
+ },
+ {
+ "epoch": 19.937142857142856,
+ "grad_norm": 41.37504959106445,
+ "learning_rate": 3.34031746031746e-05,
+ "loss": 0.5663,
+ "step": 3489
+ },
+ {
+ "epoch": 19.942857142857143,
+ "grad_norm": 85.4715805053711,
+ "learning_rate": 3.33968253968254e-05,
+ "loss": 0.574,
+ "step": 3490
+ },
+ {
+ "epoch": 19.94857142857143,
+ "grad_norm": 270.5368347167969,
+ "learning_rate": 3.3390476190476194e-05,
+ "loss": 0.5881,
+ "step": 3491
+ },
+ {
+ "epoch": 19.954285714285714,
+ "grad_norm": 35.138389587402344,
+ "learning_rate": 3.338412698412699e-05,
+ "loss": 0.4335,
+ "step": 3492
+ },
+ {
+ "epoch": 19.96,
+ "grad_norm": 44.54415512084961,
+ "learning_rate": 3.337777777777778e-05,
+ "loss": 0.3731,
+ "step": 3493
+ },
+ {
+ "epoch": 19.965714285714284,
+ "grad_norm": 46.49395751953125,
+ "learning_rate": 3.337142857142857e-05,
+ "loss": 0.3291,
+ "step": 3494
+ },
+ {
+ "epoch": 19.97142857142857,
+ "grad_norm": 24.26507568359375,
+ "learning_rate": 3.3365079365079365e-05,
+ "loss": 0.3144,
+ "step": 3495
+ },
+ {
+ "epoch": 19.97714285714286,
+ "grad_norm": 38.6696891784668,
+ "learning_rate": 3.3358730158730164e-05,
+ "loss": 0.4773,
+ "step": 3496
+ },
+ {
+ "epoch": 19.982857142857142,
+ "grad_norm": 501.0118408203125,
+ "learning_rate": 3.335238095238095e-05,
+ "loss": 0.5959,
+ "step": 3497
+ },
+ {
+ "epoch": 19.98857142857143,
+ "grad_norm": 70.6850357055664,
+ "learning_rate": 3.334603174603175e-05,
+ "loss": 0.4196,
+ "step": 3498
+ },
+ {
+ "epoch": 19.994285714285713,
+ "grad_norm": 34.70767593383789,
+ "learning_rate": 3.333968253968254e-05,
+ "loss": 0.4273,
+ "step": 3499
+ },
+ {
+ "epoch": 20.0,
+ "grad_norm": 126.21673583984375,
+ "learning_rate": 3.3333333333333335e-05,
+ "loss": 0.5125,
+ "step": 3500
+ },
+ {
+ "epoch": 20.0,
+ "eval_classes": 0,
+ "eval_loss": 0.8406780958175659,
+ "eval_map": 0.8424,
+ "eval_map_50": 0.9321,
+ "eval_map_75": 0.8951,
+ "eval_map_large": 0.8426,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.8424,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7362,
+ "eval_mar_10": 0.9371,
+ "eval_mar_100": 0.947,
+ "eval_mar_100_per_class": 0.947,
+ "eval_mar_large": 0.947,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 14.1918,
+ "eval_samples_per_second": 20.716,
+ "eval_steps_per_second": 2.607,
+ "step": 3500
+ },
+ {
+ "epoch": 20.005714285714287,
+ "grad_norm": 20.92926597595215,
+ "learning_rate": 3.332698412698413e-05,
+ "loss": 0.3793,
+ "step": 3501
+ },
+ {
+ "epoch": 20.01142857142857,
+ "grad_norm": 36.212100982666016,
+ "learning_rate": 3.332063492063493e-05,
+ "loss": 0.4771,
+ "step": 3502
+ },
+ {
+ "epoch": 20.017142857142858,
+ "grad_norm": 28.421642303466797,
+ "learning_rate": 3.331428571428571e-05,
+ "loss": 0.414,
+ "step": 3503
+ },
+ {
+ "epoch": 20.02285714285714,
+ "grad_norm": 76.52762603759766,
+ "learning_rate": 3.330793650793651e-05,
+ "loss": 0.3281,
+ "step": 3504
+ },
+ {
+ "epoch": 20.02857142857143,
+ "grad_norm": 103.1466064453125,
+ "learning_rate": 3.3301587301587305e-05,
+ "loss": 0.3919,
+ "step": 3505
+ },
+ {
+ "epoch": 20.034285714285716,
+ "grad_norm": 61.027793884277344,
+ "learning_rate": 3.32952380952381e-05,
+ "loss": 0.3695,
+ "step": 3506
+ },
+ {
+ "epoch": 20.04,
+ "grad_norm": 64.57794952392578,
+ "learning_rate": 3.328888888888889e-05,
+ "loss": 0.4059,
+ "step": 3507
+ },
+ {
+ "epoch": 20.045714285714286,
+ "grad_norm": 40.50300216674805,
+ "learning_rate": 3.328253968253968e-05,
+ "loss": 0.5027,
+ "step": 3508
+ },
+ {
+ "epoch": 20.05142857142857,
+ "grad_norm": 76.50260925292969,
+ "learning_rate": 3.3276190476190475e-05,
+ "loss": 0.5772,
+ "step": 3509
+ },
+ {
+ "epoch": 20.057142857142857,
+ "grad_norm": 55.68210983276367,
+ "learning_rate": 3.3269841269841275e-05,
+ "loss": 0.4448,
+ "step": 3510
+ },
+ {
+ "epoch": 20.062857142857144,
+ "grad_norm": 65.80165100097656,
+ "learning_rate": 3.326349206349206e-05,
+ "loss": 0.574,
+ "step": 3511
+ },
+ {
+ "epoch": 20.068571428571428,
+ "grad_norm": 195.4805908203125,
+ "learning_rate": 3.325714285714286e-05,
+ "loss": 0.3547,
+ "step": 3512
+ },
+ {
+ "epoch": 20.074285714285715,
+ "grad_norm": 350.74420166015625,
+ "learning_rate": 3.325079365079365e-05,
+ "loss": 0.4308,
+ "step": 3513
+ },
+ {
+ "epoch": 20.08,
+ "grad_norm": 92.72909545898438,
+ "learning_rate": 3.3244444444444445e-05,
+ "loss": 0.3202,
+ "step": 3514
+ },
+ {
+ "epoch": 20.085714285714285,
+ "grad_norm": 121.07453155517578,
+ "learning_rate": 3.323809523809524e-05,
+ "loss": 0.4489,
+ "step": 3515
+ },
+ {
+ "epoch": 20.091428571428573,
+ "grad_norm": 29.92794418334961,
+ "learning_rate": 3.323174603174604e-05,
+ "loss": 0.3939,
+ "step": 3516
+ },
+ {
+ "epoch": 20.097142857142856,
+ "grad_norm": 49.57035446166992,
+ "learning_rate": 3.322539682539682e-05,
+ "loss": 0.2621,
+ "step": 3517
+ },
+ {
+ "epoch": 20.102857142857143,
+ "grad_norm": 69.45816802978516,
+ "learning_rate": 3.321904761904762e-05,
+ "loss": 0.238,
+ "step": 3518
+ },
+ {
+ "epoch": 20.10857142857143,
+ "grad_norm": 59.2504997253418,
+ "learning_rate": 3.3212698412698415e-05,
+ "loss": 0.5043,
+ "step": 3519
+ },
+ {
+ "epoch": 20.114285714285714,
+ "grad_norm": 49.68259811401367,
+ "learning_rate": 3.320634920634921e-05,
+ "loss": 0.2716,
+ "step": 3520
+ },
+ {
+ "epoch": 20.12,
+ "grad_norm": 28.603973388671875,
+ "learning_rate": 3.32e-05,
+ "loss": 0.4292,
+ "step": 3521
+ },
+ {
+ "epoch": 20.125714285714285,
+ "grad_norm": 34.90120315551758,
+ "learning_rate": 3.319365079365079e-05,
+ "loss": 0.5205,
+ "step": 3522
+ },
+ {
+ "epoch": 20.13142857142857,
+ "grad_norm": 67.77092742919922,
+ "learning_rate": 3.3187301587301586e-05,
+ "loss": 0.4614,
+ "step": 3523
+ },
+ {
+ "epoch": 20.13714285714286,
+ "grad_norm": 86.2787094116211,
+ "learning_rate": 3.3180952380952385e-05,
+ "loss": 0.4002,
+ "step": 3524
+ },
+ {
+ "epoch": 20.142857142857142,
+ "grad_norm": 59.58238983154297,
+ "learning_rate": 3.317460317460318e-05,
+ "loss": 0.2968,
+ "step": 3525
+ },
+ {
+ "epoch": 20.14857142857143,
+ "grad_norm": 53.9903678894043,
+ "learning_rate": 3.316825396825397e-05,
+ "loss": 0.4827,
+ "step": 3526
+ },
+ {
+ "epoch": 20.154285714285713,
+ "grad_norm": 47.19764709472656,
+ "learning_rate": 3.316190476190476e-05,
+ "loss": 0.3388,
+ "step": 3527
+ },
+ {
+ "epoch": 20.16,
+ "grad_norm": 87.91258239746094,
+ "learning_rate": 3.3155555555555556e-05,
+ "loss": 0.2736,
+ "step": 3528
+ },
+ {
+ "epoch": 20.165714285714287,
+ "grad_norm": 58.533050537109375,
+ "learning_rate": 3.3149206349206355e-05,
+ "loss": 0.3742,
+ "step": 3529
+ },
+ {
+ "epoch": 20.17142857142857,
+ "grad_norm": 42.35979461669922,
+ "learning_rate": 3.314285714285714e-05,
+ "loss": 0.3093,
+ "step": 3530
+ },
+ {
+ "epoch": 20.177142857142858,
+ "grad_norm": 49.007110595703125,
+ "learning_rate": 3.313650793650794e-05,
+ "loss": 0.2862,
+ "step": 3531
+ },
+ {
+ "epoch": 20.18285714285714,
+ "grad_norm": 78.78868103027344,
+ "learning_rate": 3.313015873015873e-05,
+ "loss": 0.2489,
+ "step": 3532
+ },
+ {
+ "epoch": 20.18857142857143,
+ "grad_norm": 116.97369384765625,
+ "learning_rate": 3.3123809523809526e-05,
+ "loss": 0.3767,
+ "step": 3533
+ },
+ {
+ "epoch": 20.194285714285716,
+ "grad_norm": 37.7362174987793,
+ "learning_rate": 3.311746031746032e-05,
+ "loss": 0.2782,
+ "step": 3534
+ },
+ {
+ "epoch": 20.2,
+ "grad_norm": 183.8468017578125,
+ "learning_rate": 3.311111111111112e-05,
+ "loss": 0.2742,
+ "step": 3535
+ },
+ {
+ "epoch": 20.205714285714286,
+ "grad_norm": 27.15769386291504,
+ "learning_rate": 3.3104761904761904e-05,
+ "loss": 0.2069,
+ "step": 3536
+ },
+ {
+ "epoch": 20.21142857142857,
+ "grad_norm": 43.55573272705078,
+ "learning_rate": 3.30984126984127e-05,
+ "loss": 0.3321,
+ "step": 3537
+ },
+ {
+ "epoch": 20.217142857142857,
+ "grad_norm": 36.51277542114258,
+ "learning_rate": 3.309206349206349e-05,
+ "loss": 0.4491,
+ "step": 3538
+ },
+ {
+ "epoch": 20.222857142857144,
+ "grad_norm": 30.36905860900879,
+ "learning_rate": 3.308571428571429e-05,
+ "loss": 0.5704,
+ "step": 3539
+ },
+ {
+ "epoch": 20.228571428571428,
+ "grad_norm": 296.42828369140625,
+ "learning_rate": 3.307936507936508e-05,
+ "loss": 0.377,
+ "step": 3540
+ },
+ {
+ "epoch": 20.234285714285715,
+ "grad_norm": 53.208351135253906,
+ "learning_rate": 3.3073015873015874e-05,
+ "loss": 0.4801,
+ "step": 3541
+ },
+ {
+ "epoch": 20.24,
+ "grad_norm": 112.80752563476562,
+ "learning_rate": 3.3066666666666666e-05,
+ "loss": 0.4057,
+ "step": 3542
+ },
+ {
+ "epoch": 20.245714285714286,
+ "grad_norm": 108.6169662475586,
+ "learning_rate": 3.3060317460317466e-05,
+ "loss": 0.5041,
+ "step": 3543
+ },
+ {
+ "epoch": 20.251428571428573,
+ "grad_norm": 59.439430236816406,
+ "learning_rate": 3.305396825396825e-05,
+ "loss": 0.3092,
+ "step": 3544
+ },
+ {
+ "epoch": 20.257142857142856,
+ "grad_norm": 191.1466064453125,
+ "learning_rate": 3.304761904761905e-05,
+ "loss": 0.4724,
+ "step": 3545
+ },
+ {
+ "epoch": 20.262857142857143,
+ "grad_norm": 22.837505340576172,
+ "learning_rate": 3.3041269841269844e-05,
+ "loss": 0.449,
+ "step": 3546
+ },
+ {
+ "epoch": 20.268571428571427,
+ "grad_norm": 44.4491081237793,
+ "learning_rate": 3.3034920634920636e-05,
+ "loss": 0.3667,
+ "step": 3547
+ },
+ {
+ "epoch": 20.274285714285714,
+ "grad_norm": 69.18882751464844,
+ "learning_rate": 3.302857142857143e-05,
+ "loss": 0.3382,
+ "step": 3548
+ },
+ {
+ "epoch": 20.28,
+ "grad_norm": 59.61344909667969,
+ "learning_rate": 3.302222222222222e-05,
+ "loss": 0.3483,
+ "step": 3549
+ },
+ {
+ "epoch": 20.285714285714285,
+ "grad_norm": 33.22392272949219,
+ "learning_rate": 3.3015873015873014e-05,
+ "loss": 0.353,
+ "step": 3550
+ },
+ {
+ "epoch": 20.291428571428572,
+ "grad_norm": 39.61313247680664,
+ "learning_rate": 3.3009523809523814e-05,
+ "loss": 0.4028,
+ "step": 3551
+ },
+ {
+ "epoch": 20.29714285714286,
+ "grad_norm": 124.15302276611328,
+ "learning_rate": 3.30031746031746e-05,
+ "loss": 0.5019,
+ "step": 3552
+ },
+ {
+ "epoch": 20.302857142857142,
+ "grad_norm": 39.78657150268555,
+ "learning_rate": 3.29968253968254e-05,
+ "loss": 0.3265,
+ "step": 3553
+ },
+ {
+ "epoch": 20.30857142857143,
+ "grad_norm": 60.90039825439453,
+ "learning_rate": 3.299047619047619e-05,
+ "loss": 0.2673,
+ "step": 3554
+ },
+ {
+ "epoch": 20.314285714285713,
+ "grad_norm": 41.07734680175781,
+ "learning_rate": 3.2984126984126984e-05,
+ "loss": 0.3912,
+ "step": 3555
+ },
+ {
+ "epoch": 20.32,
+ "grad_norm": 70.89137268066406,
+ "learning_rate": 3.297777777777778e-05,
+ "loss": 0.4581,
+ "step": 3556
+ },
+ {
+ "epoch": 20.325714285714287,
+ "grad_norm": 90.77945709228516,
+ "learning_rate": 3.2971428571428576e-05,
+ "loss": 0.616,
+ "step": 3557
+ },
+ {
+ "epoch": 20.33142857142857,
+ "grad_norm": 66.2533950805664,
+ "learning_rate": 3.296507936507937e-05,
+ "loss": 0.3016,
+ "step": 3558
+ },
+ {
+ "epoch": 20.337142857142858,
+ "grad_norm": 31.12895965576172,
+ "learning_rate": 3.295873015873016e-05,
+ "loss": 0.253,
+ "step": 3559
+ },
+ {
+ "epoch": 20.34285714285714,
+ "grad_norm": 53.287845611572266,
+ "learning_rate": 3.2952380952380954e-05,
+ "loss": 0.3133,
+ "step": 3560
+ },
+ {
+ "epoch": 20.34857142857143,
+ "grad_norm": 27.55655860900879,
+ "learning_rate": 3.294603174603175e-05,
+ "loss": 0.3104,
+ "step": 3561
+ },
+ {
+ "epoch": 20.354285714285716,
+ "grad_norm": 21.481998443603516,
+ "learning_rate": 3.293968253968254e-05,
+ "loss": 0.3095,
+ "step": 3562
+ },
+ {
+ "epoch": 20.36,
+ "grad_norm": 43.74786376953125,
+ "learning_rate": 3.293333333333333e-05,
+ "loss": 0.3787,
+ "step": 3563
+ },
+ {
+ "epoch": 20.365714285714287,
+ "grad_norm": 21.40505027770996,
+ "learning_rate": 3.292698412698413e-05,
+ "loss": 0.3613,
+ "step": 3564
+ },
+ {
+ "epoch": 20.37142857142857,
+ "grad_norm": 86.09355163574219,
+ "learning_rate": 3.2920634920634924e-05,
+ "loss": 0.4475,
+ "step": 3565
+ },
+ {
+ "epoch": 20.377142857142857,
+ "grad_norm": 38.143455505371094,
+ "learning_rate": 3.291428571428572e-05,
+ "loss": 0.427,
+ "step": 3566
+ },
+ {
+ "epoch": 20.382857142857144,
+ "grad_norm": 30.078758239746094,
+ "learning_rate": 3.290793650793651e-05,
+ "loss": 0.4033,
+ "step": 3567
+ },
+ {
+ "epoch": 20.388571428571428,
+ "grad_norm": 61.135982513427734,
+ "learning_rate": 3.290158730158731e-05,
+ "loss": 0.5424,
+ "step": 3568
+ },
+ {
+ "epoch": 20.394285714285715,
+ "grad_norm": 80.14017486572266,
+ "learning_rate": 3.2895238095238095e-05,
+ "loss": 0.3496,
+ "step": 3569
+ },
+ {
+ "epoch": 20.4,
+ "grad_norm": 52.26523208618164,
+ "learning_rate": 3.2888888888888894e-05,
+ "loss": 0.43,
+ "step": 3570
+ },
+ {
+ "epoch": 20.405714285714286,
+ "grad_norm": 41.19422149658203,
+ "learning_rate": 3.288253968253968e-05,
+ "loss": 0.4903,
+ "step": 3571
+ },
+ {
+ "epoch": 20.411428571428573,
+ "grad_norm": 33.27156448364258,
+ "learning_rate": 3.287619047619048e-05,
+ "loss": 0.4012,
+ "step": 3572
+ },
+ {
+ "epoch": 20.417142857142856,
+ "grad_norm": 35.2028694152832,
+ "learning_rate": 3.286984126984127e-05,
+ "loss": 0.327,
+ "step": 3573
+ },
+ {
+ "epoch": 20.422857142857143,
+ "grad_norm": 60.655948638916016,
+ "learning_rate": 3.2863492063492065e-05,
+ "loss": 0.3294,
+ "step": 3574
+ },
+ {
+ "epoch": 20.428571428571427,
+ "grad_norm": 74.69729614257812,
+ "learning_rate": 3.285714285714286e-05,
+ "loss": 0.2993,
+ "step": 3575
+ },
+ {
+ "epoch": 20.434285714285714,
+ "grad_norm": 61.959712982177734,
+ "learning_rate": 3.2850793650793657e-05,
+ "loss": 0.2402,
+ "step": 3576
+ },
+ {
+ "epoch": 20.44,
+ "grad_norm": 49.8610954284668,
+ "learning_rate": 3.284444444444444e-05,
+ "loss": 0.4154,
+ "step": 3577
+ },
+ {
+ "epoch": 20.445714285714285,
+ "grad_norm": 75.74891662597656,
+ "learning_rate": 3.283809523809524e-05,
+ "loss": 0.4226,
+ "step": 3578
+ },
+ {
+ "epoch": 20.451428571428572,
+ "grad_norm": 27.832401275634766,
+ "learning_rate": 3.2831746031746034e-05,
+ "loss": 0.3972,
+ "step": 3579
+ },
+ {
+ "epoch": 20.457142857142856,
+ "grad_norm": 21.77943229675293,
+ "learning_rate": 3.282539682539683e-05,
+ "loss": 0.4014,
+ "step": 3580
+ },
+ {
+ "epoch": 20.462857142857143,
+ "grad_norm": 37.12984085083008,
+ "learning_rate": 3.281904761904762e-05,
+ "loss": 0.2845,
+ "step": 3581
+ },
+ {
+ "epoch": 20.46857142857143,
+ "grad_norm": 65.29959869384766,
+ "learning_rate": 3.281269841269841e-05,
+ "loss": 0.3069,
+ "step": 3582
+ },
+ {
+ "epoch": 20.474285714285713,
+ "grad_norm": 54.41910171508789,
+ "learning_rate": 3.2806349206349205e-05,
+ "loss": 0.3176,
+ "step": 3583
+ },
+ {
+ "epoch": 20.48,
+ "grad_norm": 59.21097946166992,
+ "learning_rate": 3.2800000000000004e-05,
+ "loss": 0.3664,
+ "step": 3584
+ },
+ {
+ "epoch": 20.485714285714284,
+ "grad_norm": 42.1156005859375,
+ "learning_rate": 3.279365079365079e-05,
+ "loss": 0.4677,
+ "step": 3585
+ },
+ {
+ "epoch": 20.49142857142857,
+ "grad_norm": 37.332820892333984,
+ "learning_rate": 3.278730158730159e-05,
+ "loss": 0.4599,
+ "step": 3586
+ },
+ {
+ "epoch": 20.497142857142858,
+ "grad_norm": 47.108253479003906,
+ "learning_rate": 3.278095238095238e-05,
+ "loss": 0.4497,
+ "step": 3587
+ },
+ {
+ "epoch": 20.502857142857142,
+ "grad_norm": 30.58877944946289,
+ "learning_rate": 3.2774603174603175e-05,
+ "loss": 0.3067,
+ "step": 3588
+ },
+ {
+ "epoch": 20.50857142857143,
+ "grad_norm": 38.203102111816406,
+ "learning_rate": 3.276825396825397e-05,
+ "loss": 0.3709,
+ "step": 3589
+ },
+ {
+ "epoch": 20.514285714285712,
+ "grad_norm": 30.350473403930664,
+ "learning_rate": 3.276190476190477e-05,
+ "loss": 0.336,
+ "step": 3590
+ },
+ {
+ "epoch": 20.52,
+ "grad_norm": 54.0617561340332,
+ "learning_rate": 3.275555555555555e-05,
+ "loss": 0.2258,
+ "step": 3591
+ },
+ {
+ "epoch": 20.525714285714287,
+ "grad_norm": 451.57080078125,
+ "learning_rate": 3.274920634920635e-05,
+ "loss": 0.3309,
+ "step": 3592
+ },
+ {
+ "epoch": 20.53142857142857,
+ "grad_norm": 54.708030700683594,
+ "learning_rate": 3.2742857142857145e-05,
+ "loss": 0.3262,
+ "step": 3593
+ },
+ {
+ "epoch": 20.537142857142857,
+ "grad_norm": 38.319000244140625,
+ "learning_rate": 3.273650793650794e-05,
+ "loss": 0.2958,
+ "step": 3594
+ },
+ {
+ "epoch": 20.542857142857144,
+ "grad_norm": 52.20222473144531,
+ "learning_rate": 3.273015873015873e-05,
+ "loss": 0.3768,
+ "step": 3595
+ },
+ {
+ "epoch": 20.548571428571428,
+ "grad_norm": 24.321361541748047,
+ "learning_rate": 3.272380952380952e-05,
+ "loss": 0.4274,
+ "step": 3596
+ },
+ {
+ "epoch": 20.554285714285715,
+ "grad_norm": 38.93540573120117,
+ "learning_rate": 3.271746031746032e-05,
+ "loss": 0.3738,
+ "step": 3597
+ },
+ {
+ "epoch": 20.56,
+ "grad_norm": 31.37497329711914,
+ "learning_rate": 3.2711111111111115e-05,
+ "loss": 0.308,
+ "step": 3598
+ },
+ {
+ "epoch": 20.565714285714286,
+ "grad_norm": 62.128143310546875,
+ "learning_rate": 3.270476190476191e-05,
+ "loss": 0.243,
+ "step": 3599
+ },
+ {
+ "epoch": 20.571428571428573,
+ "grad_norm": 88.3516845703125,
+ "learning_rate": 3.26984126984127e-05,
+ "loss": 0.2807,
+ "step": 3600
+ },
+ {
+ "epoch": 20.577142857142857,
+ "grad_norm": 34.790374755859375,
+ "learning_rate": 3.269206349206349e-05,
+ "loss": 0.2898,
+ "step": 3601
+ },
+ {
+ "epoch": 20.582857142857144,
+ "grad_norm": 114.55731964111328,
+ "learning_rate": 3.2685714285714285e-05,
+ "loss": 0.4922,
+ "step": 3602
+ },
+ {
+ "epoch": 20.588571428571427,
+ "grad_norm": 92.77104949951172,
+ "learning_rate": 3.2679365079365085e-05,
+ "loss": 0.425,
+ "step": 3603
+ },
+ {
+ "epoch": 20.594285714285714,
+ "grad_norm": 26.583147048950195,
+ "learning_rate": 3.267301587301587e-05,
+ "loss": 0.4188,
+ "step": 3604
+ },
+ {
+ "epoch": 20.6,
+ "grad_norm": 28.349626541137695,
+ "learning_rate": 3.266666666666667e-05,
+ "loss": 0.3628,
+ "step": 3605
+ },
+ {
+ "epoch": 20.605714285714285,
+ "grad_norm": 62.43906021118164,
+ "learning_rate": 3.266031746031746e-05,
+ "loss": 0.3274,
+ "step": 3606
+ },
+ {
+ "epoch": 20.611428571428572,
+ "grad_norm": 32.20339584350586,
+ "learning_rate": 3.2653968253968255e-05,
+ "loss": 0.4397,
+ "step": 3607
+ },
+ {
+ "epoch": 20.617142857142856,
+ "grad_norm": 64.8906021118164,
+ "learning_rate": 3.264761904761905e-05,
+ "loss": 0.5333,
+ "step": 3608
+ },
+ {
+ "epoch": 20.622857142857143,
+ "grad_norm": 41.74363708496094,
+ "learning_rate": 3.264126984126985e-05,
+ "loss": 0.2679,
+ "step": 3609
+ },
+ {
+ "epoch": 20.62857142857143,
+ "grad_norm": 36.93730163574219,
+ "learning_rate": 3.263492063492063e-05,
+ "loss": 0.286,
+ "step": 3610
+ },
+ {
+ "epoch": 20.634285714285713,
+ "grad_norm": 114.60037994384766,
+ "learning_rate": 3.262857142857143e-05,
+ "loss": 0.5103,
+ "step": 3611
+ },
+ {
+ "epoch": 20.64,
+ "grad_norm": 45.13642120361328,
+ "learning_rate": 3.2622222222222225e-05,
+ "loss": 0.2691,
+ "step": 3612
+ },
+ {
+ "epoch": 20.645714285714284,
+ "grad_norm": 50.80180358886719,
+ "learning_rate": 3.261587301587302e-05,
+ "loss": 0.4702,
+ "step": 3613
+ },
+ {
+ "epoch": 20.65142857142857,
+ "grad_norm": 17.604503631591797,
+ "learning_rate": 3.260952380952381e-05,
+ "loss": 0.274,
+ "step": 3614
+ },
+ {
+ "epoch": 20.65714285714286,
+ "grad_norm": 56.79952621459961,
+ "learning_rate": 3.26031746031746e-05,
+ "loss": 0.3094,
+ "step": 3615
+ },
+ {
+ "epoch": 20.662857142857142,
+ "grad_norm": 40.64170455932617,
+ "learning_rate": 3.2596825396825396e-05,
+ "loss": 0.2468,
+ "step": 3616
+ },
+ {
+ "epoch": 20.66857142857143,
+ "grad_norm": 45.026268005371094,
+ "learning_rate": 3.2590476190476195e-05,
+ "loss": 0.3393,
+ "step": 3617
+ },
+ {
+ "epoch": 20.674285714285713,
+ "grad_norm": 30.018081665039062,
+ "learning_rate": 3.258412698412698e-05,
+ "loss": 0.3994,
+ "step": 3618
+ },
+ {
+ "epoch": 20.68,
+ "grad_norm": 56.77120590209961,
+ "learning_rate": 3.257777777777778e-05,
+ "loss": 0.3715,
+ "step": 3619
+ },
+ {
+ "epoch": 20.685714285714287,
+ "grad_norm": 1320.7337646484375,
+ "learning_rate": 3.257142857142857e-05,
+ "loss": 0.4171,
+ "step": 3620
+ },
+ {
+ "epoch": 20.69142857142857,
+ "grad_norm": 38.26957321166992,
+ "learning_rate": 3.2565079365079366e-05,
+ "loss": 0.4449,
+ "step": 3621
+ },
+ {
+ "epoch": 20.697142857142858,
+ "grad_norm": 43.36183166503906,
+ "learning_rate": 3.255873015873016e-05,
+ "loss": 0.373,
+ "step": 3622
+ },
+ {
+ "epoch": 20.70285714285714,
+ "grad_norm": 37.03742980957031,
+ "learning_rate": 3.255238095238096e-05,
+ "loss": 0.3653,
+ "step": 3623
+ },
+ {
+ "epoch": 20.708571428571428,
+ "grad_norm": 36.908016204833984,
+ "learning_rate": 3.2546031746031744e-05,
+ "loss": 0.3292,
+ "step": 3624
+ },
+ {
+ "epoch": 20.714285714285715,
+ "grad_norm": 45.65389633178711,
+ "learning_rate": 3.253968253968254e-05,
+ "loss": 0.2525,
+ "step": 3625
+ },
+ {
+ "epoch": 20.72,
+ "grad_norm": 80.70460510253906,
+ "learning_rate": 3.253333333333333e-05,
+ "loss": 0.3117,
+ "step": 3626
+ },
+ {
+ "epoch": 20.725714285714286,
+ "grad_norm": 31.633970260620117,
+ "learning_rate": 3.252698412698413e-05,
+ "loss": 0.333,
+ "step": 3627
+ },
+ {
+ "epoch": 20.731428571428573,
+ "grad_norm": 32.370887756347656,
+ "learning_rate": 3.252063492063492e-05,
+ "loss": 0.5887,
+ "step": 3628
+ },
+ {
+ "epoch": 20.737142857142857,
+ "grad_norm": 73.466796875,
+ "learning_rate": 3.2514285714285714e-05,
+ "loss": 0.3408,
+ "step": 3629
+ },
+ {
+ "epoch": 20.742857142857144,
+ "grad_norm": 43.25515365600586,
+ "learning_rate": 3.2507936507936506e-05,
+ "loss": 0.3952,
+ "step": 3630
+ },
+ {
+ "epoch": 20.748571428571427,
+ "grad_norm": 45.522369384765625,
+ "learning_rate": 3.2501587301587306e-05,
+ "loss": 0.3448,
+ "step": 3631
+ },
+ {
+ "epoch": 20.754285714285714,
+ "grad_norm": 78.45230102539062,
+ "learning_rate": 3.24952380952381e-05,
+ "loss": 0.3797,
+ "step": 3632
+ },
+ {
+ "epoch": 20.76,
+ "grad_norm": 54.089656829833984,
+ "learning_rate": 3.248888888888889e-05,
+ "loss": 0.2894,
+ "step": 3633
+ },
+ {
+ "epoch": 20.765714285714285,
+ "grad_norm": 55.51853942871094,
+ "learning_rate": 3.2482539682539684e-05,
+ "loss": 0.3811,
+ "step": 3634
+ },
+ {
+ "epoch": 20.771428571428572,
+ "grad_norm": 75.3653335571289,
+ "learning_rate": 3.2476190476190476e-05,
+ "loss": 0.3138,
+ "step": 3635
+ },
+ {
+ "epoch": 20.777142857142856,
+ "grad_norm": 594.2489013671875,
+ "learning_rate": 3.2469841269841276e-05,
+ "loss": 0.3179,
+ "step": 3636
+ },
+ {
+ "epoch": 20.782857142857143,
+ "grad_norm": 162.05276489257812,
+ "learning_rate": 3.246349206349206e-05,
+ "loss": 0.3792,
+ "step": 3637
+ },
+ {
+ "epoch": 20.78857142857143,
+ "grad_norm": 1130.9315185546875,
+ "learning_rate": 3.245714285714286e-05,
+ "loss": 0.3471,
+ "step": 3638
+ },
+ {
+ "epoch": 20.794285714285714,
+ "grad_norm": 92.6990737915039,
+ "learning_rate": 3.2450793650793654e-05,
+ "loss": 0.3092,
+ "step": 3639
+ },
+ {
+ "epoch": 20.8,
+ "grad_norm": 66.91148376464844,
+ "learning_rate": 3.2444444444444446e-05,
+ "loss": 0.4639,
+ "step": 3640
+ },
+ {
+ "epoch": 20.805714285714284,
+ "grad_norm": 79.60892486572266,
+ "learning_rate": 3.243809523809524e-05,
+ "loss": 0.4086,
+ "step": 3641
+ },
+ {
+ "epoch": 20.81142857142857,
+ "grad_norm": 38.52240753173828,
+ "learning_rate": 3.243174603174604e-05,
+ "loss": 0.5145,
+ "step": 3642
+ },
+ {
+ "epoch": 20.81714285714286,
+ "grad_norm": 54.60650634765625,
+ "learning_rate": 3.2425396825396824e-05,
+ "loss": 0.4872,
+ "step": 3643
+ },
+ {
+ "epoch": 20.822857142857142,
+ "grad_norm": 99.12556457519531,
+ "learning_rate": 3.2419047619047624e-05,
+ "loss": 0.3176,
+ "step": 3644
+ },
+ {
+ "epoch": 20.82857142857143,
+ "grad_norm": 100.69367218017578,
+ "learning_rate": 3.2412698412698416e-05,
+ "loss": 0.4089,
+ "step": 3645
+ },
+ {
+ "epoch": 20.834285714285713,
+ "grad_norm": 40.28134536743164,
+ "learning_rate": 3.240634920634921e-05,
+ "loss": 0.6446,
+ "step": 3646
+ },
+ {
+ "epoch": 20.84,
+ "grad_norm": 69.2869873046875,
+ "learning_rate": 3.24e-05,
+ "loss": 0.3736,
+ "step": 3647
+ },
+ {
+ "epoch": 20.845714285714287,
+ "grad_norm": 41.41655731201172,
+ "learning_rate": 3.2393650793650794e-05,
+ "loss": 0.3731,
+ "step": 3648
+ },
+ {
+ "epoch": 20.85142857142857,
+ "grad_norm": 30.325632095336914,
+ "learning_rate": 3.238730158730159e-05,
+ "loss": 0.3695,
+ "step": 3649
+ },
+ {
+ "epoch": 20.857142857142858,
+ "grad_norm": 43.05796432495117,
+ "learning_rate": 3.2380952380952386e-05,
+ "loss": 0.6107,
+ "step": 3650
+ },
+ {
+ "epoch": 20.86285714285714,
+ "grad_norm": 31.62732696533203,
+ "learning_rate": 3.237460317460317e-05,
+ "loss": 0.2847,
+ "step": 3651
+ },
+ {
+ "epoch": 20.86857142857143,
+ "grad_norm": 34.25601577758789,
+ "learning_rate": 3.236825396825397e-05,
+ "loss": 0.3036,
+ "step": 3652
+ },
+ {
+ "epoch": 20.874285714285715,
+ "grad_norm": 40.31098175048828,
+ "learning_rate": 3.2361904761904764e-05,
+ "loss": 0.5153,
+ "step": 3653
+ },
+ {
+ "epoch": 20.88,
+ "grad_norm": 56.50481033325195,
+ "learning_rate": 3.235555555555556e-05,
+ "loss": 0.2486,
+ "step": 3654
+ },
+ {
+ "epoch": 20.885714285714286,
+ "grad_norm": 441.31976318359375,
+ "learning_rate": 3.234920634920635e-05,
+ "loss": 0.3427,
+ "step": 3655
+ },
+ {
+ "epoch": 20.89142857142857,
+ "grad_norm": 60.5505256652832,
+ "learning_rate": 3.234285714285715e-05,
+ "loss": 0.2867,
+ "step": 3656
+ },
+ {
+ "epoch": 20.897142857142857,
+ "grad_norm": 63.12777328491211,
+ "learning_rate": 3.2336507936507935e-05,
+ "loss": 0.3733,
+ "step": 3657
+ },
+ {
+ "epoch": 20.902857142857144,
+ "grad_norm": 63.3406867980957,
+ "learning_rate": 3.2330158730158734e-05,
+ "loss": 0.3769,
+ "step": 3658
+ },
+ {
+ "epoch": 20.908571428571427,
+ "grad_norm": 79.66120910644531,
+ "learning_rate": 3.232380952380952e-05,
+ "loss": 0.3189,
+ "step": 3659
+ },
+ {
+ "epoch": 20.914285714285715,
+ "grad_norm": 32.31673049926758,
+ "learning_rate": 3.231746031746032e-05,
+ "loss": 0.4188,
+ "step": 3660
+ },
+ {
+ "epoch": 20.92,
+ "grad_norm": 98.06800842285156,
+ "learning_rate": 3.231111111111111e-05,
+ "loss": 0.4109,
+ "step": 3661
+ },
+ {
+ "epoch": 20.925714285714285,
+ "grad_norm": 63.263858795166016,
+ "learning_rate": 3.2304761904761905e-05,
+ "loss": 0.3264,
+ "step": 3662
+ },
+ {
+ "epoch": 20.931428571428572,
+ "grad_norm": 282.83843994140625,
+ "learning_rate": 3.22984126984127e-05,
+ "loss": 0.3349,
+ "step": 3663
+ },
+ {
+ "epoch": 20.937142857142856,
+ "grad_norm": 80.6650161743164,
+ "learning_rate": 3.22920634920635e-05,
+ "loss": 0.291,
+ "step": 3664
+ },
+ {
+ "epoch": 20.942857142857143,
+ "grad_norm": 764.670166015625,
+ "learning_rate": 3.228571428571428e-05,
+ "loss": 0.2556,
+ "step": 3665
+ },
+ {
+ "epoch": 20.94857142857143,
+ "grad_norm": 53.90130615234375,
+ "learning_rate": 3.227936507936508e-05,
+ "loss": 0.3263,
+ "step": 3666
+ },
+ {
+ "epoch": 20.954285714285714,
+ "grad_norm": 84.06610107421875,
+ "learning_rate": 3.2273015873015875e-05,
+ "loss": 0.4107,
+ "step": 3667
+ },
+ {
+ "epoch": 20.96,
+ "grad_norm": 159.95120239257812,
+ "learning_rate": 3.226666666666667e-05,
+ "loss": 0.369,
+ "step": 3668
+ },
+ {
+ "epoch": 20.965714285714284,
+ "grad_norm": 68.79747009277344,
+ "learning_rate": 3.226031746031746e-05,
+ "loss": 0.2484,
+ "step": 3669
+ },
+ {
+ "epoch": 20.97142857142857,
+ "grad_norm": 84.50321960449219,
+ "learning_rate": 3.225396825396825e-05,
+ "loss": 0.3455,
+ "step": 3670
+ },
+ {
+ "epoch": 20.97714285714286,
+ "grad_norm": 114.40209197998047,
+ "learning_rate": 3.224761904761905e-05,
+ "loss": 0.4595,
+ "step": 3671
+ },
+ {
+ "epoch": 20.982857142857142,
+ "grad_norm": 86.94263458251953,
+ "learning_rate": 3.2241269841269845e-05,
+ "loss": 0.4622,
+ "step": 3672
+ },
+ {
+ "epoch": 20.98857142857143,
+ "grad_norm": 71.7770004272461,
+ "learning_rate": 3.223492063492064e-05,
+ "loss": 0.4919,
+ "step": 3673
+ },
+ {
+ "epoch": 20.994285714285713,
+ "grad_norm": 65.27389526367188,
+ "learning_rate": 3.222857142857143e-05,
+ "loss": 0.2586,
+ "step": 3674
+ },
+ {
+ "epoch": 21.0,
+ "grad_norm": 21.672555923461914,
+ "learning_rate": 3.222222222222223e-05,
+ "loss": 0.3246,
+ "step": 3675
+ },
+ {
+ "epoch": 21.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6261504888534546,
+ "eval_map": 0.9119,
+ "eval_map_50": 0.9592,
+ "eval_map_75": 0.9454,
+ "eval_map_large": 0.9131,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9119,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7749,
+ "eval_mar_10": 0.9657,
+ "eval_mar_100": 0.9737,
+ "eval_mar_100_per_class": 0.9737,
+ "eval_mar_large": 0.9737,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 14.2206,
+ "eval_samples_per_second": 20.674,
+ "eval_steps_per_second": 2.602,
+ "step": 3675
+ },
+ {
+ "epoch": 21.005714285714287,
+ "grad_norm": 49.29462814331055,
+ "learning_rate": 3.2215873015873015e-05,
+ "loss": 0.2999,
+ "step": 3676
+ },
+ {
+ "epoch": 21.01142857142857,
+ "grad_norm": 31.34654998779297,
+ "learning_rate": 3.2209523809523814e-05,
+ "loss": 0.3049,
+ "step": 3677
+ },
+ {
+ "epoch": 21.017142857142858,
+ "grad_norm": 477.85162353515625,
+ "learning_rate": 3.220317460317461e-05,
+ "loss": 0.3532,
+ "step": 3678
+ },
+ {
+ "epoch": 21.02285714285714,
+ "grad_norm": 40.38692092895508,
+ "learning_rate": 3.21968253968254e-05,
+ "loss": 0.2048,
+ "step": 3679
+ },
+ {
+ "epoch": 21.02857142857143,
+ "grad_norm": 68.68042755126953,
+ "learning_rate": 3.219047619047619e-05,
+ "loss": 0.3601,
+ "step": 3680
+ },
+ {
+ "epoch": 21.034285714285716,
+ "grad_norm": 64.91019439697266,
+ "learning_rate": 3.2184126984126985e-05,
+ "loss": 0.2925,
+ "step": 3681
+ },
+ {
+ "epoch": 21.04,
+ "grad_norm": 25.79218292236328,
+ "learning_rate": 3.217777777777778e-05,
+ "loss": 0.4298,
+ "step": 3682
+ },
+ {
+ "epoch": 21.045714285714286,
+ "grad_norm": 41.89329528808594,
+ "learning_rate": 3.217142857142858e-05,
+ "loss": 0.3936,
+ "step": 3683
+ },
+ {
+ "epoch": 21.05142857142857,
+ "grad_norm": 57.092620849609375,
+ "learning_rate": 3.216507936507936e-05,
+ "loss": 0.4503,
+ "step": 3684
+ },
+ {
+ "epoch": 21.057142857142857,
+ "grad_norm": 25.358844757080078,
+ "learning_rate": 3.215873015873016e-05,
+ "loss": 0.3085,
+ "step": 3685
+ },
+ {
+ "epoch": 21.062857142857144,
+ "grad_norm": 75.70011138916016,
+ "learning_rate": 3.2152380952380955e-05,
+ "loss": 0.2991,
+ "step": 3686
+ },
+ {
+ "epoch": 21.068571428571428,
+ "grad_norm": 87.57452392578125,
+ "learning_rate": 3.214603174603175e-05,
+ "loss": 0.3838,
+ "step": 3687
+ },
+ {
+ "epoch": 21.074285714285715,
+ "grad_norm": 37.64426040649414,
+ "learning_rate": 3.213968253968254e-05,
+ "loss": 0.3772,
+ "step": 3688
+ },
+ {
+ "epoch": 21.08,
+ "grad_norm": 58.89046859741211,
+ "learning_rate": 3.213333333333334e-05,
+ "loss": 0.2258,
+ "step": 3689
+ },
+ {
+ "epoch": 21.085714285714285,
+ "grad_norm": 30.542205810546875,
+ "learning_rate": 3.2126984126984126e-05,
+ "loss": 0.3013,
+ "step": 3690
+ },
+ {
+ "epoch": 21.091428571428573,
+ "grad_norm": 35.99055480957031,
+ "learning_rate": 3.2120634920634925e-05,
+ "loss": 0.3167,
+ "step": 3691
+ },
+ {
+ "epoch": 21.097142857142856,
+ "grad_norm": 57.693511962890625,
+ "learning_rate": 3.211428571428571e-05,
+ "loss": 0.4125,
+ "step": 3692
+ },
+ {
+ "epoch": 21.102857142857143,
+ "grad_norm": 86.25928497314453,
+ "learning_rate": 3.210793650793651e-05,
+ "loss": 0.3789,
+ "step": 3693
+ },
+ {
+ "epoch": 21.10857142857143,
+ "grad_norm": 59.57147216796875,
+ "learning_rate": 3.21015873015873e-05,
+ "loss": 0.3895,
+ "step": 3694
+ },
+ {
+ "epoch": 21.114285714285714,
+ "grad_norm": 61.320220947265625,
+ "learning_rate": 3.2095238095238095e-05,
+ "loss": 0.4248,
+ "step": 3695
+ },
+ {
+ "epoch": 21.12,
+ "grad_norm": 124.69442749023438,
+ "learning_rate": 3.208888888888889e-05,
+ "loss": 0.6228,
+ "step": 3696
+ },
+ {
+ "epoch": 21.125714285714285,
+ "grad_norm": 58.30948257446289,
+ "learning_rate": 3.208253968253969e-05,
+ "loss": 0.2777,
+ "step": 3697
+ },
+ {
+ "epoch": 21.13142857142857,
+ "grad_norm": 57.231990814208984,
+ "learning_rate": 3.207619047619047e-05,
+ "loss": 0.3169,
+ "step": 3698
+ },
+ {
+ "epoch": 21.13714285714286,
+ "grad_norm": 42.46031951904297,
+ "learning_rate": 3.206984126984127e-05,
+ "loss": 0.3544,
+ "step": 3699
+ },
+ {
+ "epoch": 21.142857142857142,
+ "grad_norm": 26.119014739990234,
+ "learning_rate": 3.2063492063492065e-05,
+ "loss": 0.2699,
+ "step": 3700
+ },
+ {
+ "epoch": 21.14857142857143,
+ "grad_norm": 64.62548828125,
+ "learning_rate": 3.205714285714286e-05,
+ "loss": 0.5126,
+ "step": 3701
+ },
+ {
+ "epoch": 21.154285714285713,
+ "grad_norm": 103.31525421142578,
+ "learning_rate": 3.205079365079365e-05,
+ "loss": 0.3685,
+ "step": 3702
+ },
+ {
+ "epoch": 21.16,
+ "grad_norm": 44.26395034790039,
+ "learning_rate": 3.204444444444444e-05,
+ "loss": 0.2793,
+ "step": 3703
+ },
+ {
+ "epoch": 21.165714285714287,
+ "grad_norm": 217.64198303222656,
+ "learning_rate": 3.2038095238095236e-05,
+ "loss": 0.3558,
+ "step": 3704
+ },
+ {
+ "epoch": 21.17142857142857,
+ "grad_norm": 34.432010650634766,
+ "learning_rate": 3.2031746031746035e-05,
+ "loss": 0.3642,
+ "step": 3705
+ },
+ {
+ "epoch": 21.177142857142858,
+ "grad_norm": 50.93349838256836,
+ "learning_rate": 3.202539682539683e-05,
+ "loss": 0.3908,
+ "step": 3706
+ },
+ {
+ "epoch": 21.18285714285714,
+ "grad_norm": 226.744873046875,
+ "learning_rate": 3.201904761904762e-05,
+ "loss": 0.375,
+ "step": 3707
+ },
+ {
+ "epoch": 21.18857142857143,
+ "grad_norm": 1020.2630004882812,
+ "learning_rate": 3.201269841269841e-05,
+ "loss": 0.3685,
+ "step": 3708
+ },
+ {
+ "epoch": 21.194285714285716,
+ "grad_norm": 40.030338287353516,
+ "learning_rate": 3.2006349206349206e-05,
+ "loss": 0.4141,
+ "step": 3709
+ },
+ {
+ "epoch": 21.2,
+ "grad_norm": 33.5488395690918,
+ "learning_rate": 3.2000000000000005e-05,
+ "loss": 0.2744,
+ "step": 3710
+ },
+ {
+ "epoch": 21.205714285714286,
+ "grad_norm": 26.809579849243164,
+ "learning_rate": 3.19936507936508e-05,
+ "loss": 0.2272,
+ "step": 3711
+ },
+ {
+ "epoch": 21.21142857142857,
+ "grad_norm": 85.49323272705078,
+ "learning_rate": 3.198730158730159e-05,
+ "loss": 0.3234,
+ "step": 3712
+ },
+ {
+ "epoch": 21.217142857142857,
+ "grad_norm": 68.34223937988281,
+ "learning_rate": 3.198095238095238e-05,
+ "loss": 0.2902,
+ "step": 3713
+ },
+ {
+ "epoch": 21.222857142857144,
+ "grad_norm": 59.31341552734375,
+ "learning_rate": 3.1974603174603176e-05,
+ "loss": 0.3462,
+ "step": 3714
+ },
+ {
+ "epoch": 21.228571428571428,
+ "grad_norm": 220.84310913085938,
+ "learning_rate": 3.196825396825397e-05,
+ "loss": 0.4344,
+ "step": 3715
+ },
+ {
+ "epoch": 21.234285714285715,
+ "grad_norm": 34.62261962890625,
+ "learning_rate": 3.196190476190477e-05,
+ "loss": 0.3257,
+ "step": 3716
+ },
+ {
+ "epoch": 21.24,
+ "grad_norm": 51.02989196777344,
+ "learning_rate": 3.1955555555555554e-05,
+ "loss": 0.5492,
+ "step": 3717
+ },
+ {
+ "epoch": 21.245714285714286,
+ "grad_norm": 352.5619812011719,
+ "learning_rate": 3.194920634920635e-05,
+ "loss": 0.2573,
+ "step": 3718
+ },
+ {
+ "epoch": 21.251428571428573,
+ "grad_norm": 29.160369873046875,
+ "learning_rate": 3.1942857142857146e-05,
+ "loss": 0.4223,
+ "step": 3719
+ },
+ {
+ "epoch": 21.257142857142856,
+ "grad_norm": 64.06929016113281,
+ "learning_rate": 3.193650793650794e-05,
+ "loss": 0.3256,
+ "step": 3720
+ },
+ {
+ "epoch": 21.262857142857143,
+ "grad_norm": 50.68466567993164,
+ "learning_rate": 3.193015873015873e-05,
+ "loss": 0.2572,
+ "step": 3721
+ },
+ {
+ "epoch": 21.268571428571427,
+ "grad_norm": 74.49227142333984,
+ "learning_rate": 3.192380952380953e-05,
+ "loss": 0.4386,
+ "step": 3722
+ },
+ {
+ "epoch": 21.274285714285714,
+ "grad_norm": 49.489383697509766,
+ "learning_rate": 3.1917460317460316e-05,
+ "loss": 0.2706,
+ "step": 3723
+ },
+ {
+ "epoch": 21.28,
+ "grad_norm": 61.9919548034668,
+ "learning_rate": 3.1911111111111116e-05,
+ "loss": 0.3378,
+ "step": 3724
+ },
+ {
+ "epoch": 21.285714285714285,
+ "grad_norm": 38.12159729003906,
+ "learning_rate": 3.19047619047619e-05,
+ "loss": 0.2815,
+ "step": 3725
+ },
+ {
+ "epoch": 21.291428571428572,
+ "grad_norm": 30.66257667541504,
+ "learning_rate": 3.18984126984127e-05,
+ "loss": 0.3195,
+ "step": 3726
+ },
+ {
+ "epoch": 21.29714285714286,
+ "grad_norm": 21.73702621459961,
+ "learning_rate": 3.1892063492063494e-05,
+ "loss": 0.366,
+ "step": 3727
+ },
+ {
+ "epoch": 21.302857142857142,
+ "grad_norm": 27.58858871459961,
+ "learning_rate": 3.1885714285714286e-05,
+ "loss": 0.356,
+ "step": 3728
+ },
+ {
+ "epoch": 21.30857142857143,
+ "grad_norm": 50.17976379394531,
+ "learning_rate": 3.187936507936508e-05,
+ "loss": 0.2447,
+ "step": 3729
+ },
+ {
+ "epoch": 21.314285714285713,
+ "grad_norm": 64.07877349853516,
+ "learning_rate": 3.187301587301588e-05,
+ "loss": 0.3639,
+ "step": 3730
+ },
+ {
+ "epoch": 21.32,
+ "grad_norm": 46.20744323730469,
+ "learning_rate": 3.1866666666666664e-05,
+ "loss": 0.322,
+ "step": 3731
+ },
+ {
+ "epoch": 21.325714285714287,
+ "grad_norm": 42.561363220214844,
+ "learning_rate": 3.1860317460317464e-05,
+ "loss": 0.2724,
+ "step": 3732
+ },
+ {
+ "epoch": 21.33142857142857,
+ "grad_norm": 39.54890441894531,
+ "learning_rate": 3.1853968253968256e-05,
+ "loss": 0.2953,
+ "step": 3733
+ },
+ {
+ "epoch": 21.337142857142858,
+ "grad_norm": 123.82516479492188,
+ "learning_rate": 3.184761904761905e-05,
+ "loss": 0.2723,
+ "step": 3734
+ },
+ {
+ "epoch": 21.34285714285714,
+ "grad_norm": 18.572673797607422,
+ "learning_rate": 3.184126984126984e-05,
+ "loss": 0.5455,
+ "step": 3735
+ },
+ {
+ "epoch": 21.34857142857143,
+ "grad_norm": 29.958419799804688,
+ "learning_rate": 3.1834920634920634e-05,
+ "loss": 0.3059,
+ "step": 3736
+ },
+ {
+ "epoch": 21.354285714285716,
+ "grad_norm": 48.21112823486328,
+ "learning_rate": 3.182857142857143e-05,
+ "loss": 0.2854,
+ "step": 3737
+ },
+ {
+ "epoch": 21.36,
+ "grad_norm": 88.85002899169922,
+ "learning_rate": 3.1822222222222226e-05,
+ "loss": 0.3987,
+ "step": 3738
+ },
+ {
+ "epoch": 21.365714285714287,
+ "grad_norm": 468.011962890625,
+ "learning_rate": 3.181587301587302e-05,
+ "loss": 0.2847,
+ "step": 3739
+ },
+ {
+ "epoch": 21.37142857142857,
+ "grad_norm": 31.966691970825195,
+ "learning_rate": 3.180952380952381e-05,
+ "loss": 0.3266,
+ "step": 3740
+ },
+ {
+ "epoch": 21.377142857142857,
+ "grad_norm": 51.56507873535156,
+ "learning_rate": 3.1803174603174604e-05,
+ "loss": 0.4651,
+ "step": 3741
+ },
+ {
+ "epoch": 21.382857142857144,
+ "grad_norm": 21.6018123626709,
+ "learning_rate": 3.17968253968254e-05,
+ "loss": 0.3151,
+ "step": 3742
+ },
+ {
+ "epoch": 21.388571428571428,
+ "grad_norm": 22.11574935913086,
+ "learning_rate": 3.179047619047619e-05,
+ "loss": 0.3947,
+ "step": 3743
+ },
+ {
+ "epoch": 21.394285714285715,
+ "grad_norm": 35.184356689453125,
+ "learning_rate": 3.178412698412699e-05,
+ "loss": 0.6054,
+ "step": 3744
+ },
+ {
+ "epoch": 21.4,
+ "grad_norm": 51.54069137573242,
+ "learning_rate": 3.177777777777778e-05,
+ "loss": 0.3085,
+ "step": 3745
+ },
+ {
+ "epoch": 21.405714285714286,
+ "grad_norm": 59.191768646240234,
+ "learning_rate": 3.1771428571428574e-05,
+ "loss": 0.4951,
+ "step": 3746
+ },
+ {
+ "epoch": 21.411428571428573,
+ "grad_norm": 91.00263214111328,
+ "learning_rate": 3.176507936507937e-05,
+ "loss": 0.4317,
+ "step": 3747
+ },
+ {
+ "epoch": 21.417142857142856,
+ "grad_norm": 40.64833450317383,
+ "learning_rate": 3.175873015873016e-05,
+ "loss": 0.3584,
+ "step": 3748
+ },
+ {
+ "epoch": 21.422857142857143,
+ "grad_norm": 54.025421142578125,
+ "learning_rate": 3.175238095238096e-05,
+ "loss": 0.3442,
+ "step": 3749
+ },
+ {
+ "epoch": 21.428571428571427,
+ "grad_norm": 42.4815559387207,
+ "learning_rate": 3.1746031746031745e-05,
+ "loss": 0.234,
+ "step": 3750
+ },
+ {
+ "epoch": 21.434285714285714,
+ "grad_norm": 37.965755462646484,
+ "learning_rate": 3.1739682539682544e-05,
+ "loss": 0.3482,
+ "step": 3751
+ },
+ {
+ "epoch": 21.44,
+ "grad_norm": 433.9432678222656,
+ "learning_rate": 3.173333333333334e-05,
+ "loss": 0.3578,
+ "step": 3752
+ },
+ {
+ "epoch": 21.445714285714285,
+ "grad_norm": 56.78814697265625,
+ "learning_rate": 3.172698412698413e-05,
+ "loss": 0.3879,
+ "step": 3753
+ },
+ {
+ "epoch": 21.451428571428572,
+ "grad_norm": 50.708091735839844,
+ "learning_rate": 3.172063492063492e-05,
+ "loss": 0.3293,
+ "step": 3754
+ },
+ {
+ "epoch": 21.457142857142856,
+ "grad_norm": 40.560787200927734,
+ "learning_rate": 3.1714285714285715e-05,
+ "loss": 0.4146,
+ "step": 3755
+ },
+ {
+ "epoch": 21.462857142857143,
+ "grad_norm": 45.53681182861328,
+ "learning_rate": 3.170793650793651e-05,
+ "loss": 0.3898,
+ "step": 3756
+ },
+ {
+ "epoch": 21.46857142857143,
+ "grad_norm": 57.23739242553711,
+ "learning_rate": 3.170158730158731e-05,
+ "loss": 0.4443,
+ "step": 3757
+ },
+ {
+ "epoch": 21.474285714285713,
+ "grad_norm": 87.60155487060547,
+ "learning_rate": 3.169523809523809e-05,
+ "loss": 0.5592,
+ "step": 3758
+ },
+ {
+ "epoch": 21.48,
+ "grad_norm": 44.68388366699219,
+ "learning_rate": 3.168888888888889e-05,
+ "loss": 0.3137,
+ "step": 3759
+ },
+ {
+ "epoch": 21.485714285714284,
+ "grad_norm": 289.986328125,
+ "learning_rate": 3.1682539682539685e-05,
+ "loss": 0.3203,
+ "step": 3760
+ },
+ {
+ "epoch": 21.49142857142857,
+ "grad_norm": 37.63692092895508,
+ "learning_rate": 3.167619047619048e-05,
+ "loss": 0.3281,
+ "step": 3761
+ },
+ {
+ "epoch": 21.497142857142858,
+ "grad_norm": 114.45169067382812,
+ "learning_rate": 3.166984126984127e-05,
+ "loss": 0.414,
+ "step": 3762
+ },
+ {
+ "epoch": 21.502857142857142,
+ "grad_norm": 90.52623748779297,
+ "learning_rate": 3.166349206349207e-05,
+ "loss": 0.3976,
+ "step": 3763
+ },
+ {
+ "epoch": 21.50857142857143,
+ "grad_norm": 62.72298812866211,
+ "learning_rate": 3.1657142857142855e-05,
+ "loss": 0.4485,
+ "step": 3764
+ },
+ {
+ "epoch": 21.514285714285712,
+ "grad_norm": 22.031055450439453,
+ "learning_rate": 3.1650793650793655e-05,
+ "loss": 0.3229,
+ "step": 3765
+ },
+ {
+ "epoch": 21.52,
+ "grad_norm": 54.60057067871094,
+ "learning_rate": 3.164444444444444e-05,
+ "loss": 0.4387,
+ "step": 3766
+ },
+ {
+ "epoch": 21.525714285714287,
+ "grad_norm": 261.3981628417969,
+ "learning_rate": 3.163809523809524e-05,
+ "loss": 0.2715,
+ "step": 3767
+ },
+ {
+ "epoch": 21.53142857142857,
+ "grad_norm": 38.28535079956055,
+ "learning_rate": 3.163174603174603e-05,
+ "loss": 0.3281,
+ "step": 3768
+ },
+ {
+ "epoch": 21.537142857142857,
+ "grad_norm": 515.3873901367188,
+ "learning_rate": 3.1625396825396825e-05,
+ "loss": 0.359,
+ "step": 3769
+ },
+ {
+ "epoch": 21.542857142857144,
+ "grad_norm": 101.58968353271484,
+ "learning_rate": 3.161904761904762e-05,
+ "loss": 0.5319,
+ "step": 3770
+ },
+ {
+ "epoch": 21.548571428571428,
+ "grad_norm": 24.83993911743164,
+ "learning_rate": 3.161269841269842e-05,
+ "loss": 0.3499,
+ "step": 3771
+ },
+ {
+ "epoch": 21.554285714285715,
+ "grad_norm": 78.21588897705078,
+ "learning_rate": 3.16063492063492e-05,
+ "loss": 0.604,
+ "step": 3772
+ },
+ {
+ "epoch": 21.56,
+ "grad_norm": 50.746639251708984,
+ "learning_rate": 3.16e-05,
+ "loss": 0.3406,
+ "step": 3773
+ },
+ {
+ "epoch": 21.565714285714286,
+ "grad_norm": 30.50995635986328,
+ "learning_rate": 3.1593650793650795e-05,
+ "loss": 0.3707,
+ "step": 3774
+ },
+ {
+ "epoch": 21.571428571428573,
+ "grad_norm": 94.28235626220703,
+ "learning_rate": 3.158730158730159e-05,
+ "loss": 0.4935,
+ "step": 3775
+ },
+ {
+ "epoch": 21.577142857142857,
+ "grad_norm": 85.49809265136719,
+ "learning_rate": 3.158095238095238e-05,
+ "loss": 0.2952,
+ "step": 3776
+ },
+ {
+ "epoch": 21.582857142857144,
+ "grad_norm": 192.1874542236328,
+ "learning_rate": 3.157460317460317e-05,
+ "loss": 0.4825,
+ "step": 3777
+ },
+ {
+ "epoch": 21.588571428571427,
+ "grad_norm": 23.69519805908203,
+ "learning_rate": 3.156825396825397e-05,
+ "loss": 0.3808,
+ "step": 3778
+ },
+ {
+ "epoch": 21.594285714285714,
+ "grad_norm": 54.67176055908203,
+ "learning_rate": 3.1561904761904765e-05,
+ "loss": 0.3762,
+ "step": 3779
+ },
+ {
+ "epoch": 21.6,
+ "grad_norm": 36.722557067871094,
+ "learning_rate": 3.155555555555556e-05,
+ "loss": 0.3383,
+ "step": 3780
+ },
+ {
+ "epoch": 21.605714285714285,
+ "grad_norm": 84.50801086425781,
+ "learning_rate": 3.154920634920635e-05,
+ "loss": 0.4195,
+ "step": 3781
+ },
+ {
+ "epoch": 21.611428571428572,
+ "grad_norm": 269.7778015136719,
+ "learning_rate": 3.154285714285714e-05,
+ "loss": 0.3313,
+ "step": 3782
+ },
+ {
+ "epoch": 21.617142857142856,
+ "grad_norm": 35.74866485595703,
+ "learning_rate": 3.1536507936507936e-05,
+ "loss": 0.399,
+ "step": 3783
+ },
+ {
+ "epoch": 21.622857142857143,
+ "grad_norm": 14.791391372680664,
+ "learning_rate": 3.1530158730158735e-05,
+ "loss": 0.2369,
+ "step": 3784
+ },
+ {
+ "epoch": 21.62857142857143,
+ "grad_norm": 55.04678726196289,
+ "learning_rate": 3.152380952380953e-05,
+ "loss": 0.3338,
+ "step": 3785
+ },
+ {
+ "epoch": 21.634285714285713,
+ "grad_norm": 31.66046905517578,
+ "learning_rate": 3.151746031746032e-05,
+ "loss": 0.2855,
+ "step": 3786
+ },
+ {
+ "epoch": 21.64,
+ "grad_norm": 43.476749420166016,
+ "learning_rate": 3.151111111111111e-05,
+ "loss": 0.2053,
+ "step": 3787
+ },
+ {
+ "epoch": 21.645714285714284,
+ "grad_norm": 108.86002349853516,
+ "learning_rate": 3.1504761904761905e-05,
+ "loss": 0.2844,
+ "step": 3788
+ },
+ {
+ "epoch": 21.65142857142857,
+ "grad_norm": 73.7070541381836,
+ "learning_rate": 3.14984126984127e-05,
+ "loss": 0.5292,
+ "step": 3789
+ },
+ {
+ "epoch": 21.65714285714286,
+ "grad_norm": 78.62653350830078,
+ "learning_rate": 3.14920634920635e-05,
+ "loss": 0.3417,
+ "step": 3790
+ },
+ {
+ "epoch": 21.662857142857142,
+ "grad_norm": 54.53212356567383,
+ "learning_rate": 3.148571428571428e-05,
+ "loss": 0.3056,
+ "step": 3791
+ },
+ {
+ "epoch": 21.66857142857143,
+ "grad_norm": 29.09296226501465,
+ "learning_rate": 3.147936507936508e-05,
+ "loss": 0.326,
+ "step": 3792
+ },
+ {
+ "epoch": 21.674285714285713,
+ "grad_norm": 44.63047790527344,
+ "learning_rate": 3.1473015873015875e-05,
+ "loss": 0.3932,
+ "step": 3793
+ },
+ {
+ "epoch": 21.68,
+ "grad_norm": 20.08134651184082,
+ "learning_rate": 3.146666666666667e-05,
+ "loss": 0.4483,
+ "step": 3794
+ },
+ {
+ "epoch": 21.685714285714287,
+ "grad_norm": 44.74596405029297,
+ "learning_rate": 3.146031746031746e-05,
+ "loss": 0.3569,
+ "step": 3795
+ },
+ {
+ "epoch": 21.69142857142857,
+ "grad_norm": 59.00993347167969,
+ "learning_rate": 3.145396825396826e-05,
+ "loss": 0.3985,
+ "step": 3796
+ },
+ {
+ "epoch": 21.697142857142858,
+ "grad_norm": 94.12641143798828,
+ "learning_rate": 3.1447619047619046e-05,
+ "loss": 0.4251,
+ "step": 3797
+ },
+ {
+ "epoch": 21.70285714285714,
+ "grad_norm": 99.79872131347656,
+ "learning_rate": 3.1441269841269845e-05,
+ "loss": 0.2642,
+ "step": 3798
+ },
+ {
+ "epoch": 21.708571428571428,
+ "grad_norm": 23.622644424438477,
+ "learning_rate": 3.143492063492063e-05,
+ "loss": 0.3422,
+ "step": 3799
+ },
+ {
+ "epoch": 21.714285714285715,
+ "grad_norm": 109.50407409667969,
+ "learning_rate": 3.142857142857143e-05,
+ "loss": 0.4371,
+ "step": 3800
+ },
+ {
+ "epoch": 21.72,
+ "grad_norm": 34.26578903198242,
+ "learning_rate": 3.142222222222222e-05,
+ "loss": 0.32,
+ "step": 3801
+ },
+ {
+ "epoch": 21.725714285714286,
+ "grad_norm": 33.98945617675781,
+ "learning_rate": 3.1415873015873016e-05,
+ "loss": 0.2817,
+ "step": 3802
+ },
+ {
+ "epoch": 21.731428571428573,
+ "grad_norm": 60.104774475097656,
+ "learning_rate": 3.140952380952381e-05,
+ "loss": 0.3346,
+ "step": 3803
+ },
+ {
+ "epoch": 21.737142857142857,
+ "grad_norm": 63.730594635009766,
+ "learning_rate": 3.140317460317461e-05,
+ "loss": 0.419,
+ "step": 3804
+ },
+ {
+ "epoch": 21.742857142857144,
+ "grad_norm": 41.9618034362793,
+ "learning_rate": 3.1396825396825394e-05,
+ "loss": 0.3094,
+ "step": 3805
+ },
+ {
+ "epoch": 21.748571428571427,
+ "grad_norm": 85.70345306396484,
+ "learning_rate": 3.139047619047619e-05,
+ "loss": 0.2412,
+ "step": 3806
+ },
+ {
+ "epoch": 21.754285714285714,
+ "grad_norm": 63.57206344604492,
+ "learning_rate": 3.1384126984126986e-05,
+ "loss": 0.336,
+ "step": 3807
+ },
+ {
+ "epoch": 21.76,
+ "grad_norm": 19.747631072998047,
+ "learning_rate": 3.137777777777778e-05,
+ "loss": 0.2959,
+ "step": 3808
+ },
+ {
+ "epoch": 21.765714285714285,
+ "grad_norm": 1825.9937744140625,
+ "learning_rate": 3.137142857142857e-05,
+ "loss": 0.444,
+ "step": 3809
+ },
+ {
+ "epoch": 21.771428571428572,
+ "grad_norm": 51.031494140625,
+ "learning_rate": 3.1365079365079364e-05,
+ "loss": 0.3296,
+ "step": 3810
+ },
+ {
+ "epoch": 21.777142857142856,
+ "grad_norm": 176.57481384277344,
+ "learning_rate": 3.1358730158730156e-05,
+ "loss": 0.4362,
+ "step": 3811
+ },
+ {
+ "epoch": 21.782857142857143,
+ "grad_norm": 56.659698486328125,
+ "learning_rate": 3.1352380952380956e-05,
+ "loss": 0.355,
+ "step": 3812
+ },
+ {
+ "epoch": 21.78857142857143,
+ "grad_norm": 62.81039047241211,
+ "learning_rate": 3.134603174603175e-05,
+ "loss": 0.3525,
+ "step": 3813
+ },
+ {
+ "epoch": 21.794285714285714,
+ "grad_norm": 60.78776168823242,
+ "learning_rate": 3.133968253968254e-05,
+ "loss": 0.3585,
+ "step": 3814
+ },
+ {
+ "epoch": 21.8,
+ "grad_norm": 108.34864044189453,
+ "learning_rate": 3.1333333333333334e-05,
+ "loss": 0.2698,
+ "step": 3815
+ },
+ {
+ "epoch": 21.805714285714284,
+ "grad_norm": 196.76712036132812,
+ "learning_rate": 3.1326984126984126e-05,
+ "loss": 0.3811,
+ "step": 3816
+ },
+ {
+ "epoch": 21.81142857142857,
+ "grad_norm": 55.96894454956055,
+ "learning_rate": 3.1320634920634926e-05,
+ "loss": 0.315,
+ "step": 3817
+ },
+ {
+ "epoch": 21.81714285714286,
+ "grad_norm": 50.5468635559082,
+ "learning_rate": 3.131428571428572e-05,
+ "loss": 0.1957,
+ "step": 3818
+ },
+ {
+ "epoch": 21.822857142857142,
+ "grad_norm": 135.99075317382812,
+ "learning_rate": 3.130793650793651e-05,
+ "loss": 0.3722,
+ "step": 3819
+ },
+ {
+ "epoch": 21.82857142857143,
+ "grad_norm": 24.163867950439453,
+ "learning_rate": 3.1301587301587304e-05,
+ "loss": 0.2995,
+ "step": 3820
+ },
+ {
+ "epoch": 21.834285714285713,
+ "grad_norm": 35.99555969238281,
+ "learning_rate": 3.1295238095238096e-05,
+ "loss": 0.2903,
+ "step": 3821
+ },
+ {
+ "epoch": 21.84,
+ "grad_norm": 252.39276123046875,
+ "learning_rate": 3.128888888888889e-05,
+ "loss": 0.2276,
+ "step": 3822
+ },
+ {
+ "epoch": 21.845714285714287,
+ "grad_norm": 47.01209259033203,
+ "learning_rate": 3.128253968253969e-05,
+ "loss": 0.3877,
+ "step": 3823
+ },
+ {
+ "epoch": 21.85142857142857,
+ "grad_norm": 47.04011535644531,
+ "learning_rate": 3.1276190476190474e-05,
+ "loss": 0.2424,
+ "step": 3824
+ },
+ {
+ "epoch": 21.857142857142858,
+ "grad_norm": 322.7526550292969,
+ "learning_rate": 3.1269841269841274e-05,
+ "loss": 0.3984,
+ "step": 3825
+ },
+ {
+ "epoch": 21.86285714285714,
+ "grad_norm": 31.5167293548584,
+ "learning_rate": 3.1263492063492066e-05,
+ "loss": 0.2904,
+ "step": 3826
+ },
+ {
+ "epoch": 21.86857142857143,
+ "grad_norm": 271.3404846191406,
+ "learning_rate": 3.125714285714286e-05,
+ "loss": 0.3466,
+ "step": 3827
+ },
+ {
+ "epoch": 21.874285714285715,
+ "grad_norm": 29.509052276611328,
+ "learning_rate": 3.125079365079365e-05,
+ "loss": 0.3587,
+ "step": 3828
+ },
+ {
+ "epoch": 21.88,
+ "grad_norm": 31.222827911376953,
+ "learning_rate": 3.124444444444445e-05,
+ "loss": 0.2593,
+ "step": 3829
+ },
+ {
+ "epoch": 21.885714285714286,
+ "grad_norm": 51.91364288330078,
+ "learning_rate": 3.123809523809524e-05,
+ "loss": 0.3321,
+ "step": 3830
+ },
+ {
+ "epoch": 21.89142857142857,
+ "grad_norm": 41.987300872802734,
+ "learning_rate": 3.1231746031746036e-05,
+ "loss": 0.2314,
+ "step": 3831
+ },
+ {
+ "epoch": 21.897142857142857,
+ "grad_norm": 75.75940704345703,
+ "learning_rate": 3.122539682539682e-05,
+ "loss": 0.2687,
+ "step": 3832
+ },
+ {
+ "epoch": 21.902857142857144,
+ "grad_norm": 34.43084716796875,
+ "learning_rate": 3.121904761904762e-05,
+ "loss": 0.2096,
+ "step": 3833
+ },
+ {
+ "epoch": 21.908571428571427,
+ "grad_norm": 102.84685516357422,
+ "learning_rate": 3.1212698412698414e-05,
+ "loss": 0.2934,
+ "step": 3834
+ },
+ {
+ "epoch": 21.914285714285715,
+ "grad_norm": 455.5080261230469,
+ "learning_rate": 3.120634920634921e-05,
+ "loss": 0.4464,
+ "step": 3835
+ },
+ {
+ "epoch": 21.92,
+ "grad_norm": 29.81694793701172,
+ "learning_rate": 3.12e-05,
+ "loss": 0.238,
+ "step": 3836
+ },
+ {
+ "epoch": 21.925714285714285,
+ "grad_norm": 54.29841613769531,
+ "learning_rate": 3.11936507936508e-05,
+ "loss": 0.3183,
+ "step": 3837
+ },
+ {
+ "epoch": 21.931428571428572,
+ "grad_norm": 57.172508239746094,
+ "learning_rate": 3.1187301587301585e-05,
+ "loss": 0.3099,
+ "step": 3838
+ },
+ {
+ "epoch": 21.937142857142856,
+ "grad_norm": 68.68402862548828,
+ "learning_rate": 3.1180952380952384e-05,
+ "loss": 0.4181,
+ "step": 3839
+ },
+ {
+ "epoch": 21.942857142857143,
+ "grad_norm": 23.65928840637207,
+ "learning_rate": 3.117460317460318e-05,
+ "loss": 0.4022,
+ "step": 3840
+ },
+ {
+ "epoch": 21.94857142857143,
+ "grad_norm": 81.38803100585938,
+ "learning_rate": 3.116825396825397e-05,
+ "loss": 0.45,
+ "step": 3841
+ },
+ {
+ "epoch": 21.954285714285714,
+ "grad_norm": 80.4809799194336,
+ "learning_rate": 3.116190476190476e-05,
+ "loss": 0.2891,
+ "step": 3842
+ },
+ {
+ "epoch": 21.96,
+ "grad_norm": 49.645843505859375,
+ "learning_rate": 3.1155555555555555e-05,
+ "loss": 0.4989,
+ "step": 3843
+ },
+ {
+ "epoch": 21.965714285714284,
+ "grad_norm": 62.76024627685547,
+ "learning_rate": 3.114920634920635e-05,
+ "loss": 0.2469,
+ "step": 3844
+ },
+ {
+ "epoch": 21.97142857142857,
+ "grad_norm": 83.84510040283203,
+ "learning_rate": 3.114285714285715e-05,
+ "loss": 0.4257,
+ "step": 3845
+ },
+ {
+ "epoch": 21.97714285714286,
+ "grad_norm": 31.186817169189453,
+ "learning_rate": 3.113650793650794e-05,
+ "loss": 0.3295,
+ "step": 3846
+ },
+ {
+ "epoch": 21.982857142857142,
+ "grad_norm": 56.99137878417969,
+ "learning_rate": 3.113015873015873e-05,
+ "loss": 0.3165,
+ "step": 3847
+ },
+ {
+ "epoch": 21.98857142857143,
+ "grad_norm": 64.08834075927734,
+ "learning_rate": 3.1123809523809525e-05,
+ "loss": 0.2338,
+ "step": 3848
+ },
+ {
+ "epoch": 21.994285714285713,
+ "grad_norm": 66.41158294677734,
+ "learning_rate": 3.111746031746032e-05,
+ "loss": 0.3492,
+ "step": 3849
+ },
+ {
+ "epoch": 22.0,
+ "grad_norm": 119.21939086914062,
+ "learning_rate": 3.111111111111111e-05,
+ "loss": 0.3374,
+ "step": 3850
+ },
+ {
+ "epoch": 22.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5864205956459045,
+ "eval_map": 0.9208,
+ "eval_map_50": 0.9655,
+ "eval_map_75": 0.9527,
+ "eval_map_large": 0.9209,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9208,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7854,
+ "eval_mar_10": 0.9695,
+ "eval_mar_100": 0.973,
+ "eval_mar_100_per_class": 0.973,
+ "eval_mar_large": 0.973,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.411,
+ "eval_samples_per_second": 21.922,
+ "eval_steps_per_second": 2.759,
+ "step": 3850
+ },
+ {
+ "epoch": 22.005714285714287,
+ "grad_norm": 568.6182250976562,
+ "learning_rate": 3.110476190476191e-05,
+ "loss": 0.3612,
+ "step": 3851
+ },
+ {
+ "epoch": 22.01142857142857,
+ "grad_norm": 298.5441589355469,
+ "learning_rate": 3.10984126984127e-05,
+ "loss": 0.3196,
+ "step": 3852
+ },
+ {
+ "epoch": 22.017142857142858,
+ "grad_norm": 37.83338165283203,
+ "learning_rate": 3.1092063492063495e-05,
+ "loss": 0.2644,
+ "step": 3853
+ },
+ {
+ "epoch": 22.02285714285714,
+ "grad_norm": 139.16110229492188,
+ "learning_rate": 3.108571428571429e-05,
+ "loss": 0.3717,
+ "step": 3854
+ },
+ {
+ "epoch": 22.02857142857143,
+ "grad_norm": 437.6393737792969,
+ "learning_rate": 3.107936507936508e-05,
+ "loss": 0.3128,
+ "step": 3855
+ },
+ {
+ "epoch": 22.034285714285716,
+ "grad_norm": 37.889312744140625,
+ "learning_rate": 3.107301587301588e-05,
+ "loss": 0.4065,
+ "step": 3856
+ },
+ {
+ "epoch": 22.04,
+ "grad_norm": 26.51235580444336,
+ "learning_rate": 3.1066666666666665e-05,
+ "loss": 0.307,
+ "step": 3857
+ },
+ {
+ "epoch": 22.045714285714286,
+ "grad_norm": 24.09084129333496,
+ "learning_rate": 3.1060317460317465e-05,
+ "loss": 0.4787,
+ "step": 3858
+ },
+ {
+ "epoch": 22.05142857142857,
+ "grad_norm": 43.70731735229492,
+ "learning_rate": 3.105396825396826e-05,
+ "loss": 0.3874,
+ "step": 3859
+ },
+ {
+ "epoch": 22.057142857142857,
+ "grad_norm": 94.05394744873047,
+ "learning_rate": 3.104761904761905e-05,
+ "loss": 0.3835,
+ "step": 3860
+ },
+ {
+ "epoch": 22.062857142857144,
+ "grad_norm": 45.00230407714844,
+ "learning_rate": 3.104126984126984e-05,
+ "loss": 0.3022,
+ "step": 3861
+ },
+ {
+ "epoch": 22.068571428571428,
+ "grad_norm": 36.670806884765625,
+ "learning_rate": 3.103492063492064e-05,
+ "loss": 0.5788,
+ "step": 3862
+ },
+ {
+ "epoch": 22.074285714285715,
+ "grad_norm": 39.09443664550781,
+ "learning_rate": 3.102857142857143e-05,
+ "loss": 0.3785,
+ "step": 3863
+ },
+ {
+ "epoch": 22.08,
+ "grad_norm": 42.257102966308594,
+ "learning_rate": 3.102222222222223e-05,
+ "loss": 0.309,
+ "step": 3864
+ },
+ {
+ "epoch": 22.085714285714285,
+ "grad_norm": 37.75422286987305,
+ "learning_rate": 3.101587301587301e-05,
+ "loss": 0.3694,
+ "step": 3865
+ },
+ {
+ "epoch": 22.091428571428573,
+ "grad_norm": 57.656097412109375,
+ "learning_rate": 3.100952380952381e-05,
+ "loss": 0.3344,
+ "step": 3866
+ },
+ {
+ "epoch": 22.097142857142856,
+ "grad_norm": 57.66275405883789,
+ "learning_rate": 3.1003174603174605e-05,
+ "loss": 0.2738,
+ "step": 3867
+ },
+ {
+ "epoch": 22.102857142857143,
+ "grad_norm": 185.97476196289062,
+ "learning_rate": 3.09968253968254e-05,
+ "loss": 0.3516,
+ "step": 3868
+ },
+ {
+ "epoch": 22.10857142857143,
+ "grad_norm": 55.6573600769043,
+ "learning_rate": 3.099047619047619e-05,
+ "loss": 0.2769,
+ "step": 3869
+ },
+ {
+ "epoch": 22.114285714285714,
+ "grad_norm": 47.03541946411133,
+ "learning_rate": 3.098412698412699e-05,
+ "loss": 0.2358,
+ "step": 3870
+ },
+ {
+ "epoch": 22.12,
+ "grad_norm": 60.72099685668945,
+ "learning_rate": 3.0977777777777776e-05,
+ "loss": 0.4233,
+ "step": 3871
+ },
+ {
+ "epoch": 22.125714285714285,
+ "grad_norm": 77.88934326171875,
+ "learning_rate": 3.0971428571428575e-05,
+ "loss": 0.3265,
+ "step": 3872
+ },
+ {
+ "epoch": 22.13142857142857,
+ "grad_norm": 251.4064483642578,
+ "learning_rate": 3.096507936507937e-05,
+ "loss": 0.3101,
+ "step": 3873
+ },
+ {
+ "epoch": 22.13714285714286,
+ "grad_norm": 79.59466552734375,
+ "learning_rate": 3.095873015873016e-05,
+ "loss": 0.2651,
+ "step": 3874
+ },
+ {
+ "epoch": 22.142857142857142,
+ "grad_norm": 111.59322357177734,
+ "learning_rate": 3.095238095238095e-05,
+ "loss": 0.4637,
+ "step": 3875
+ },
+ {
+ "epoch": 22.14857142857143,
+ "grad_norm": 46.31583023071289,
+ "learning_rate": 3.0946031746031746e-05,
+ "loss": 0.2143,
+ "step": 3876
+ },
+ {
+ "epoch": 22.154285714285713,
+ "grad_norm": 56.032554626464844,
+ "learning_rate": 3.093968253968254e-05,
+ "loss": 0.3214,
+ "step": 3877
+ },
+ {
+ "epoch": 22.16,
+ "grad_norm": 51.20491409301758,
+ "learning_rate": 3.093333333333334e-05,
+ "loss": 0.2696,
+ "step": 3878
+ },
+ {
+ "epoch": 22.165714285714287,
+ "grad_norm": 78.42192077636719,
+ "learning_rate": 3.0926984126984123e-05,
+ "loss": 0.5291,
+ "step": 3879
+ },
+ {
+ "epoch": 22.17142857142857,
+ "grad_norm": 81.70121765136719,
+ "learning_rate": 3.092063492063492e-05,
+ "loss": 0.2933,
+ "step": 3880
+ },
+ {
+ "epoch": 22.177142857142858,
+ "grad_norm": 81.04582214355469,
+ "learning_rate": 3.0914285714285715e-05,
+ "loss": 0.5647,
+ "step": 3881
+ },
+ {
+ "epoch": 22.18285714285714,
+ "grad_norm": 34.81829833984375,
+ "learning_rate": 3.090793650793651e-05,
+ "loss": 0.3128,
+ "step": 3882
+ },
+ {
+ "epoch": 22.18857142857143,
+ "grad_norm": 107.74185180664062,
+ "learning_rate": 3.09015873015873e-05,
+ "loss": 0.3937,
+ "step": 3883
+ },
+ {
+ "epoch": 22.194285714285716,
+ "grad_norm": 52.18937301635742,
+ "learning_rate": 3.08952380952381e-05,
+ "loss": 0.2945,
+ "step": 3884
+ },
+ {
+ "epoch": 22.2,
+ "grad_norm": 74.31680297851562,
+ "learning_rate": 3.088888888888889e-05,
+ "loss": 0.4235,
+ "step": 3885
+ },
+ {
+ "epoch": 22.205714285714286,
+ "grad_norm": 43.282081604003906,
+ "learning_rate": 3.0882539682539685e-05,
+ "loss": 0.404,
+ "step": 3886
+ },
+ {
+ "epoch": 22.21142857142857,
+ "grad_norm": 47.725616455078125,
+ "learning_rate": 3.087619047619048e-05,
+ "loss": 0.2856,
+ "step": 3887
+ },
+ {
+ "epoch": 22.217142857142857,
+ "grad_norm": 20.372751235961914,
+ "learning_rate": 3.086984126984127e-05,
+ "loss": 0.3264,
+ "step": 3888
+ },
+ {
+ "epoch": 22.222857142857144,
+ "grad_norm": 120.78299713134766,
+ "learning_rate": 3.086349206349206e-05,
+ "loss": 0.3028,
+ "step": 3889
+ },
+ {
+ "epoch": 22.228571428571428,
+ "grad_norm": 45.60805130004883,
+ "learning_rate": 3.0857142857142856e-05,
+ "loss": 0.3358,
+ "step": 3890
+ },
+ {
+ "epoch": 22.234285714285715,
+ "grad_norm": 26.143068313598633,
+ "learning_rate": 3.0850793650793655e-05,
+ "loss": 0.3691,
+ "step": 3891
+ },
+ {
+ "epoch": 22.24,
+ "grad_norm": 37.71814727783203,
+ "learning_rate": 3.084444444444445e-05,
+ "loss": 0.29,
+ "step": 3892
+ },
+ {
+ "epoch": 22.245714285714286,
+ "grad_norm": 99.42308044433594,
+ "learning_rate": 3.083809523809524e-05,
+ "loss": 0.3096,
+ "step": 3893
+ },
+ {
+ "epoch": 22.251428571428573,
+ "grad_norm": 45.55909729003906,
+ "learning_rate": 3.083174603174603e-05,
+ "loss": 0.4135,
+ "step": 3894
+ },
+ {
+ "epoch": 22.257142857142856,
+ "grad_norm": 140.01119995117188,
+ "learning_rate": 3.082539682539683e-05,
+ "loss": 0.5953,
+ "step": 3895
+ },
+ {
+ "epoch": 22.262857142857143,
+ "grad_norm": 83.41049194335938,
+ "learning_rate": 3.081904761904762e-05,
+ "loss": 0.3512,
+ "step": 3896
+ },
+ {
+ "epoch": 22.268571428571427,
+ "grad_norm": 69.6951904296875,
+ "learning_rate": 3.081269841269842e-05,
+ "loss": 0.3473,
+ "step": 3897
+ },
+ {
+ "epoch": 22.274285714285714,
+ "grad_norm": 63.989315032958984,
+ "learning_rate": 3.0806349206349204e-05,
+ "loss": 0.4721,
+ "step": 3898
+ },
+ {
+ "epoch": 22.28,
+ "grad_norm": 62.20379638671875,
+ "learning_rate": 3.08e-05,
+ "loss": 0.4091,
+ "step": 3899
+ },
+ {
+ "epoch": 22.285714285714285,
+ "grad_norm": 54.67482376098633,
+ "learning_rate": 3.0793650793650796e-05,
+ "loss": 0.347,
+ "step": 3900
+ },
+ {
+ "epoch": 22.291428571428572,
+ "grad_norm": 31.573854446411133,
+ "learning_rate": 3.078730158730159e-05,
+ "loss": 0.3179,
+ "step": 3901
+ },
+ {
+ "epoch": 22.29714285714286,
+ "grad_norm": 28.191001892089844,
+ "learning_rate": 3.078095238095238e-05,
+ "loss": 0.3604,
+ "step": 3902
+ },
+ {
+ "epoch": 22.302857142857142,
+ "grad_norm": 56.241233825683594,
+ "learning_rate": 3.077460317460318e-05,
+ "loss": 0.4328,
+ "step": 3903
+ },
+ {
+ "epoch": 22.30857142857143,
+ "grad_norm": 60.73114776611328,
+ "learning_rate": 3.0768253968253966e-05,
+ "loss": 0.2629,
+ "step": 3904
+ },
+ {
+ "epoch": 22.314285714285713,
+ "grad_norm": 94.41016387939453,
+ "learning_rate": 3.0761904761904766e-05,
+ "loss": 0.3041,
+ "step": 3905
+ },
+ {
+ "epoch": 22.32,
+ "grad_norm": 38.220855712890625,
+ "learning_rate": 3.075555555555556e-05,
+ "loss": 0.4861,
+ "step": 3906
+ },
+ {
+ "epoch": 22.325714285714287,
+ "grad_norm": 98.41761779785156,
+ "learning_rate": 3.074920634920635e-05,
+ "loss": 0.3381,
+ "step": 3907
+ },
+ {
+ "epoch": 22.33142857142857,
+ "grad_norm": 31.45133399963379,
+ "learning_rate": 3.0742857142857144e-05,
+ "loss": 0.3808,
+ "step": 3908
+ },
+ {
+ "epoch": 22.337142857142858,
+ "grad_norm": 27.403478622436523,
+ "learning_rate": 3.0736507936507936e-05,
+ "loss": 0.3264,
+ "step": 3909
+ },
+ {
+ "epoch": 22.34285714285714,
+ "grad_norm": 22.842941284179688,
+ "learning_rate": 3.073015873015873e-05,
+ "loss": 0.4001,
+ "step": 3910
+ },
+ {
+ "epoch": 22.34857142857143,
+ "grad_norm": 24.106473922729492,
+ "learning_rate": 3.072380952380953e-05,
+ "loss": 0.2799,
+ "step": 3911
+ },
+ {
+ "epoch": 22.354285714285716,
+ "grad_norm": 92.46276092529297,
+ "learning_rate": 3.0717460317460314e-05,
+ "loss": 0.2886,
+ "step": 3912
+ },
+ {
+ "epoch": 22.36,
+ "grad_norm": 238.04917907714844,
+ "learning_rate": 3.0711111111111114e-05,
+ "loss": 0.3307,
+ "step": 3913
+ },
+ {
+ "epoch": 22.365714285714287,
+ "grad_norm": 177.84490966796875,
+ "learning_rate": 3.0704761904761906e-05,
+ "loss": 0.3596,
+ "step": 3914
+ },
+ {
+ "epoch": 22.37142857142857,
+ "grad_norm": 70.74510955810547,
+ "learning_rate": 3.06984126984127e-05,
+ "loss": 0.411,
+ "step": 3915
+ },
+ {
+ "epoch": 22.377142857142857,
+ "grad_norm": 36.48595428466797,
+ "learning_rate": 3.069206349206349e-05,
+ "loss": 0.3127,
+ "step": 3916
+ },
+ {
+ "epoch": 22.382857142857144,
+ "grad_norm": 45.07102584838867,
+ "learning_rate": 3.068571428571429e-05,
+ "loss": 0.3441,
+ "step": 3917
+ },
+ {
+ "epoch": 22.388571428571428,
+ "grad_norm": 57.4742546081543,
+ "learning_rate": 3.067936507936508e-05,
+ "loss": 0.2636,
+ "step": 3918
+ },
+ {
+ "epoch": 22.394285714285715,
+ "grad_norm": 41.72212219238281,
+ "learning_rate": 3.0673015873015876e-05,
+ "loss": 0.2729,
+ "step": 3919
+ },
+ {
+ "epoch": 22.4,
+ "grad_norm": 25.12485122680664,
+ "learning_rate": 3.066666666666667e-05,
+ "loss": 0.3471,
+ "step": 3920
+ },
+ {
+ "epoch": 22.405714285714286,
+ "grad_norm": 18.712438583374023,
+ "learning_rate": 3.066031746031746e-05,
+ "loss": 0.3291,
+ "step": 3921
+ },
+ {
+ "epoch": 22.411428571428573,
+ "grad_norm": 63.36475372314453,
+ "learning_rate": 3.0653968253968254e-05,
+ "loss": 0.4245,
+ "step": 3922
+ },
+ {
+ "epoch": 22.417142857142856,
+ "grad_norm": 72.96314239501953,
+ "learning_rate": 3.064761904761905e-05,
+ "loss": 0.7622,
+ "step": 3923
+ },
+ {
+ "epoch": 22.422857142857143,
+ "grad_norm": 81.22702026367188,
+ "learning_rate": 3.0641269841269846e-05,
+ "loss": 0.4028,
+ "step": 3924
+ },
+ {
+ "epoch": 22.428571428571427,
+ "grad_norm": 102.31585693359375,
+ "learning_rate": 3.063492063492064e-05,
+ "loss": 0.3337,
+ "step": 3925
+ },
+ {
+ "epoch": 22.434285714285714,
+ "grad_norm": 182.3521270751953,
+ "learning_rate": 3.062857142857143e-05,
+ "loss": 0.3027,
+ "step": 3926
+ },
+ {
+ "epoch": 22.44,
+ "grad_norm": 70.28782653808594,
+ "learning_rate": 3.0622222222222224e-05,
+ "loss": 0.5345,
+ "step": 3927
+ },
+ {
+ "epoch": 22.445714285714285,
+ "grad_norm": 42.58792495727539,
+ "learning_rate": 3.061587301587302e-05,
+ "loss": 0.3515,
+ "step": 3928
+ },
+ {
+ "epoch": 22.451428571428572,
+ "grad_norm": 37.07721710205078,
+ "learning_rate": 3.060952380952381e-05,
+ "loss": 0.3843,
+ "step": 3929
+ },
+ {
+ "epoch": 22.457142857142856,
+ "grad_norm": 37.0380744934082,
+ "learning_rate": 3.060317460317461e-05,
+ "loss": 0.2544,
+ "step": 3930
+ },
+ {
+ "epoch": 22.462857142857143,
+ "grad_norm": 52.82063674926758,
+ "learning_rate": 3.0596825396825395e-05,
+ "loss": 0.2882,
+ "step": 3931
+ },
+ {
+ "epoch": 22.46857142857143,
+ "grad_norm": 52.426612854003906,
+ "learning_rate": 3.0590476190476194e-05,
+ "loss": 0.3228,
+ "step": 3932
+ },
+ {
+ "epoch": 22.474285714285713,
+ "grad_norm": 34.03739547729492,
+ "learning_rate": 3.058412698412699e-05,
+ "loss": 0.2281,
+ "step": 3933
+ },
+ {
+ "epoch": 22.48,
+ "grad_norm": 98.39401245117188,
+ "learning_rate": 3.057777777777778e-05,
+ "loss": 0.4055,
+ "step": 3934
+ },
+ {
+ "epoch": 22.485714285714284,
+ "grad_norm": 45.16268539428711,
+ "learning_rate": 3.057142857142857e-05,
+ "loss": 0.3098,
+ "step": 3935
+ },
+ {
+ "epoch": 22.49142857142857,
+ "grad_norm": 18.381845474243164,
+ "learning_rate": 3.056507936507937e-05,
+ "loss": 0.3035,
+ "step": 3936
+ },
+ {
+ "epoch": 22.497142857142858,
+ "grad_norm": 41.476539611816406,
+ "learning_rate": 3.055873015873016e-05,
+ "loss": 0.3262,
+ "step": 3937
+ },
+ {
+ "epoch": 22.502857142857142,
+ "grad_norm": 90.57671356201172,
+ "learning_rate": 3.055238095238096e-05,
+ "loss": 0.3315,
+ "step": 3938
+ },
+ {
+ "epoch": 22.50857142857143,
+ "grad_norm": 61.76097106933594,
+ "learning_rate": 3.054603174603175e-05,
+ "loss": 0.4332,
+ "step": 3939
+ },
+ {
+ "epoch": 22.514285714285712,
+ "grad_norm": 50.836605072021484,
+ "learning_rate": 3.053968253968254e-05,
+ "loss": 0.252,
+ "step": 3940
+ },
+ {
+ "epoch": 22.52,
+ "grad_norm": 245.84751892089844,
+ "learning_rate": 3.0533333333333335e-05,
+ "loss": 0.3185,
+ "step": 3941
+ },
+ {
+ "epoch": 22.525714285714287,
+ "grad_norm": 83.79264068603516,
+ "learning_rate": 3.052698412698413e-05,
+ "loss": 0.443,
+ "step": 3942
+ },
+ {
+ "epoch": 22.53142857142857,
+ "grad_norm": 84.7953872680664,
+ "learning_rate": 3.052063492063492e-05,
+ "loss": 0.3408,
+ "step": 3943
+ },
+ {
+ "epoch": 22.537142857142857,
+ "grad_norm": 57.39402770996094,
+ "learning_rate": 3.0514285714285716e-05,
+ "loss": 0.5653,
+ "step": 3944
+ },
+ {
+ "epoch": 22.542857142857144,
+ "grad_norm": 331.8702392578125,
+ "learning_rate": 3.050793650793651e-05,
+ "loss": 0.4169,
+ "step": 3945
+ },
+ {
+ "epoch": 22.548571428571428,
+ "grad_norm": 374.3634033203125,
+ "learning_rate": 3.0501587301587305e-05,
+ "loss": 0.3585,
+ "step": 3946
+ },
+ {
+ "epoch": 22.554285714285715,
+ "grad_norm": 77.23312377929688,
+ "learning_rate": 3.04952380952381e-05,
+ "loss": 0.4017,
+ "step": 3947
+ },
+ {
+ "epoch": 22.56,
+ "grad_norm": 34.115482330322266,
+ "learning_rate": 3.048888888888889e-05,
+ "loss": 0.2754,
+ "step": 3948
+ },
+ {
+ "epoch": 22.565714285714286,
+ "grad_norm": 390.7008056640625,
+ "learning_rate": 3.0482539682539686e-05,
+ "loss": 0.3646,
+ "step": 3949
+ },
+ {
+ "epoch": 22.571428571428573,
+ "grad_norm": 142.5330352783203,
+ "learning_rate": 3.0476190476190482e-05,
+ "loss": 0.3656,
+ "step": 3950
+ },
+ {
+ "epoch": 22.577142857142857,
+ "grad_norm": 308.1759948730469,
+ "learning_rate": 3.046984126984127e-05,
+ "loss": 0.4462,
+ "step": 3951
+ },
+ {
+ "epoch": 22.582857142857144,
+ "grad_norm": 48.4372444152832,
+ "learning_rate": 3.0463492063492067e-05,
+ "loss": 0.3191,
+ "step": 3952
+ },
+ {
+ "epoch": 22.588571428571427,
+ "grad_norm": 51.79970932006836,
+ "learning_rate": 3.0457142857142856e-05,
+ "loss": 0.3815,
+ "step": 3953
+ },
+ {
+ "epoch": 22.594285714285714,
+ "grad_norm": 85.287109375,
+ "learning_rate": 3.0450793650793652e-05,
+ "loss": 0.263,
+ "step": 3954
+ },
+ {
+ "epoch": 22.6,
+ "grad_norm": 50.578041076660156,
+ "learning_rate": 3.044444444444445e-05,
+ "loss": 0.4532,
+ "step": 3955
+ },
+ {
+ "epoch": 22.605714285714285,
+ "grad_norm": 45.909027099609375,
+ "learning_rate": 3.0438095238095238e-05,
+ "loss": 0.2848,
+ "step": 3956
+ },
+ {
+ "epoch": 22.611428571428572,
+ "grad_norm": 33.778865814208984,
+ "learning_rate": 3.0431746031746034e-05,
+ "loss": 0.3687,
+ "step": 3957
+ },
+ {
+ "epoch": 22.617142857142856,
+ "grad_norm": 30.09535789489746,
+ "learning_rate": 3.042539682539683e-05,
+ "loss": 0.3757,
+ "step": 3958
+ },
+ {
+ "epoch": 22.622857142857143,
+ "grad_norm": 27.8607120513916,
+ "learning_rate": 3.041904761904762e-05,
+ "loss": 0.3275,
+ "step": 3959
+ },
+ {
+ "epoch": 22.62857142857143,
+ "grad_norm": 187.89405822753906,
+ "learning_rate": 3.0412698412698415e-05,
+ "loss": 0.2528,
+ "step": 3960
+ },
+ {
+ "epoch": 22.634285714285713,
+ "grad_norm": 29.064250946044922,
+ "learning_rate": 3.040634920634921e-05,
+ "loss": 0.304,
+ "step": 3961
+ },
+ {
+ "epoch": 22.64,
+ "grad_norm": 40.578895568847656,
+ "learning_rate": 3.04e-05,
+ "loss": 0.3312,
+ "step": 3962
+ },
+ {
+ "epoch": 22.645714285714284,
+ "grad_norm": 56.08921432495117,
+ "learning_rate": 3.0393650793650796e-05,
+ "loss": 0.2642,
+ "step": 3963
+ },
+ {
+ "epoch": 22.65142857142857,
+ "grad_norm": 68.52364349365234,
+ "learning_rate": 3.0387301587301586e-05,
+ "loss": 0.2772,
+ "step": 3964
+ },
+ {
+ "epoch": 22.65714285714286,
+ "grad_norm": 26.927160263061523,
+ "learning_rate": 3.038095238095238e-05,
+ "loss": 0.2455,
+ "step": 3965
+ },
+ {
+ "epoch": 22.662857142857142,
+ "grad_norm": 28.958812713623047,
+ "learning_rate": 3.0374603174603178e-05,
+ "loss": 0.3878,
+ "step": 3966
+ },
+ {
+ "epoch": 22.66857142857143,
+ "grad_norm": 85.509765625,
+ "learning_rate": 3.0368253968253967e-05,
+ "loss": 0.3657,
+ "step": 3967
+ },
+ {
+ "epoch": 22.674285714285713,
+ "grad_norm": 59.687705993652344,
+ "learning_rate": 3.0361904761904763e-05,
+ "loss": 0.3652,
+ "step": 3968
+ },
+ {
+ "epoch": 22.68,
+ "grad_norm": 55.201026916503906,
+ "learning_rate": 3.035555555555556e-05,
+ "loss": 0.316,
+ "step": 3969
+ },
+ {
+ "epoch": 22.685714285714287,
+ "grad_norm": 177.66893005371094,
+ "learning_rate": 3.0349206349206348e-05,
+ "loss": 0.4029,
+ "step": 3970
+ },
+ {
+ "epoch": 22.69142857142857,
+ "grad_norm": 65.45475769042969,
+ "learning_rate": 3.0342857142857144e-05,
+ "loss": 0.395,
+ "step": 3971
+ },
+ {
+ "epoch": 22.697142857142858,
+ "grad_norm": 65.41983795166016,
+ "learning_rate": 3.0336507936507937e-05,
+ "loss": 0.2951,
+ "step": 3972
+ },
+ {
+ "epoch": 22.70285714285714,
+ "grad_norm": 27.596040725708008,
+ "learning_rate": 3.033015873015873e-05,
+ "loss": 0.3872,
+ "step": 3973
+ },
+ {
+ "epoch": 22.708571428571428,
+ "grad_norm": 66.64160919189453,
+ "learning_rate": 3.0323809523809526e-05,
+ "loss": 0.4295,
+ "step": 3974
+ },
+ {
+ "epoch": 22.714285714285715,
+ "grad_norm": 27.690946578979492,
+ "learning_rate": 3.0317460317460318e-05,
+ "loss": 0.3481,
+ "step": 3975
+ },
+ {
+ "epoch": 22.72,
+ "grad_norm": 24.905649185180664,
+ "learning_rate": 3.031111111111111e-05,
+ "loss": 0.2619,
+ "step": 3976
+ },
+ {
+ "epoch": 22.725714285714286,
+ "grad_norm": 50.19453048706055,
+ "learning_rate": 3.0304761904761907e-05,
+ "loss": 0.2237,
+ "step": 3977
+ },
+ {
+ "epoch": 22.731428571428573,
+ "grad_norm": 48.230037689208984,
+ "learning_rate": 3.02984126984127e-05,
+ "loss": 0.4105,
+ "step": 3978
+ },
+ {
+ "epoch": 22.737142857142857,
+ "grad_norm": 59.16750717163086,
+ "learning_rate": 3.0292063492063495e-05,
+ "loss": 0.2397,
+ "step": 3979
+ },
+ {
+ "epoch": 22.742857142857144,
+ "grad_norm": 41.24590301513672,
+ "learning_rate": 3.0285714285714288e-05,
+ "loss": 0.2296,
+ "step": 3980
+ },
+ {
+ "epoch": 22.748571428571427,
+ "grad_norm": 27.61407470703125,
+ "learning_rate": 3.027936507936508e-05,
+ "loss": 0.2562,
+ "step": 3981
+ },
+ {
+ "epoch": 22.754285714285714,
+ "grad_norm": 79.87749481201172,
+ "learning_rate": 3.0273015873015877e-05,
+ "loss": 0.2065,
+ "step": 3982
+ },
+ {
+ "epoch": 22.76,
+ "grad_norm": 28.355274200439453,
+ "learning_rate": 3.0266666666666666e-05,
+ "loss": 0.2726,
+ "step": 3983
+ },
+ {
+ "epoch": 22.765714285714285,
+ "grad_norm": 69.86193084716797,
+ "learning_rate": 3.0260317460317462e-05,
+ "loss": 0.3554,
+ "step": 3984
+ },
+ {
+ "epoch": 22.771428571428572,
+ "grad_norm": 69.75241088867188,
+ "learning_rate": 3.0253968253968258e-05,
+ "loss": 0.3991,
+ "step": 3985
+ },
+ {
+ "epoch": 22.777142857142856,
+ "grad_norm": 18.34147834777832,
+ "learning_rate": 3.0247619047619047e-05,
+ "loss": 0.2868,
+ "step": 3986
+ },
+ {
+ "epoch": 22.782857142857143,
+ "grad_norm": 54.24711608886719,
+ "learning_rate": 3.0241269841269843e-05,
+ "loss": 0.2794,
+ "step": 3987
+ },
+ {
+ "epoch": 22.78857142857143,
+ "grad_norm": 60.45413589477539,
+ "learning_rate": 3.023492063492064e-05,
+ "loss": 0.3577,
+ "step": 3988
+ },
+ {
+ "epoch": 22.794285714285714,
+ "grad_norm": 61.43553924560547,
+ "learning_rate": 3.022857142857143e-05,
+ "loss": 0.3516,
+ "step": 3989
+ },
+ {
+ "epoch": 22.8,
+ "grad_norm": 78.0551986694336,
+ "learning_rate": 3.0222222222222225e-05,
+ "loss": 0.2044,
+ "step": 3990
+ },
+ {
+ "epoch": 22.805714285714284,
+ "grad_norm": 50.30086135864258,
+ "learning_rate": 3.021587301587302e-05,
+ "loss": 0.2837,
+ "step": 3991
+ },
+ {
+ "epoch": 22.81142857142857,
+ "grad_norm": 44.484046936035156,
+ "learning_rate": 3.020952380952381e-05,
+ "loss": 0.3038,
+ "step": 3992
+ },
+ {
+ "epoch": 22.81714285714286,
+ "grad_norm": 162.52215576171875,
+ "learning_rate": 3.0203174603174606e-05,
+ "loss": 0.44,
+ "step": 3993
+ },
+ {
+ "epoch": 22.822857142857142,
+ "grad_norm": 87.21377563476562,
+ "learning_rate": 3.0196825396825395e-05,
+ "loss": 0.2657,
+ "step": 3994
+ },
+ {
+ "epoch": 22.82857142857143,
+ "grad_norm": 51.34219741821289,
+ "learning_rate": 3.019047619047619e-05,
+ "loss": 0.2638,
+ "step": 3995
+ },
+ {
+ "epoch": 22.834285714285713,
+ "grad_norm": 198.4999237060547,
+ "learning_rate": 3.0184126984126987e-05,
+ "loss": 0.386,
+ "step": 3996
+ },
+ {
+ "epoch": 22.84,
+ "grad_norm": 83.12291717529297,
+ "learning_rate": 3.0177777777777776e-05,
+ "loss": 0.4847,
+ "step": 3997
+ },
+ {
+ "epoch": 22.845714285714287,
+ "grad_norm": 130.7611846923828,
+ "learning_rate": 3.0171428571428572e-05,
+ "loss": 0.3893,
+ "step": 3998
+ },
+ {
+ "epoch": 22.85142857142857,
+ "grad_norm": 86.97884368896484,
+ "learning_rate": 3.016507936507937e-05,
+ "loss": 0.3274,
+ "step": 3999
+ },
+ {
+ "epoch": 22.857142857142858,
+ "grad_norm": 77.94489288330078,
+ "learning_rate": 3.0158730158730158e-05,
+ "loss": 0.3596,
+ "step": 4000
+ },
+ {
+ "epoch": 22.86285714285714,
+ "grad_norm": 86.53071594238281,
+ "learning_rate": 3.0152380952380954e-05,
+ "loss": 0.5372,
+ "step": 4001
+ },
+ {
+ "epoch": 22.86857142857143,
+ "grad_norm": 60.08096694946289,
+ "learning_rate": 3.014603174603175e-05,
+ "loss": 0.2962,
+ "step": 4002
+ },
+ {
+ "epoch": 22.874285714285715,
+ "grad_norm": 74.86428833007812,
+ "learning_rate": 3.013968253968254e-05,
+ "loss": 0.4112,
+ "step": 4003
+ },
+ {
+ "epoch": 22.88,
+ "grad_norm": 27.846323013305664,
+ "learning_rate": 3.0133333333333335e-05,
+ "loss": 0.3109,
+ "step": 4004
+ },
+ {
+ "epoch": 22.885714285714286,
+ "grad_norm": 40.8173713684082,
+ "learning_rate": 3.0126984126984124e-05,
+ "loss": 0.4139,
+ "step": 4005
+ },
+ {
+ "epoch": 22.89142857142857,
+ "grad_norm": 42.435585021972656,
+ "learning_rate": 3.012063492063492e-05,
+ "loss": 0.2861,
+ "step": 4006
+ },
+ {
+ "epoch": 22.897142857142857,
+ "grad_norm": 83.25191497802734,
+ "learning_rate": 3.0114285714285716e-05,
+ "loss": 0.3462,
+ "step": 4007
+ },
+ {
+ "epoch": 22.902857142857144,
+ "grad_norm": 45.708099365234375,
+ "learning_rate": 3.0107936507936506e-05,
+ "loss": 0.2537,
+ "step": 4008
+ },
+ {
+ "epoch": 22.908571428571427,
+ "grad_norm": 77.79122161865234,
+ "learning_rate": 3.01015873015873e-05,
+ "loss": 0.3789,
+ "step": 4009
+ },
+ {
+ "epoch": 22.914285714285715,
+ "grad_norm": 25.502317428588867,
+ "learning_rate": 3.0095238095238098e-05,
+ "loss": 0.3658,
+ "step": 4010
+ },
+ {
+ "epoch": 22.92,
+ "grad_norm": 37.519325256347656,
+ "learning_rate": 3.008888888888889e-05,
+ "loss": 0.422,
+ "step": 4011
+ },
+ {
+ "epoch": 22.925714285714285,
+ "grad_norm": 2517.453125,
+ "learning_rate": 3.0082539682539683e-05,
+ "loss": 0.2673,
+ "step": 4012
+ },
+ {
+ "epoch": 22.931428571428572,
+ "grad_norm": 110.73004150390625,
+ "learning_rate": 3.007619047619048e-05,
+ "loss": 0.2519,
+ "step": 4013
+ },
+ {
+ "epoch": 22.937142857142856,
+ "grad_norm": 97.83757019042969,
+ "learning_rate": 3.006984126984127e-05,
+ "loss": 0.2666,
+ "step": 4014
+ },
+ {
+ "epoch": 22.942857142857143,
+ "grad_norm": 37.127960205078125,
+ "learning_rate": 3.0063492063492064e-05,
+ "loss": 0.2609,
+ "step": 4015
+ },
+ {
+ "epoch": 22.94857142857143,
+ "grad_norm": 40.58960723876953,
+ "learning_rate": 3.0057142857142857e-05,
+ "loss": 0.3827,
+ "step": 4016
+ },
+ {
+ "epoch": 22.954285714285714,
+ "grad_norm": 50.95119094848633,
+ "learning_rate": 3.0050793650793653e-05,
+ "loss": 0.3445,
+ "step": 4017
+ },
+ {
+ "epoch": 22.96,
+ "grad_norm": 39.06296157836914,
+ "learning_rate": 3.004444444444445e-05,
+ "loss": 0.3327,
+ "step": 4018
+ },
+ {
+ "epoch": 22.965714285714284,
+ "grad_norm": 40.81393814086914,
+ "learning_rate": 3.0038095238095238e-05,
+ "loss": 0.3127,
+ "step": 4019
+ },
+ {
+ "epoch": 22.97142857142857,
+ "grad_norm": 156.51710510253906,
+ "learning_rate": 3.0031746031746034e-05,
+ "loss": 0.3226,
+ "step": 4020
+ },
+ {
+ "epoch": 22.97714285714286,
+ "grad_norm": 19.79698944091797,
+ "learning_rate": 3.002539682539683e-05,
+ "loss": 0.2417,
+ "step": 4021
+ },
+ {
+ "epoch": 22.982857142857142,
+ "grad_norm": 26.006929397583008,
+ "learning_rate": 3.001904761904762e-05,
+ "loss": 0.358,
+ "step": 4022
+ },
+ {
+ "epoch": 22.98857142857143,
+ "grad_norm": 72.33332824707031,
+ "learning_rate": 3.0012698412698415e-05,
+ "loss": 0.2902,
+ "step": 4023
+ },
+ {
+ "epoch": 22.994285714285713,
+ "grad_norm": 66.47919464111328,
+ "learning_rate": 3.000634920634921e-05,
+ "loss": 0.3277,
+ "step": 4024
+ },
+ {
+ "epoch": 23.0,
+ "grad_norm": 62.8366813659668,
+ "learning_rate": 3e-05,
+ "loss": 0.4013,
+ "step": 4025
+ },
+ {
+ "epoch": 23.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6572927832603455,
+ "eval_map": 0.9134,
+ "eval_map_50": 0.9638,
+ "eval_map_75": 0.946,
+ "eval_map_large": 0.9134,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9134,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7857,
+ "eval_mar_10": 0.9635,
+ "eval_mar_100": 0.9702,
+ "eval_mar_100_per_class": 0.9702,
+ "eval_mar_large": 0.9702,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 14.5071,
+ "eval_samples_per_second": 20.266,
+ "eval_steps_per_second": 2.55,
+ "step": 4025
+ },
+ {
+ "epoch": 23.005714285714287,
+ "grad_norm": 30.199420928955078,
+ "learning_rate": 2.9993650793650797e-05,
+ "loss": 0.3206,
+ "step": 4026
+ },
+ {
+ "epoch": 23.01142857142857,
+ "grad_norm": 75.57009887695312,
+ "learning_rate": 2.9987301587301586e-05,
+ "loss": 0.2991,
+ "step": 4027
+ },
+ {
+ "epoch": 23.017142857142858,
+ "grad_norm": 44.21976089477539,
+ "learning_rate": 2.9980952380952382e-05,
+ "loss": 0.286,
+ "step": 4028
+ },
+ {
+ "epoch": 23.02285714285714,
+ "grad_norm": 81.70066833496094,
+ "learning_rate": 2.9974603174603178e-05,
+ "loss": 0.2852,
+ "step": 4029
+ },
+ {
+ "epoch": 23.02857142857143,
+ "grad_norm": 50.564395904541016,
+ "learning_rate": 2.9968253968253967e-05,
+ "loss": 0.5235,
+ "step": 4030
+ },
+ {
+ "epoch": 23.034285714285716,
+ "grad_norm": 29.517118453979492,
+ "learning_rate": 2.9961904761904763e-05,
+ "loss": 0.3634,
+ "step": 4031
+ },
+ {
+ "epoch": 23.04,
+ "grad_norm": 48.26054382324219,
+ "learning_rate": 2.995555555555556e-05,
+ "loss": 0.32,
+ "step": 4032
+ },
+ {
+ "epoch": 23.045714285714286,
+ "grad_norm": 54.40578079223633,
+ "learning_rate": 2.994920634920635e-05,
+ "loss": 0.3863,
+ "step": 4033
+ },
+ {
+ "epoch": 23.05142857142857,
+ "grad_norm": 85.53402709960938,
+ "learning_rate": 2.9942857142857145e-05,
+ "loss": 0.2024,
+ "step": 4034
+ },
+ {
+ "epoch": 23.057142857142857,
+ "grad_norm": 315.6854553222656,
+ "learning_rate": 2.993650793650794e-05,
+ "loss": 0.2238,
+ "step": 4035
+ },
+ {
+ "epoch": 23.062857142857144,
+ "grad_norm": 48.27422332763672,
+ "learning_rate": 2.993015873015873e-05,
+ "loss": 0.3596,
+ "step": 4036
+ },
+ {
+ "epoch": 23.068571428571428,
+ "grad_norm": 33.01654815673828,
+ "learning_rate": 2.9923809523809526e-05,
+ "loss": 0.3188,
+ "step": 4037
+ },
+ {
+ "epoch": 23.074285714285715,
+ "grad_norm": 40.44371032714844,
+ "learning_rate": 2.9917460317460315e-05,
+ "loss": 0.3801,
+ "step": 4038
+ },
+ {
+ "epoch": 23.08,
+ "grad_norm": 49.689483642578125,
+ "learning_rate": 2.991111111111111e-05,
+ "loss": 0.4179,
+ "step": 4039
+ },
+ {
+ "epoch": 23.085714285714285,
+ "grad_norm": 67.72065734863281,
+ "learning_rate": 2.9904761904761907e-05,
+ "loss": 0.2863,
+ "step": 4040
+ },
+ {
+ "epoch": 23.091428571428573,
+ "grad_norm": 48.59077835083008,
+ "learning_rate": 2.9898412698412696e-05,
+ "loss": 0.3233,
+ "step": 4041
+ },
+ {
+ "epoch": 23.097142857142856,
+ "grad_norm": 62.68111038208008,
+ "learning_rate": 2.9892063492063493e-05,
+ "loss": 0.2945,
+ "step": 4042
+ },
+ {
+ "epoch": 23.102857142857143,
+ "grad_norm": 48.72473907470703,
+ "learning_rate": 2.988571428571429e-05,
+ "loss": 0.2933,
+ "step": 4043
+ },
+ {
+ "epoch": 23.10857142857143,
+ "grad_norm": 180.3035125732422,
+ "learning_rate": 2.9879365079365078e-05,
+ "loss": 0.3224,
+ "step": 4044
+ },
+ {
+ "epoch": 23.114285714285714,
+ "grad_norm": 122.58573150634766,
+ "learning_rate": 2.9873015873015874e-05,
+ "loss": 0.4185,
+ "step": 4045
+ },
+ {
+ "epoch": 23.12,
+ "grad_norm": 96.61558532714844,
+ "learning_rate": 2.986666666666667e-05,
+ "loss": 0.3958,
+ "step": 4046
+ },
+ {
+ "epoch": 23.125714285714285,
+ "grad_norm": 49.80635452270508,
+ "learning_rate": 2.986031746031746e-05,
+ "loss": 0.2921,
+ "step": 4047
+ },
+ {
+ "epoch": 23.13142857142857,
+ "grad_norm": 58.02566146850586,
+ "learning_rate": 2.9853968253968255e-05,
+ "loss": 0.2497,
+ "step": 4048
+ },
+ {
+ "epoch": 23.13714285714286,
+ "grad_norm": 22.018583297729492,
+ "learning_rate": 2.9847619047619048e-05,
+ "loss": 0.3285,
+ "step": 4049
+ },
+ {
+ "epoch": 23.142857142857142,
+ "grad_norm": 19.503503799438477,
+ "learning_rate": 2.9841269841269844e-05,
+ "loss": 0.2849,
+ "step": 4050
+ },
+ {
+ "epoch": 23.14857142857143,
+ "grad_norm": 250.73367309570312,
+ "learning_rate": 2.9834920634920636e-05,
+ "loss": 0.4356,
+ "step": 4051
+ },
+ {
+ "epoch": 23.154285714285713,
+ "grad_norm": 69.83318328857422,
+ "learning_rate": 2.982857142857143e-05,
+ "loss": 0.3187,
+ "step": 4052
+ },
+ {
+ "epoch": 23.16,
+ "grad_norm": 42.90557861328125,
+ "learning_rate": 2.9822222222222225e-05,
+ "loss": 0.278,
+ "step": 4053
+ },
+ {
+ "epoch": 23.165714285714287,
+ "grad_norm": 65.743408203125,
+ "learning_rate": 2.9815873015873018e-05,
+ "loss": 0.2933,
+ "step": 4054
+ },
+ {
+ "epoch": 23.17142857142857,
+ "grad_norm": 24.971521377563477,
+ "learning_rate": 2.980952380952381e-05,
+ "loss": 0.2419,
+ "step": 4055
+ },
+ {
+ "epoch": 23.177142857142858,
+ "grad_norm": 83.3399429321289,
+ "learning_rate": 2.9803174603174606e-05,
+ "loss": 0.3094,
+ "step": 4056
+ },
+ {
+ "epoch": 23.18285714285714,
+ "grad_norm": 56.092254638671875,
+ "learning_rate": 2.9796825396825402e-05,
+ "loss": 0.2357,
+ "step": 4057
+ },
+ {
+ "epoch": 23.18857142857143,
+ "grad_norm": 65.36167907714844,
+ "learning_rate": 2.979047619047619e-05,
+ "loss": 0.2966,
+ "step": 4058
+ },
+ {
+ "epoch": 23.194285714285716,
+ "grad_norm": 77.52240753173828,
+ "learning_rate": 2.9784126984126988e-05,
+ "loss": 0.254,
+ "step": 4059
+ },
+ {
+ "epoch": 23.2,
+ "grad_norm": 39.724891662597656,
+ "learning_rate": 2.9777777777777777e-05,
+ "loss": 0.28,
+ "step": 4060
+ },
+ {
+ "epoch": 23.205714285714286,
+ "grad_norm": 89.53458404541016,
+ "learning_rate": 2.9771428571428573e-05,
+ "loss": 0.3436,
+ "step": 4061
+ },
+ {
+ "epoch": 23.21142857142857,
+ "grad_norm": 33.58918762207031,
+ "learning_rate": 2.976507936507937e-05,
+ "loss": 0.2707,
+ "step": 4062
+ },
+ {
+ "epoch": 23.217142857142857,
+ "grad_norm": 99.25462341308594,
+ "learning_rate": 2.9758730158730158e-05,
+ "loss": 0.2268,
+ "step": 4063
+ },
+ {
+ "epoch": 23.222857142857144,
+ "grad_norm": 39.316444396972656,
+ "learning_rate": 2.9752380952380954e-05,
+ "loss": 0.34,
+ "step": 4064
+ },
+ {
+ "epoch": 23.228571428571428,
+ "grad_norm": 234.89569091796875,
+ "learning_rate": 2.974603174603175e-05,
+ "loss": 0.3746,
+ "step": 4065
+ },
+ {
+ "epoch": 23.234285714285715,
+ "grad_norm": 29.803430557250977,
+ "learning_rate": 2.973968253968254e-05,
+ "loss": 0.2864,
+ "step": 4066
+ },
+ {
+ "epoch": 23.24,
+ "grad_norm": 197.21961975097656,
+ "learning_rate": 2.9733333333333336e-05,
+ "loss": 0.2753,
+ "step": 4067
+ },
+ {
+ "epoch": 23.245714285714286,
+ "grad_norm": 34.24064254760742,
+ "learning_rate": 2.972698412698413e-05,
+ "loss": 0.2701,
+ "step": 4068
+ },
+ {
+ "epoch": 23.251428571428573,
+ "grad_norm": 31.376087188720703,
+ "learning_rate": 2.972063492063492e-05,
+ "loss": 0.3328,
+ "step": 4069
+ },
+ {
+ "epoch": 23.257142857142856,
+ "grad_norm": 44.968936920166016,
+ "learning_rate": 2.9714285714285717e-05,
+ "loss": 0.252,
+ "step": 4070
+ },
+ {
+ "epoch": 23.262857142857143,
+ "grad_norm": 90.7536392211914,
+ "learning_rate": 2.9707936507936506e-05,
+ "loss": 0.2172,
+ "step": 4071
+ },
+ {
+ "epoch": 23.268571428571427,
+ "grad_norm": 76.8214340209961,
+ "learning_rate": 2.9701587301587302e-05,
+ "loss": 0.2644,
+ "step": 4072
+ },
+ {
+ "epoch": 23.274285714285714,
+ "grad_norm": 49.46097946166992,
+ "learning_rate": 2.9695238095238098e-05,
+ "loss": 0.3203,
+ "step": 4073
+ },
+ {
+ "epoch": 23.28,
+ "grad_norm": 62.46669006347656,
+ "learning_rate": 2.9688888888888887e-05,
+ "loss": 0.3691,
+ "step": 4074
+ },
+ {
+ "epoch": 23.285714285714285,
+ "grad_norm": 68.3512191772461,
+ "learning_rate": 2.9682539682539683e-05,
+ "loss": 0.2607,
+ "step": 4075
+ },
+ {
+ "epoch": 23.291428571428572,
+ "grad_norm": 29.4173641204834,
+ "learning_rate": 2.967619047619048e-05,
+ "loss": 0.2843,
+ "step": 4076
+ },
+ {
+ "epoch": 23.29714285714286,
+ "grad_norm": 542.9811401367188,
+ "learning_rate": 2.966984126984127e-05,
+ "loss": 0.2393,
+ "step": 4077
+ },
+ {
+ "epoch": 23.302857142857142,
+ "grad_norm": 47.83937072753906,
+ "learning_rate": 2.9663492063492065e-05,
+ "loss": 0.3395,
+ "step": 4078
+ },
+ {
+ "epoch": 23.30857142857143,
+ "grad_norm": 18.514507293701172,
+ "learning_rate": 2.965714285714286e-05,
+ "loss": 0.2663,
+ "step": 4079
+ },
+ {
+ "epoch": 23.314285714285713,
+ "grad_norm": 69.58287048339844,
+ "learning_rate": 2.965079365079365e-05,
+ "loss": 0.3558,
+ "step": 4080
+ },
+ {
+ "epoch": 23.32,
+ "grad_norm": 37.93904495239258,
+ "learning_rate": 2.9644444444444446e-05,
+ "loss": 0.2761,
+ "step": 4081
+ },
+ {
+ "epoch": 23.325714285714287,
+ "grad_norm": 29.72590446472168,
+ "learning_rate": 2.963809523809524e-05,
+ "loss": 0.284,
+ "step": 4082
+ },
+ {
+ "epoch": 23.33142857142857,
+ "grad_norm": 90.10633087158203,
+ "learning_rate": 2.963174603174603e-05,
+ "loss": 0.3729,
+ "step": 4083
+ },
+ {
+ "epoch": 23.337142857142858,
+ "grad_norm": 32.455440521240234,
+ "learning_rate": 2.9625396825396827e-05,
+ "loss": 0.3237,
+ "step": 4084
+ },
+ {
+ "epoch": 23.34285714285714,
+ "grad_norm": 29.6805477142334,
+ "learning_rate": 2.961904761904762e-05,
+ "loss": 0.2234,
+ "step": 4085
+ },
+ {
+ "epoch": 23.34857142857143,
+ "grad_norm": 56.32411193847656,
+ "learning_rate": 2.9612698412698413e-05,
+ "loss": 0.3353,
+ "step": 4086
+ },
+ {
+ "epoch": 23.354285714285716,
+ "grad_norm": 50.53952407836914,
+ "learning_rate": 2.960634920634921e-05,
+ "loss": 0.2391,
+ "step": 4087
+ },
+ {
+ "epoch": 23.36,
+ "grad_norm": 33.48997116088867,
+ "learning_rate": 2.96e-05,
+ "loss": 0.2906,
+ "step": 4088
+ },
+ {
+ "epoch": 23.365714285714287,
+ "grad_norm": 37.64202117919922,
+ "learning_rate": 2.9593650793650797e-05,
+ "loss": 0.3691,
+ "step": 4089
+ },
+ {
+ "epoch": 23.37142857142857,
+ "grad_norm": 56.41489028930664,
+ "learning_rate": 2.958730158730159e-05,
+ "loss": 0.2553,
+ "step": 4090
+ },
+ {
+ "epoch": 23.377142857142857,
+ "grad_norm": 34.01242446899414,
+ "learning_rate": 2.9580952380952382e-05,
+ "loss": 0.3105,
+ "step": 4091
+ },
+ {
+ "epoch": 23.382857142857144,
+ "grad_norm": 282.2917785644531,
+ "learning_rate": 2.957460317460318e-05,
+ "loss": 0.3843,
+ "step": 4092
+ },
+ {
+ "epoch": 23.388571428571428,
+ "grad_norm": 82.39885711669922,
+ "learning_rate": 2.9568253968253968e-05,
+ "loss": 0.2232,
+ "step": 4093
+ },
+ {
+ "epoch": 23.394285714285715,
+ "grad_norm": 37.49563980102539,
+ "learning_rate": 2.9561904761904764e-05,
+ "loss": 0.274,
+ "step": 4094
+ },
+ {
+ "epoch": 23.4,
+ "grad_norm": 34.51573944091797,
+ "learning_rate": 2.955555555555556e-05,
+ "loss": 0.3468,
+ "step": 4095
+ },
+ {
+ "epoch": 23.405714285714286,
+ "grad_norm": 54.154293060302734,
+ "learning_rate": 2.954920634920635e-05,
+ "loss": 0.3724,
+ "step": 4096
+ },
+ {
+ "epoch": 23.411428571428573,
+ "grad_norm": 57.695987701416016,
+ "learning_rate": 2.9542857142857145e-05,
+ "loss": 0.3271,
+ "step": 4097
+ },
+ {
+ "epoch": 23.417142857142856,
+ "grad_norm": 68.31269836425781,
+ "learning_rate": 2.953650793650794e-05,
+ "loss": 0.2621,
+ "step": 4098
+ },
+ {
+ "epoch": 23.422857142857143,
+ "grad_norm": 103.53577423095703,
+ "learning_rate": 2.953015873015873e-05,
+ "loss": 0.3015,
+ "step": 4099
+ },
+ {
+ "epoch": 23.428571428571427,
+ "grad_norm": 453.0390930175781,
+ "learning_rate": 2.9523809523809526e-05,
+ "loss": 0.4082,
+ "step": 4100
+ },
+ {
+ "epoch": 23.434285714285714,
+ "grad_norm": 32.948158264160156,
+ "learning_rate": 2.9517460317460322e-05,
+ "loss": 0.3961,
+ "step": 4101
+ },
+ {
+ "epoch": 23.44,
+ "grad_norm": 54.90577697753906,
+ "learning_rate": 2.951111111111111e-05,
+ "loss": 0.1926,
+ "step": 4102
+ },
+ {
+ "epoch": 23.445714285714285,
+ "grad_norm": 66.09902954101562,
+ "learning_rate": 2.9504761904761908e-05,
+ "loss": 0.5602,
+ "step": 4103
+ },
+ {
+ "epoch": 23.451428571428572,
+ "grad_norm": 40.43222427368164,
+ "learning_rate": 2.9498412698412697e-05,
+ "loss": 0.1848,
+ "step": 4104
+ },
+ {
+ "epoch": 23.457142857142856,
+ "grad_norm": 35.24291229248047,
+ "learning_rate": 2.9492063492063493e-05,
+ "loss": 0.2214,
+ "step": 4105
+ },
+ {
+ "epoch": 23.462857142857143,
+ "grad_norm": 54.03268814086914,
+ "learning_rate": 2.948571428571429e-05,
+ "loss": 0.2811,
+ "step": 4106
+ },
+ {
+ "epoch": 23.46857142857143,
+ "grad_norm": 27.69357681274414,
+ "learning_rate": 2.9479365079365078e-05,
+ "loss": 0.3433,
+ "step": 4107
+ },
+ {
+ "epoch": 23.474285714285713,
+ "grad_norm": 116.31719970703125,
+ "learning_rate": 2.9473015873015874e-05,
+ "loss": 0.3817,
+ "step": 4108
+ },
+ {
+ "epoch": 23.48,
+ "grad_norm": 106.09890747070312,
+ "learning_rate": 2.946666666666667e-05,
+ "loss": 0.391,
+ "step": 4109
+ },
+ {
+ "epoch": 23.485714285714284,
+ "grad_norm": 30.089054107666016,
+ "learning_rate": 2.946031746031746e-05,
+ "loss": 0.3046,
+ "step": 4110
+ },
+ {
+ "epoch": 23.49142857142857,
+ "grad_norm": 73.0106201171875,
+ "learning_rate": 2.9453968253968256e-05,
+ "loss": 0.2402,
+ "step": 4111
+ },
+ {
+ "epoch": 23.497142857142858,
+ "grad_norm": 45.55817794799805,
+ "learning_rate": 2.944761904761905e-05,
+ "loss": 0.1807,
+ "step": 4112
+ },
+ {
+ "epoch": 23.502857142857142,
+ "grad_norm": 45.446956634521484,
+ "learning_rate": 2.944126984126984e-05,
+ "loss": 0.2182,
+ "step": 4113
+ },
+ {
+ "epoch": 23.50857142857143,
+ "grad_norm": 150.46876525878906,
+ "learning_rate": 2.9434920634920637e-05,
+ "loss": 0.328,
+ "step": 4114
+ },
+ {
+ "epoch": 23.514285714285712,
+ "grad_norm": 82.34252166748047,
+ "learning_rate": 2.9428571428571426e-05,
+ "loss": 0.2811,
+ "step": 4115
+ },
+ {
+ "epoch": 23.52,
+ "grad_norm": 36.25138473510742,
+ "learning_rate": 2.9422222222222222e-05,
+ "loss": 0.2981,
+ "step": 4116
+ },
+ {
+ "epoch": 23.525714285714287,
+ "grad_norm": 26.288358688354492,
+ "learning_rate": 2.9415873015873018e-05,
+ "loss": 0.3378,
+ "step": 4117
+ },
+ {
+ "epoch": 23.53142857142857,
+ "grad_norm": 83.09166717529297,
+ "learning_rate": 2.9409523809523807e-05,
+ "loss": 0.215,
+ "step": 4118
+ },
+ {
+ "epoch": 23.537142857142857,
+ "grad_norm": 35.448184967041016,
+ "learning_rate": 2.9403174603174603e-05,
+ "loss": 0.3778,
+ "step": 4119
+ },
+ {
+ "epoch": 23.542857142857144,
+ "grad_norm": 41.17313003540039,
+ "learning_rate": 2.93968253968254e-05,
+ "loss": 0.3844,
+ "step": 4120
+ },
+ {
+ "epoch": 23.548571428571428,
+ "grad_norm": 35.156951904296875,
+ "learning_rate": 2.9390476190476192e-05,
+ "loss": 0.3097,
+ "step": 4121
+ },
+ {
+ "epoch": 23.554285714285715,
+ "grad_norm": 39.88846969604492,
+ "learning_rate": 2.9384126984126985e-05,
+ "loss": 0.2651,
+ "step": 4122
+ },
+ {
+ "epoch": 23.56,
+ "grad_norm": 50.63812255859375,
+ "learning_rate": 2.937777777777778e-05,
+ "loss": 0.3026,
+ "step": 4123
+ },
+ {
+ "epoch": 23.565714285714286,
+ "grad_norm": 41.88031005859375,
+ "learning_rate": 2.9371428571428573e-05,
+ "loss": 0.243,
+ "step": 4124
+ },
+ {
+ "epoch": 23.571428571428573,
+ "grad_norm": 56.02881622314453,
+ "learning_rate": 2.9365079365079366e-05,
+ "loss": 0.28,
+ "step": 4125
+ },
+ {
+ "epoch": 23.577142857142857,
+ "grad_norm": 32.54145812988281,
+ "learning_rate": 2.935873015873016e-05,
+ "loss": 0.5015,
+ "step": 4126
+ },
+ {
+ "epoch": 23.582857142857144,
+ "grad_norm": 29.771968841552734,
+ "learning_rate": 2.9352380952380955e-05,
+ "loss": 0.2976,
+ "step": 4127
+ },
+ {
+ "epoch": 23.588571428571427,
+ "grad_norm": 62.9817008972168,
+ "learning_rate": 2.934603174603175e-05,
+ "loss": 0.3482,
+ "step": 4128
+ },
+ {
+ "epoch": 23.594285714285714,
+ "grad_norm": 78.29792022705078,
+ "learning_rate": 2.933968253968254e-05,
+ "loss": 0.2816,
+ "step": 4129
+ },
+ {
+ "epoch": 23.6,
+ "grad_norm": 68.27775573730469,
+ "learning_rate": 2.9333333333333336e-05,
+ "loss": 0.4067,
+ "step": 4130
+ },
+ {
+ "epoch": 23.605714285714285,
+ "grad_norm": 61.55348587036133,
+ "learning_rate": 2.9326984126984132e-05,
+ "loss": 0.2233,
+ "step": 4131
+ },
+ {
+ "epoch": 23.611428571428572,
+ "grad_norm": 52.80277633666992,
+ "learning_rate": 2.932063492063492e-05,
+ "loss": 0.3024,
+ "step": 4132
+ },
+ {
+ "epoch": 23.617142857142856,
+ "grad_norm": 58.86653137207031,
+ "learning_rate": 2.9314285714285717e-05,
+ "loss": 0.274,
+ "step": 4133
+ },
+ {
+ "epoch": 23.622857142857143,
+ "grad_norm": 74.1474838256836,
+ "learning_rate": 2.9307936507936513e-05,
+ "loss": 0.3088,
+ "step": 4134
+ },
+ {
+ "epoch": 23.62857142857143,
+ "grad_norm": 61.79986572265625,
+ "learning_rate": 2.9301587301587303e-05,
+ "loss": 0.3552,
+ "step": 4135
+ },
+ {
+ "epoch": 23.634285714285713,
+ "grad_norm": 74.90558624267578,
+ "learning_rate": 2.92952380952381e-05,
+ "loss": 0.3745,
+ "step": 4136
+ },
+ {
+ "epoch": 23.64,
+ "grad_norm": 46.57097244262695,
+ "learning_rate": 2.9288888888888888e-05,
+ "loss": 0.3227,
+ "step": 4137
+ },
+ {
+ "epoch": 23.645714285714284,
+ "grad_norm": 17.17082977294922,
+ "learning_rate": 2.9282539682539684e-05,
+ "loss": 0.2491,
+ "step": 4138
+ },
+ {
+ "epoch": 23.65142857142857,
+ "grad_norm": 49.79542541503906,
+ "learning_rate": 2.927619047619048e-05,
+ "loss": 0.3576,
+ "step": 4139
+ },
+ {
+ "epoch": 23.65714285714286,
+ "grad_norm": 26.572895050048828,
+ "learning_rate": 2.926984126984127e-05,
+ "loss": 0.2167,
+ "step": 4140
+ },
+ {
+ "epoch": 23.662857142857142,
+ "grad_norm": 252.93934631347656,
+ "learning_rate": 2.9263492063492065e-05,
+ "loss": 0.4041,
+ "step": 4141
+ },
+ {
+ "epoch": 23.66857142857143,
+ "grad_norm": 50.79920959472656,
+ "learning_rate": 2.925714285714286e-05,
+ "loss": 0.3337,
+ "step": 4142
+ },
+ {
+ "epoch": 23.674285714285713,
+ "grad_norm": 40.481346130371094,
+ "learning_rate": 2.925079365079365e-05,
+ "loss": 0.2801,
+ "step": 4143
+ },
+ {
+ "epoch": 23.68,
+ "grad_norm": 65.705078125,
+ "learning_rate": 2.9244444444444446e-05,
+ "loss": 0.3622,
+ "step": 4144
+ },
+ {
+ "epoch": 23.685714285714287,
+ "grad_norm": 92.2256088256836,
+ "learning_rate": 2.9238095238095242e-05,
+ "loss": 0.3038,
+ "step": 4145
+ },
+ {
+ "epoch": 23.69142857142857,
+ "grad_norm": 556.6972045898438,
+ "learning_rate": 2.923174603174603e-05,
+ "loss": 0.5332,
+ "step": 4146
+ },
+ {
+ "epoch": 23.697142857142858,
+ "grad_norm": 70.96257019042969,
+ "learning_rate": 2.9225396825396828e-05,
+ "loss": 0.4394,
+ "step": 4147
+ },
+ {
+ "epoch": 23.70285714285714,
+ "grad_norm": 86.98827362060547,
+ "learning_rate": 2.9219047619047617e-05,
+ "loss": 0.3198,
+ "step": 4148
+ },
+ {
+ "epoch": 23.708571428571428,
+ "grad_norm": 40.2372932434082,
+ "learning_rate": 2.9212698412698413e-05,
+ "loss": 0.2527,
+ "step": 4149
+ },
+ {
+ "epoch": 23.714285714285715,
+ "grad_norm": 36.18385314941406,
+ "learning_rate": 2.920634920634921e-05,
+ "loss": 0.235,
+ "step": 4150
+ },
+ {
+ "epoch": 23.72,
+ "grad_norm": 46.50616455078125,
+ "learning_rate": 2.9199999999999998e-05,
+ "loss": 0.324,
+ "step": 4151
+ },
+ {
+ "epoch": 23.725714285714286,
+ "grad_norm": 32.6524772644043,
+ "learning_rate": 2.9193650793650794e-05,
+ "loss": 0.275,
+ "step": 4152
+ },
+ {
+ "epoch": 23.731428571428573,
+ "grad_norm": 52.58832931518555,
+ "learning_rate": 2.918730158730159e-05,
+ "loss": 0.4361,
+ "step": 4153
+ },
+ {
+ "epoch": 23.737142857142857,
+ "grad_norm": 54.013526916503906,
+ "learning_rate": 2.918095238095238e-05,
+ "loss": 0.331,
+ "step": 4154
+ },
+ {
+ "epoch": 23.742857142857144,
+ "grad_norm": 96.2431411743164,
+ "learning_rate": 2.9174603174603176e-05,
+ "loss": 0.4995,
+ "step": 4155
+ },
+ {
+ "epoch": 23.748571428571427,
+ "grad_norm": 181.16592407226562,
+ "learning_rate": 2.916825396825397e-05,
+ "loss": 0.4459,
+ "step": 4156
+ },
+ {
+ "epoch": 23.754285714285714,
+ "grad_norm": 40.125282287597656,
+ "learning_rate": 2.916190476190476e-05,
+ "loss": 0.3106,
+ "step": 4157
+ },
+ {
+ "epoch": 23.76,
+ "grad_norm": 34.627464294433594,
+ "learning_rate": 2.9155555555555557e-05,
+ "loss": 0.457,
+ "step": 4158
+ },
+ {
+ "epoch": 23.765714285714285,
+ "grad_norm": 222.72634887695312,
+ "learning_rate": 2.914920634920635e-05,
+ "loss": 0.3446,
+ "step": 4159
+ },
+ {
+ "epoch": 23.771428571428572,
+ "grad_norm": 47.79024887084961,
+ "learning_rate": 2.9142857142857146e-05,
+ "loss": 0.4671,
+ "step": 4160
+ },
+ {
+ "epoch": 23.777142857142856,
+ "grad_norm": 29.11241912841797,
+ "learning_rate": 2.9136507936507938e-05,
+ "loss": 0.2407,
+ "step": 4161
+ },
+ {
+ "epoch": 23.782857142857143,
+ "grad_norm": 301.6238098144531,
+ "learning_rate": 2.913015873015873e-05,
+ "loss": 0.2996,
+ "step": 4162
+ },
+ {
+ "epoch": 23.78857142857143,
+ "grad_norm": 58.332515716552734,
+ "learning_rate": 2.9123809523809527e-05,
+ "loss": 0.3433,
+ "step": 4163
+ },
+ {
+ "epoch": 23.794285714285714,
+ "grad_norm": 27.199840545654297,
+ "learning_rate": 2.911746031746032e-05,
+ "loss": 0.2256,
+ "step": 4164
+ },
+ {
+ "epoch": 23.8,
+ "grad_norm": 72.84584045410156,
+ "learning_rate": 2.9111111111111112e-05,
+ "loss": 0.3094,
+ "step": 4165
+ },
+ {
+ "epoch": 23.805714285714284,
+ "grad_norm": 49.55764389038086,
+ "learning_rate": 2.9104761904761908e-05,
+ "loss": 0.3032,
+ "step": 4166
+ },
+ {
+ "epoch": 23.81142857142857,
+ "grad_norm": 43.26686477661133,
+ "learning_rate": 2.9098412698412704e-05,
+ "loss": 0.5507,
+ "step": 4167
+ },
+ {
+ "epoch": 23.81714285714286,
+ "grad_norm": 53.073814392089844,
+ "learning_rate": 2.9092063492063493e-05,
+ "loss": 0.1837,
+ "step": 4168
+ },
+ {
+ "epoch": 23.822857142857142,
+ "grad_norm": 36.02912139892578,
+ "learning_rate": 2.908571428571429e-05,
+ "loss": 0.2756,
+ "step": 4169
+ },
+ {
+ "epoch": 23.82857142857143,
+ "grad_norm": 61.297542572021484,
+ "learning_rate": 2.907936507936508e-05,
+ "loss": 0.2412,
+ "step": 4170
+ },
+ {
+ "epoch": 23.834285714285713,
+ "grad_norm": 40.57884216308594,
+ "learning_rate": 2.9073015873015875e-05,
+ "loss": 0.3359,
+ "step": 4171
+ },
+ {
+ "epoch": 23.84,
+ "grad_norm": 39.376338958740234,
+ "learning_rate": 2.906666666666667e-05,
+ "loss": 0.2095,
+ "step": 4172
+ },
+ {
+ "epoch": 23.845714285714287,
+ "grad_norm": 42.68909454345703,
+ "learning_rate": 2.906031746031746e-05,
+ "loss": 0.3273,
+ "step": 4173
+ },
+ {
+ "epoch": 23.85142857142857,
+ "grad_norm": 30.186763763427734,
+ "learning_rate": 2.9053968253968256e-05,
+ "loss": 0.2665,
+ "step": 4174
+ },
+ {
+ "epoch": 23.857142857142858,
+ "grad_norm": 33.17063522338867,
+ "learning_rate": 2.9047619047619052e-05,
+ "loss": 0.2003,
+ "step": 4175
+ },
+ {
+ "epoch": 23.86285714285714,
+ "grad_norm": 36.41645050048828,
+ "learning_rate": 2.904126984126984e-05,
+ "loss": 0.2562,
+ "step": 4176
+ },
+ {
+ "epoch": 23.86857142857143,
+ "grad_norm": 366.9606628417969,
+ "learning_rate": 2.9034920634920637e-05,
+ "loss": 0.4117,
+ "step": 4177
+ },
+ {
+ "epoch": 23.874285714285715,
+ "grad_norm": 58.57237243652344,
+ "learning_rate": 2.9028571428571427e-05,
+ "loss": 0.3746,
+ "step": 4178
+ },
+ {
+ "epoch": 23.88,
+ "grad_norm": 51.41328811645508,
+ "learning_rate": 2.9022222222222223e-05,
+ "loss": 0.4567,
+ "step": 4179
+ },
+ {
+ "epoch": 23.885714285714286,
+ "grad_norm": 43.09410858154297,
+ "learning_rate": 2.901587301587302e-05,
+ "loss": 0.202,
+ "step": 4180
+ },
+ {
+ "epoch": 23.89142857142857,
+ "grad_norm": 122.54499053955078,
+ "learning_rate": 2.9009523809523808e-05,
+ "loss": 0.3366,
+ "step": 4181
+ },
+ {
+ "epoch": 23.897142857142857,
+ "grad_norm": 58.41392517089844,
+ "learning_rate": 2.9003174603174604e-05,
+ "loss": 0.2232,
+ "step": 4182
+ },
+ {
+ "epoch": 23.902857142857144,
+ "grad_norm": 61.96669387817383,
+ "learning_rate": 2.89968253968254e-05,
+ "loss": 0.325,
+ "step": 4183
+ },
+ {
+ "epoch": 23.908571428571427,
+ "grad_norm": 50.98522186279297,
+ "learning_rate": 2.899047619047619e-05,
+ "loss": 0.255,
+ "step": 4184
+ },
+ {
+ "epoch": 23.914285714285715,
+ "grad_norm": 83.81291961669922,
+ "learning_rate": 2.8984126984126985e-05,
+ "loss": 0.3386,
+ "step": 4185
+ },
+ {
+ "epoch": 23.92,
+ "grad_norm": 13.118070602416992,
+ "learning_rate": 2.897777777777778e-05,
+ "loss": 0.4992,
+ "step": 4186
+ },
+ {
+ "epoch": 23.925714285714285,
+ "grad_norm": 237.41595458984375,
+ "learning_rate": 2.897142857142857e-05,
+ "loss": 0.3331,
+ "step": 4187
+ },
+ {
+ "epoch": 23.931428571428572,
+ "grad_norm": 66.728759765625,
+ "learning_rate": 2.8965079365079366e-05,
+ "loss": 0.2556,
+ "step": 4188
+ },
+ {
+ "epoch": 23.937142857142856,
+ "grad_norm": 61.510650634765625,
+ "learning_rate": 2.8958730158730156e-05,
+ "loss": 0.3102,
+ "step": 4189
+ },
+ {
+ "epoch": 23.942857142857143,
+ "grad_norm": 38.73003387451172,
+ "learning_rate": 2.8952380952380952e-05,
+ "loss": 0.2483,
+ "step": 4190
+ },
+ {
+ "epoch": 23.94857142857143,
+ "grad_norm": 59.62195587158203,
+ "learning_rate": 2.8946031746031748e-05,
+ "loss": 0.4213,
+ "step": 4191
+ },
+ {
+ "epoch": 23.954285714285714,
+ "grad_norm": 42.30493927001953,
+ "learning_rate": 2.893968253968254e-05,
+ "loss": 0.2779,
+ "step": 4192
+ },
+ {
+ "epoch": 23.96,
+ "grad_norm": 94.05848693847656,
+ "learning_rate": 2.8933333333333333e-05,
+ "loss": 0.3609,
+ "step": 4193
+ },
+ {
+ "epoch": 23.965714285714284,
+ "grad_norm": 28.656415939331055,
+ "learning_rate": 2.892698412698413e-05,
+ "loss": 0.3449,
+ "step": 4194
+ },
+ {
+ "epoch": 23.97142857142857,
+ "grad_norm": 41.065467834472656,
+ "learning_rate": 2.892063492063492e-05,
+ "loss": 0.2239,
+ "step": 4195
+ },
+ {
+ "epoch": 23.97714285714286,
+ "grad_norm": 50.81985855102539,
+ "learning_rate": 2.8914285714285714e-05,
+ "loss": 0.3053,
+ "step": 4196
+ },
+ {
+ "epoch": 23.982857142857142,
+ "grad_norm": 91.52791595458984,
+ "learning_rate": 2.890793650793651e-05,
+ "loss": 0.3412,
+ "step": 4197
+ },
+ {
+ "epoch": 23.98857142857143,
+ "grad_norm": 142.12220764160156,
+ "learning_rate": 2.8901587301587303e-05,
+ "loss": 0.4178,
+ "step": 4198
+ },
+ {
+ "epoch": 23.994285714285713,
+ "grad_norm": 54.66435623168945,
+ "learning_rate": 2.88952380952381e-05,
+ "loss": 0.4585,
+ "step": 4199
+ },
+ {
+ "epoch": 24.0,
+ "grad_norm": 37.650115966796875,
+ "learning_rate": 2.8888888888888888e-05,
+ "loss": 0.381,
+ "step": 4200
+ },
+ {
+ "epoch": 24.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6580873727798462,
+ "eval_map": 0.9096,
+ "eval_map_50": 0.9685,
+ "eval_map_75": 0.9554,
+ "eval_map_large": 0.9099,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9096,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7806,
+ "eval_mar_10": 0.9578,
+ "eval_mar_100": 0.9641,
+ "eval_mar_100_per_class": 0.9641,
+ "eval_mar_large": 0.9641,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.4963,
+ "eval_samples_per_second": 21.784,
+ "eval_steps_per_second": 2.741,
+ "step": 4200
+ },
+ {
+ "epoch": 24.005714285714287,
+ "grad_norm": 27.211124420166016,
+ "learning_rate": 2.8882539682539684e-05,
+ "loss": 0.3187,
+ "step": 4201
+ },
+ {
+ "epoch": 24.01142857142857,
+ "grad_norm": 55.33848571777344,
+ "learning_rate": 2.887619047619048e-05,
+ "loss": 0.4081,
+ "step": 4202
+ },
+ {
+ "epoch": 24.017142857142858,
+ "grad_norm": 69.4387435913086,
+ "learning_rate": 2.886984126984127e-05,
+ "loss": 0.4005,
+ "step": 4203
+ },
+ {
+ "epoch": 24.02285714285714,
+ "grad_norm": 166.2154541015625,
+ "learning_rate": 2.8863492063492066e-05,
+ "loss": 0.3173,
+ "step": 4204
+ },
+ {
+ "epoch": 24.02857142857143,
+ "grad_norm": 25.793365478515625,
+ "learning_rate": 2.885714285714286e-05,
+ "loss": 0.4004,
+ "step": 4205
+ },
+ {
+ "epoch": 24.034285714285716,
+ "grad_norm": 22.807405471801758,
+ "learning_rate": 2.885079365079365e-05,
+ "loss": 0.2878,
+ "step": 4206
+ },
+ {
+ "epoch": 24.04,
+ "grad_norm": 27.175668716430664,
+ "learning_rate": 2.8844444444444447e-05,
+ "loss": 0.4303,
+ "step": 4207
+ },
+ {
+ "epoch": 24.045714285714286,
+ "grad_norm": 404.7174072265625,
+ "learning_rate": 2.8838095238095243e-05,
+ "loss": 0.364,
+ "step": 4208
+ },
+ {
+ "epoch": 24.05142857142857,
+ "grad_norm": 34.13227081298828,
+ "learning_rate": 2.8831746031746032e-05,
+ "loss": 0.5157,
+ "step": 4209
+ },
+ {
+ "epoch": 24.057142857142857,
+ "grad_norm": 48.720848083496094,
+ "learning_rate": 2.8825396825396828e-05,
+ "loss": 0.3891,
+ "step": 4210
+ },
+ {
+ "epoch": 24.062857142857144,
+ "grad_norm": 57.76936340332031,
+ "learning_rate": 2.8819047619047617e-05,
+ "loss": 0.408,
+ "step": 4211
+ },
+ {
+ "epoch": 24.068571428571428,
+ "grad_norm": 89.69650268554688,
+ "learning_rate": 2.8812698412698413e-05,
+ "loss": 0.377,
+ "step": 4212
+ },
+ {
+ "epoch": 24.074285714285715,
+ "grad_norm": 56.81898498535156,
+ "learning_rate": 2.880634920634921e-05,
+ "loss": 0.3412,
+ "step": 4213
+ },
+ {
+ "epoch": 24.08,
+ "grad_norm": 23.175161361694336,
+ "learning_rate": 2.88e-05,
+ "loss": 0.3675,
+ "step": 4214
+ },
+ {
+ "epoch": 24.085714285714285,
+ "grad_norm": 55.028072357177734,
+ "learning_rate": 2.8793650793650795e-05,
+ "loss": 0.3423,
+ "step": 4215
+ },
+ {
+ "epoch": 24.091428571428573,
+ "grad_norm": 21.8044376373291,
+ "learning_rate": 2.878730158730159e-05,
+ "loss": 0.3916,
+ "step": 4216
+ },
+ {
+ "epoch": 24.097142857142856,
+ "grad_norm": 32.57919692993164,
+ "learning_rate": 2.878095238095238e-05,
+ "loss": 0.3125,
+ "step": 4217
+ },
+ {
+ "epoch": 24.102857142857143,
+ "grad_norm": 58.032657623291016,
+ "learning_rate": 2.8774603174603176e-05,
+ "loss": 0.2729,
+ "step": 4218
+ },
+ {
+ "epoch": 24.10857142857143,
+ "grad_norm": 138.78158569335938,
+ "learning_rate": 2.8768253968253972e-05,
+ "loss": 0.2627,
+ "step": 4219
+ },
+ {
+ "epoch": 24.114285714285714,
+ "grad_norm": 61.15944290161133,
+ "learning_rate": 2.876190476190476e-05,
+ "loss": 0.3373,
+ "step": 4220
+ },
+ {
+ "epoch": 24.12,
+ "grad_norm": 32.430580139160156,
+ "learning_rate": 2.8755555555555557e-05,
+ "loss": 0.3092,
+ "step": 4221
+ },
+ {
+ "epoch": 24.125714285714285,
+ "grad_norm": 86.44486236572266,
+ "learning_rate": 2.8749206349206347e-05,
+ "loss": 0.2722,
+ "step": 4222
+ },
+ {
+ "epoch": 24.13142857142857,
+ "grad_norm": 20.431638717651367,
+ "learning_rate": 2.8742857142857143e-05,
+ "loss": 0.2748,
+ "step": 4223
+ },
+ {
+ "epoch": 24.13714285714286,
+ "grad_norm": 194.9654998779297,
+ "learning_rate": 2.873650793650794e-05,
+ "loss": 0.3542,
+ "step": 4224
+ },
+ {
+ "epoch": 24.142857142857142,
+ "grad_norm": 189.65545654296875,
+ "learning_rate": 2.8730158730158728e-05,
+ "loss": 0.2563,
+ "step": 4225
+ },
+ {
+ "epoch": 24.14857142857143,
+ "grad_norm": 53.960079193115234,
+ "learning_rate": 2.8723809523809524e-05,
+ "loss": 0.3252,
+ "step": 4226
+ },
+ {
+ "epoch": 24.154285714285713,
+ "grad_norm": 25.061969757080078,
+ "learning_rate": 2.871746031746032e-05,
+ "loss": 0.3335,
+ "step": 4227
+ },
+ {
+ "epoch": 24.16,
+ "grad_norm": 113.84751892089844,
+ "learning_rate": 2.8711111111111113e-05,
+ "loss": 0.3427,
+ "step": 4228
+ },
+ {
+ "epoch": 24.165714285714287,
+ "grad_norm": 22.992420196533203,
+ "learning_rate": 2.8704761904761905e-05,
+ "loss": 0.4027,
+ "step": 4229
+ },
+ {
+ "epoch": 24.17142857142857,
+ "grad_norm": 33.864959716796875,
+ "learning_rate": 2.86984126984127e-05,
+ "loss": 0.36,
+ "step": 4230
+ },
+ {
+ "epoch": 24.177142857142858,
+ "grad_norm": 208.66458129882812,
+ "learning_rate": 2.8692063492063494e-05,
+ "loss": 0.413,
+ "step": 4231
+ },
+ {
+ "epoch": 24.18285714285714,
+ "grad_norm": 47.48231887817383,
+ "learning_rate": 2.8685714285714286e-05,
+ "loss": 0.2793,
+ "step": 4232
+ },
+ {
+ "epoch": 24.18857142857143,
+ "grad_norm": 62.76450729370117,
+ "learning_rate": 2.867936507936508e-05,
+ "loss": 0.1819,
+ "step": 4233
+ },
+ {
+ "epoch": 24.194285714285716,
+ "grad_norm": 55.089698791503906,
+ "learning_rate": 2.8673015873015875e-05,
+ "loss": 0.2865,
+ "step": 4234
+ },
+ {
+ "epoch": 24.2,
+ "grad_norm": 143.07089233398438,
+ "learning_rate": 2.8666666666666668e-05,
+ "loss": 0.3126,
+ "step": 4235
+ },
+ {
+ "epoch": 24.205714285714286,
+ "grad_norm": 45.78302764892578,
+ "learning_rate": 2.866031746031746e-05,
+ "loss": 0.2982,
+ "step": 4236
+ },
+ {
+ "epoch": 24.21142857142857,
+ "grad_norm": 104.21949005126953,
+ "learning_rate": 2.8653968253968256e-05,
+ "loss": 0.4371,
+ "step": 4237
+ },
+ {
+ "epoch": 24.217142857142857,
+ "grad_norm": 41.76313018798828,
+ "learning_rate": 2.8647619047619052e-05,
+ "loss": 0.2361,
+ "step": 4238
+ },
+ {
+ "epoch": 24.222857142857144,
+ "grad_norm": 37.7957649230957,
+ "learning_rate": 2.8641269841269842e-05,
+ "loss": 0.3493,
+ "step": 4239
+ },
+ {
+ "epoch": 24.228571428571428,
+ "grad_norm": 61.35261917114258,
+ "learning_rate": 2.8634920634920638e-05,
+ "loss": 0.3329,
+ "step": 4240
+ },
+ {
+ "epoch": 24.234285714285715,
+ "grad_norm": 42.222930908203125,
+ "learning_rate": 2.8628571428571434e-05,
+ "loss": 0.267,
+ "step": 4241
+ },
+ {
+ "epoch": 24.24,
+ "grad_norm": 25.356447219848633,
+ "learning_rate": 2.8622222222222223e-05,
+ "loss": 0.3173,
+ "step": 4242
+ },
+ {
+ "epoch": 24.245714285714286,
+ "grad_norm": 29.31715202331543,
+ "learning_rate": 2.861587301587302e-05,
+ "loss": 0.2556,
+ "step": 4243
+ },
+ {
+ "epoch": 24.251428571428573,
+ "grad_norm": 78.12251281738281,
+ "learning_rate": 2.8609523809523808e-05,
+ "loss": 0.2284,
+ "step": 4244
+ },
+ {
+ "epoch": 24.257142857142856,
+ "grad_norm": 38.94855880737305,
+ "learning_rate": 2.8603174603174604e-05,
+ "loss": 0.2179,
+ "step": 4245
+ },
+ {
+ "epoch": 24.262857142857143,
+ "grad_norm": 28.624399185180664,
+ "learning_rate": 2.85968253968254e-05,
+ "loss": 0.4882,
+ "step": 4246
+ },
+ {
+ "epoch": 24.268571428571427,
+ "grad_norm": 60.395606994628906,
+ "learning_rate": 2.859047619047619e-05,
+ "loss": 0.2819,
+ "step": 4247
+ },
+ {
+ "epoch": 24.274285714285714,
+ "grad_norm": 42.161983489990234,
+ "learning_rate": 2.8584126984126986e-05,
+ "loss": 0.2336,
+ "step": 4248
+ },
+ {
+ "epoch": 24.28,
+ "grad_norm": 45.44225311279297,
+ "learning_rate": 2.857777777777778e-05,
+ "loss": 0.3845,
+ "step": 4249
+ },
+ {
+ "epoch": 24.285714285714285,
+ "grad_norm": 54.161983489990234,
+ "learning_rate": 2.857142857142857e-05,
+ "loss": 0.2777,
+ "step": 4250
+ },
+ {
+ "epoch": 24.291428571428572,
+ "grad_norm": 213.60040283203125,
+ "learning_rate": 2.8565079365079367e-05,
+ "loss": 0.4929,
+ "step": 4251
+ },
+ {
+ "epoch": 24.29714285714286,
+ "grad_norm": 61.110660552978516,
+ "learning_rate": 2.8558730158730163e-05,
+ "loss": 0.3036,
+ "step": 4252
+ },
+ {
+ "epoch": 24.302857142857142,
+ "grad_norm": 38.8966064453125,
+ "learning_rate": 2.8552380952380952e-05,
+ "loss": 0.3094,
+ "step": 4253
+ },
+ {
+ "epoch": 24.30857142857143,
+ "grad_norm": 62.35374450683594,
+ "learning_rate": 2.8546031746031748e-05,
+ "loss": 0.3099,
+ "step": 4254
+ },
+ {
+ "epoch": 24.314285714285713,
+ "grad_norm": 96.93309020996094,
+ "learning_rate": 2.8539682539682537e-05,
+ "loss": 0.3475,
+ "step": 4255
+ },
+ {
+ "epoch": 24.32,
+ "grad_norm": 37.66231918334961,
+ "learning_rate": 2.8533333333333333e-05,
+ "loss": 0.3573,
+ "step": 4256
+ },
+ {
+ "epoch": 24.325714285714287,
+ "grad_norm": 51.56706237792969,
+ "learning_rate": 2.852698412698413e-05,
+ "loss": 0.2146,
+ "step": 4257
+ },
+ {
+ "epoch": 24.33142857142857,
+ "grad_norm": 20.347219467163086,
+ "learning_rate": 2.852063492063492e-05,
+ "loss": 0.2294,
+ "step": 4258
+ },
+ {
+ "epoch": 24.337142857142858,
+ "grad_norm": 46.085289001464844,
+ "learning_rate": 2.8514285714285715e-05,
+ "loss": 0.2955,
+ "step": 4259
+ },
+ {
+ "epoch": 24.34285714285714,
+ "grad_norm": 45.59817123413086,
+ "learning_rate": 2.850793650793651e-05,
+ "loss": 0.2426,
+ "step": 4260
+ },
+ {
+ "epoch": 24.34857142857143,
+ "grad_norm": 48.673213958740234,
+ "learning_rate": 2.85015873015873e-05,
+ "loss": 0.2566,
+ "step": 4261
+ },
+ {
+ "epoch": 24.354285714285716,
+ "grad_norm": 44.20500564575195,
+ "learning_rate": 2.8495238095238096e-05,
+ "loss": 0.2162,
+ "step": 4262
+ },
+ {
+ "epoch": 24.36,
+ "grad_norm": 33.636749267578125,
+ "learning_rate": 2.8488888888888892e-05,
+ "loss": 0.4338,
+ "step": 4263
+ },
+ {
+ "epoch": 24.365714285714287,
+ "grad_norm": 57.70370101928711,
+ "learning_rate": 2.848253968253968e-05,
+ "loss": 0.3726,
+ "step": 4264
+ },
+ {
+ "epoch": 24.37142857142857,
+ "grad_norm": 34.58115768432617,
+ "learning_rate": 2.8476190476190477e-05,
+ "loss": 0.2692,
+ "step": 4265
+ },
+ {
+ "epoch": 24.377142857142857,
+ "grad_norm": 140.1950225830078,
+ "learning_rate": 2.846984126984127e-05,
+ "loss": 0.2758,
+ "step": 4266
+ },
+ {
+ "epoch": 24.382857142857144,
+ "grad_norm": 24.006532669067383,
+ "learning_rate": 2.8463492063492066e-05,
+ "loss": 0.1841,
+ "step": 4267
+ },
+ {
+ "epoch": 24.388571428571428,
+ "grad_norm": 26.133983612060547,
+ "learning_rate": 2.845714285714286e-05,
+ "loss": 0.28,
+ "step": 4268
+ },
+ {
+ "epoch": 24.394285714285715,
+ "grad_norm": 42.2238655090332,
+ "learning_rate": 2.845079365079365e-05,
+ "loss": 0.1865,
+ "step": 4269
+ },
+ {
+ "epoch": 24.4,
+ "grad_norm": 29.878080368041992,
+ "learning_rate": 2.8444444444444447e-05,
+ "loss": 0.4168,
+ "step": 4270
+ },
+ {
+ "epoch": 24.405714285714286,
+ "grad_norm": 35.7902946472168,
+ "learning_rate": 2.843809523809524e-05,
+ "loss": 0.2483,
+ "step": 4271
+ },
+ {
+ "epoch": 24.411428571428573,
+ "grad_norm": 70.40202331542969,
+ "learning_rate": 2.8431746031746033e-05,
+ "loss": 0.5446,
+ "step": 4272
+ },
+ {
+ "epoch": 24.417142857142856,
+ "grad_norm": 48.534523010253906,
+ "learning_rate": 2.842539682539683e-05,
+ "loss": 0.3008,
+ "step": 4273
+ },
+ {
+ "epoch": 24.422857142857143,
+ "grad_norm": 106.88329315185547,
+ "learning_rate": 2.841904761904762e-05,
+ "loss": 0.2192,
+ "step": 4274
+ },
+ {
+ "epoch": 24.428571428571427,
+ "grad_norm": 33.87653732299805,
+ "learning_rate": 2.8412698412698414e-05,
+ "loss": 0.2836,
+ "step": 4275
+ },
+ {
+ "epoch": 24.434285714285714,
+ "grad_norm": 70.60962677001953,
+ "learning_rate": 2.840634920634921e-05,
+ "loss": 0.3215,
+ "step": 4276
+ },
+ {
+ "epoch": 24.44,
+ "grad_norm": 69.09635162353516,
+ "learning_rate": 2.84e-05,
+ "loss": 0.2381,
+ "step": 4277
+ },
+ {
+ "epoch": 24.445714285714285,
+ "grad_norm": 28.32038116455078,
+ "learning_rate": 2.8393650793650795e-05,
+ "loss": 0.3397,
+ "step": 4278
+ },
+ {
+ "epoch": 24.451428571428572,
+ "grad_norm": 70.71125030517578,
+ "learning_rate": 2.838730158730159e-05,
+ "loss": 0.5062,
+ "step": 4279
+ },
+ {
+ "epoch": 24.457142857142856,
+ "grad_norm": 48.60917282104492,
+ "learning_rate": 2.838095238095238e-05,
+ "loss": 0.3211,
+ "step": 4280
+ },
+ {
+ "epoch": 24.462857142857143,
+ "grad_norm": 312.99127197265625,
+ "learning_rate": 2.8374603174603176e-05,
+ "loss": 0.238,
+ "step": 4281
+ },
+ {
+ "epoch": 24.46857142857143,
+ "grad_norm": 30.63831901550293,
+ "learning_rate": 2.8368253968253972e-05,
+ "loss": 0.3494,
+ "step": 4282
+ },
+ {
+ "epoch": 24.474285714285713,
+ "grad_norm": 78.44001770019531,
+ "learning_rate": 2.8361904761904762e-05,
+ "loss": 0.2799,
+ "step": 4283
+ },
+ {
+ "epoch": 24.48,
+ "grad_norm": 28.793020248413086,
+ "learning_rate": 2.8355555555555558e-05,
+ "loss": 0.2468,
+ "step": 4284
+ },
+ {
+ "epoch": 24.485714285714284,
+ "grad_norm": 122.70006561279297,
+ "learning_rate": 2.8349206349206354e-05,
+ "loss": 0.2959,
+ "step": 4285
+ },
+ {
+ "epoch": 24.49142857142857,
+ "grad_norm": 24.2781982421875,
+ "learning_rate": 2.8342857142857143e-05,
+ "loss": 0.2785,
+ "step": 4286
+ },
+ {
+ "epoch": 24.497142857142858,
+ "grad_norm": 24.19087791442871,
+ "learning_rate": 2.833650793650794e-05,
+ "loss": 0.3076,
+ "step": 4287
+ },
+ {
+ "epoch": 24.502857142857142,
+ "grad_norm": 54.276973724365234,
+ "learning_rate": 2.8330158730158728e-05,
+ "loss": 0.2332,
+ "step": 4288
+ },
+ {
+ "epoch": 24.50857142857143,
+ "grad_norm": 49.06976318359375,
+ "learning_rate": 2.8323809523809524e-05,
+ "loss": 0.2764,
+ "step": 4289
+ },
+ {
+ "epoch": 24.514285714285712,
+ "grad_norm": 79.82915496826172,
+ "learning_rate": 2.831746031746032e-05,
+ "loss": 0.2594,
+ "step": 4290
+ },
+ {
+ "epoch": 24.52,
+ "grad_norm": 65.11991882324219,
+ "learning_rate": 2.831111111111111e-05,
+ "loss": 0.2255,
+ "step": 4291
+ },
+ {
+ "epoch": 24.525714285714287,
+ "grad_norm": 53.068416595458984,
+ "learning_rate": 2.8304761904761906e-05,
+ "loss": 0.2646,
+ "step": 4292
+ },
+ {
+ "epoch": 24.53142857142857,
+ "grad_norm": 53.878868103027344,
+ "learning_rate": 2.82984126984127e-05,
+ "loss": 0.3159,
+ "step": 4293
+ },
+ {
+ "epoch": 24.537142857142857,
+ "grad_norm": 81.9829330444336,
+ "learning_rate": 2.829206349206349e-05,
+ "loss": 0.3541,
+ "step": 4294
+ },
+ {
+ "epoch": 24.542857142857144,
+ "grad_norm": 42.4346923828125,
+ "learning_rate": 2.8285714285714287e-05,
+ "loss": 0.3058,
+ "step": 4295
+ },
+ {
+ "epoch": 24.548571428571428,
+ "grad_norm": 45.353668212890625,
+ "learning_rate": 2.8279365079365083e-05,
+ "loss": 0.2678,
+ "step": 4296
+ },
+ {
+ "epoch": 24.554285714285715,
+ "grad_norm": 74.28866577148438,
+ "learning_rate": 2.8273015873015872e-05,
+ "loss": 0.2563,
+ "step": 4297
+ },
+ {
+ "epoch": 24.56,
+ "grad_norm": 27.684419631958008,
+ "learning_rate": 2.8266666666666668e-05,
+ "loss": 0.2522,
+ "step": 4298
+ },
+ {
+ "epoch": 24.565714285714286,
+ "grad_norm": 76.28163146972656,
+ "learning_rate": 2.826031746031746e-05,
+ "loss": 0.2642,
+ "step": 4299
+ },
+ {
+ "epoch": 24.571428571428573,
+ "grad_norm": 52.390769958496094,
+ "learning_rate": 2.8253968253968253e-05,
+ "loss": 0.2927,
+ "step": 4300
+ },
+ {
+ "epoch": 24.577142857142857,
+ "grad_norm": 29.362049102783203,
+ "learning_rate": 2.824761904761905e-05,
+ "loss": 0.2513,
+ "step": 4301
+ },
+ {
+ "epoch": 24.582857142857144,
+ "grad_norm": 58.900634765625,
+ "learning_rate": 2.8241269841269842e-05,
+ "loss": 0.2636,
+ "step": 4302
+ },
+ {
+ "epoch": 24.588571428571427,
+ "grad_norm": 76.04304504394531,
+ "learning_rate": 2.8234920634920635e-05,
+ "loss": 0.3211,
+ "step": 4303
+ },
+ {
+ "epoch": 24.594285714285714,
+ "grad_norm": 36.00731658935547,
+ "learning_rate": 2.822857142857143e-05,
+ "loss": 0.2781,
+ "step": 4304
+ },
+ {
+ "epoch": 24.6,
+ "grad_norm": 142.11961364746094,
+ "learning_rate": 2.8222222222222223e-05,
+ "loss": 0.3775,
+ "step": 4305
+ },
+ {
+ "epoch": 24.605714285714285,
+ "grad_norm": 47.81526565551758,
+ "learning_rate": 2.821587301587302e-05,
+ "loss": 0.3731,
+ "step": 4306
+ },
+ {
+ "epoch": 24.611428571428572,
+ "grad_norm": 35.750518798828125,
+ "learning_rate": 2.8209523809523812e-05,
+ "loss": 0.4115,
+ "step": 4307
+ },
+ {
+ "epoch": 24.617142857142856,
+ "grad_norm": 110.9820556640625,
+ "learning_rate": 2.8203174603174605e-05,
+ "loss": 0.3973,
+ "step": 4308
+ },
+ {
+ "epoch": 24.622857142857143,
+ "grad_norm": 43.487083435058594,
+ "learning_rate": 2.81968253968254e-05,
+ "loss": 0.2279,
+ "step": 4309
+ },
+ {
+ "epoch": 24.62857142857143,
+ "grad_norm": 58.84871292114258,
+ "learning_rate": 2.819047619047619e-05,
+ "loss": 0.308,
+ "step": 4310
+ },
+ {
+ "epoch": 24.634285714285713,
+ "grad_norm": 39.39466857910156,
+ "learning_rate": 2.8184126984126986e-05,
+ "loss": 0.2654,
+ "step": 4311
+ },
+ {
+ "epoch": 24.64,
+ "grad_norm": 73.448486328125,
+ "learning_rate": 2.8177777777777782e-05,
+ "loss": 0.2046,
+ "step": 4312
+ },
+ {
+ "epoch": 24.645714285714284,
+ "grad_norm": 34.662811279296875,
+ "learning_rate": 2.817142857142857e-05,
+ "loss": 0.3182,
+ "step": 4313
+ },
+ {
+ "epoch": 24.65142857142857,
+ "grad_norm": 92.18864440917969,
+ "learning_rate": 2.8165079365079367e-05,
+ "loss": 0.3164,
+ "step": 4314
+ },
+ {
+ "epoch": 24.65714285714286,
+ "grad_norm": 122.65282440185547,
+ "learning_rate": 2.8158730158730163e-05,
+ "loss": 0.3399,
+ "step": 4315
+ },
+ {
+ "epoch": 24.662857142857142,
+ "grad_norm": 73.22319793701172,
+ "learning_rate": 2.8152380952380953e-05,
+ "loss": 0.3516,
+ "step": 4316
+ },
+ {
+ "epoch": 24.66857142857143,
+ "grad_norm": 49.231956481933594,
+ "learning_rate": 2.814603174603175e-05,
+ "loss": 0.3248,
+ "step": 4317
+ },
+ {
+ "epoch": 24.674285714285713,
+ "grad_norm": 59.13899612426758,
+ "learning_rate": 2.8139682539682545e-05,
+ "loss": 0.3696,
+ "step": 4318
+ },
+ {
+ "epoch": 24.68,
+ "grad_norm": 24.286306381225586,
+ "learning_rate": 2.8133333333333334e-05,
+ "loss": 0.2992,
+ "step": 4319
+ },
+ {
+ "epoch": 24.685714285714287,
+ "grad_norm": 75.39820098876953,
+ "learning_rate": 2.812698412698413e-05,
+ "loss": 0.2236,
+ "step": 4320
+ },
+ {
+ "epoch": 24.69142857142857,
+ "grad_norm": 61.131248474121094,
+ "learning_rate": 2.812063492063492e-05,
+ "loss": 0.3016,
+ "step": 4321
+ },
+ {
+ "epoch": 24.697142857142858,
+ "grad_norm": 124.00677490234375,
+ "learning_rate": 2.8114285714285715e-05,
+ "loss": 0.4102,
+ "step": 4322
+ },
+ {
+ "epoch": 24.70285714285714,
+ "grad_norm": 96.09242248535156,
+ "learning_rate": 2.810793650793651e-05,
+ "loss": 0.2678,
+ "step": 4323
+ },
+ {
+ "epoch": 24.708571428571428,
+ "grad_norm": 26.77962303161621,
+ "learning_rate": 2.81015873015873e-05,
+ "loss": 0.294,
+ "step": 4324
+ },
+ {
+ "epoch": 24.714285714285715,
+ "grad_norm": 30.745365142822266,
+ "learning_rate": 2.8095238095238096e-05,
+ "loss": 0.2074,
+ "step": 4325
+ },
+ {
+ "epoch": 24.72,
+ "grad_norm": 63.81066131591797,
+ "learning_rate": 2.8088888888888893e-05,
+ "loss": 0.2527,
+ "step": 4326
+ },
+ {
+ "epoch": 24.725714285714286,
+ "grad_norm": 15.541707992553711,
+ "learning_rate": 2.8082539682539682e-05,
+ "loss": 0.2539,
+ "step": 4327
+ },
+ {
+ "epoch": 24.731428571428573,
+ "grad_norm": 41.85258102416992,
+ "learning_rate": 2.8076190476190478e-05,
+ "loss": 0.3006,
+ "step": 4328
+ },
+ {
+ "epoch": 24.737142857142857,
+ "grad_norm": 102.15760040283203,
+ "learning_rate": 2.8069841269841274e-05,
+ "loss": 0.2942,
+ "step": 4329
+ },
+ {
+ "epoch": 24.742857142857144,
+ "grad_norm": 28.917007446289062,
+ "learning_rate": 2.8063492063492063e-05,
+ "loss": 0.4143,
+ "step": 4330
+ },
+ {
+ "epoch": 24.748571428571427,
+ "grad_norm": 37.711669921875,
+ "learning_rate": 2.805714285714286e-05,
+ "loss": 0.2402,
+ "step": 4331
+ },
+ {
+ "epoch": 24.754285714285714,
+ "grad_norm": 120.78079223632812,
+ "learning_rate": 2.805079365079365e-05,
+ "loss": 0.2288,
+ "step": 4332
+ },
+ {
+ "epoch": 24.76,
+ "grad_norm": 25.932483673095703,
+ "learning_rate": 2.8044444444444444e-05,
+ "loss": 0.2307,
+ "step": 4333
+ },
+ {
+ "epoch": 24.765714285714285,
+ "grad_norm": 26.200965881347656,
+ "learning_rate": 2.803809523809524e-05,
+ "loss": 0.213,
+ "step": 4334
+ },
+ {
+ "epoch": 24.771428571428572,
+ "grad_norm": 58.8943977355957,
+ "learning_rate": 2.803174603174603e-05,
+ "loss": 0.475,
+ "step": 4335
+ },
+ {
+ "epoch": 24.777142857142856,
+ "grad_norm": 46.189361572265625,
+ "learning_rate": 2.8025396825396826e-05,
+ "loss": 0.2892,
+ "step": 4336
+ },
+ {
+ "epoch": 24.782857142857143,
+ "grad_norm": 26.55647087097168,
+ "learning_rate": 2.801904761904762e-05,
+ "loss": 0.3676,
+ "step": 4337
+ },
+ {
+ "epoch": 24.78857142857143,
+ "grad_norm": 30.934398651123047,
+ "learning_rate": 2.8012698412698414e-05,
+ "loss": 0.3309,
+ "step": 4338
+ },
+ {
+ "epoch": 24.794285714285714,
+ "grad_norm": 104.68560791015625,
+ "learning_rate": 2.8006349206349207e-05,
+ "loss": 0.2351,
+ "step": 4339
+ },
+ {
+ "epoch": 24.8,
+ "grad_norm": 52.946502685546875,
+ "learning_rate": 2.8000000000000003e-05,
+ "loss": 0.2199,
+ "step": 4340
+ },
+ {
+ "epoch": 24.805714285714284,
+ "grad_norm": 59.598533630371094,
+ "learning_rate": 2.7993650793650796e-05,
+ "loss": 0.4019,
+ "step": 4341
+ },
+ {
+ "epoch": 24.81142857142857,
+ "grad_norm": 31.779327392578125,
+ "learning_rate": 2.7987301587301588e-05,
+ "loss": 0.4388,
+ "step": 4342
+ },
+ {
+ "epoch": 24.81714285714286,
+ "grad_norm": 104.10421752929688,
+ "learning_rate": 2.798095238095238e-05,
+ "loss": 0.3035,
+ "step": 4343
+ },
+ {
+ "epoch": 24.822857142857142,
+ "grad_norm": 71.49649047851562,
+ "learning_rate": 2.7974603174603177e-05,
+ "loss": 0.3221,
+ "step": 4344
+ },
+ {
+ "epoch": 24.82857142857143,
+ "grad_norm": 49.50043869018555,
+ "learning_rate": 2.7968253968253973e-05,
+ "loss": 0.1884,
+ "step": 4345
+ },
+ {
+ "epoch": 24.834285714285713,
+ "grad_norm": 31.241344451904297,
+ "learning_rate": 2.7961904761904762e-05,
+ "loss": 0.2233,
+ "step": 4346
+ },
+ {
+ "epoch": 24.84,
+ "grad_norm": 66.66368865966797,
+ "learning_rate": 2.7955555555555558e-05,
+ "loss": 0.3339,
+ "step": 4347
+ },
+ {
+ "epoch": 24.845714285714287,
+ "grad_norm": 1207.3427734375,
+ "learning_rate": 2.7949206349206354e-05,
+ "loss": 0.2911,
+ "step": 4348
+ },
+ {
+ "epoch": 24.85142857142857,
+ "grad_norm": 38.64277648925781,
+ "learning_rate": 2.7942857142857143e-05,
+ "loss": 0.2442,
+ "step": 4349
+ },
+ {
+ "epoch": 24.857142857142858,
+ "grad_norm": 26.282543182373047,
+ "learning_rate": 2.793650793650794e-05,
+ "loss": 0.2649,
+ "step": 4350
+ },
+ {
+ "epoch": 24.86285714285714,
+ "grad_norm": 28.724164962768555,
+ "learning_rate": 2.7930158730158736e-05,
+ "loss": 0.3553,
+ "step": 4351
+ },
+ {
+ "epoch": 24.86857142857143,
+ "grad_norm": 30.950960159301758,
+ "learning_rate": 2.7923809523809525e-05,
+ "loss": 0.521,
+ "step": 4352
+ },
+ {
+ "epoch": 24.874285714285715,
+ "grad_norm": 35.390445709228516,
+ "learning_rate": 2.791746031746032e-05,
+ "loss": 0.3175,
+ "step": 4353
+ },
+ {
+ "epoch": 24.88,
+ "grad_norm": 268.7099304199219,
+ "learning_rate": 2.791111111111111e-05,
+ "loss": 0.419,
+ "step": 4354
+ },
+ {
+ "epoch": 24.885714285714286,
+ "grad_norm": 71.29864501953125,
+ "learning_rate": 2.7904761904761906e-05,
+ "loss": 0.2923,
+ "step": 4355
+ },
+ {
+ "epoch": 24.89142857142857,
+ "grad_norm": 133.0418243408203,
+ "learning_rate": 2.7898412698412702e-05,
+ "loss": 0.2464,
+ "step": 4356
+ },
+ {
+ "epoch": 24.897142857142857,
+ "grad_norm": 48.5760612487793,
+ "learning_rate": 2.789206349206349e-05,
+ "loss": 0.2966,
+ "step": 4357
+ },
+ {
+ "epoch": 24.902857142857144,
+ "grad_norm": 79.38921356201172,
+ "learning_rate": 2.7885714285714287e-05,
+ "loss": 0.3203,
+ "step": 4358
+ },
+ {
+ "epoch": 24.908571428571427,
+ "grad_norm": 41.58376693725586,
+ "learning_rate": 2.7879365079365083e-05,
+ "loss": 0.4117,
+ "step": 4359
+ },
+ {
+ "epoch": 24.914285714285715,
+ "grad_norm": 31.19057273864746,
+ "learning_rate": 2.7873015873015873e-05,
+ "loss": 0.3405,
+ "step": 4360
+ },
+ {
+ "epoch": 24.92,
+ "grad_norm": 26.2551212310791,
+ "learning_rate": 2.786666666666667e-05,
+ "loss": 0.2894,
+ "step": 4361
+ },
+ {
+ "epoch": 24.925714285714285,
+ "grad_norm": 56.0374641418457,
+ "learning_rate": 2.7860317460317465e-05,
+ "loss": 0.3965,
+ "step": 4362
+ },
+ {
+ "epoch": 24.931428571428572,
+ "grad_norm": 36.64375686645508,
+ "learning_rate": 2.7853968253968254e-05,
+ "loss": 0.3491,
+ "step": 4363
+ },
+ {
+ "epoch": 24.937142857142856,
+ "grad_norm": 90.42372131347656,
+ "learning_rate": 2.784761904761905e-05,
+ "loss": 0.2957,
+ "step": 4364
+ },
+ {
+ "epoch": 24.942857142857143,
+ "grad_norm": 45.03855895996094,
+ "learning_rate": 2.784126984126984e-05,
+ "loss": 0.2678,
+ "step": 4365
+ },
+ {
+ "epoch": 24.94857142857143,
+ "grad_norm": 23.052322387695312,
+ "learning_rate": 2.7834920634920635e-05,
+ "loss": 0.2565,
+ "step": 4366
+ },
+ {
+ "epoch": 24.954285714285714,
+ "grad_norm": 160.03709411621094,
+ "learning_rate": 2.782857142857143e-05,
+ "loss": 0.2906,
+ "step": 4367
+ },
+ {
+ "epoch": 24.96,
+ "grad_norm": 35.61563491821289,
+ "learning_rate": 2.782222222222222e-05,
+ "loss": 0.2952,
+ "step": 4368
+ },
+ {
+ "epoch": 24.965714285714284,
+ "grad_norm": 39.50947189331055,
+ "learning_rate": 2.7815873015873017e-05,
+ "loss": 0.2457,
+ "step": 4369
+ },
+ {
+ "epoch": 24.97142857142857,
+ "grad_norm": 20.795259475708008,
+ "learning_rate": 2.7809523809523813e-05,
+ "loss": 0.318,
+ "step": 4370
+ },
+ {
+ "epoch": 24.97714285714286,
+ "grad_norm": 427.93792724609375,
+ "learning_rate": 2.7803174603174602e-05,
+ "loss": 0.3685,
+ "step": 4371
+ },
+ {
+ "epoch": 24.982857142857142,
+ "grad_norm": 65.03075408935547,
+ "learning_rate": 2.7796825396825398e-05,
+ "loss": 0.2777,
+ "step": 4372
+ },
+ {
+ "epoch": 24.98857142857143,
+ "grad_norm": 80.09349822998047,
+ "learning_rate": 2.7790476190476194e-05,
+ "loss": 0.2586,
+ "step": 4373
+ },
+ {
+ "epoch": 24.994285714285713,
+ "grad_norm": 52.3007698059082,
+ "learning_rate": 2.7784126984126983e-05,
+ "loss": 0.2647,
+ "step": 4374
+ },
+ {
+ "epoch": 25.0,
+ "grad_norm": 96.0640640258789,
+ "learning_rate": 2.777777777777778e-05,
+ "loss": 0.2789,
+ "step": 4375
+ },
+ {
+ "epoch": 25.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6246998906135559,
+ "eval_map": 0.9312,
+ "eval_map_50": 0.9691,
+ "eval_map_75": 0.9566,
+ "eval_map_large": 0.9313,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9312,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7794,
+ "eval_mar_10": 0.9724,
+ "eval_mar_100": 0.9759,
+ "eval_mar_100_per_class": 0.9759,
+ "eval_mar_large": 0.9759,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.7404,
+ "eval_samples_per_second": 21.397,
+ "eval_steps_per_second": 2.693,
+ "step": 4375
+ },
+ {
+ "epoch": 25.005714285714287,
+ "grad_norm": 43.88606262207031,
+ "learning_rate": 2.7771428571428572e-05,
+ "loss": 0.2644,
+ "step": 4376
+ },
+ {
+ "epoch": 25.01142857142857,
+ "grad_norm": 61.545406341552734,
+ "learning_rate": 2.7765079365079368e-05,
+ "loss": 0.3157,
+ "step": 4377
+ },
+ {
+ "epoch": 25.017142857142858,
+ "grad_norm": 21.369365692138672,
+ "learning_rate": 2.775873015873016e-05,
+ "loss": 0.3259,
+ "step": 4378
+ },
+ {
+ "epoch": 25.02285714285714,
+ "grad_norm": 59.84233093261719,
+ "learning_rate": 2.7752380952380953e-05,
+ "loss": 0.2608,
+ "step": 4379
+ },
+ {
+ "epoch": 25.02857142857143,
+ "grad_norm": 51.466552734375,
+ "learning_rate": 2.774603174603175e-05,
+ "loss": 0.3375,
+ "step": 4380
+ },
+ {
+ "epoch": 25.034285714285716,
+ "grad_norm": 85.64466094970703,
+ "learning_rate": 2.7739682539682542e-05,
+ "loss": 0.2167,
+ "step": 4381
+ },
+ {
+ "epoch": 25.04,
+ "grad_norm": 22.61601448059082,
+ "learning_rate": 2.7733333333333334e-05,
+ "loss": 0.274,
+ "step": 4382
+ },
+ {
+ "epoch": 25.045714285714286,
+ "grad_norm": 39.32375717163086,
+ "learning_rate": 2.772698412698413e-05,
+ "loss": 0.2653,
+ "step": 4383
+ },
+ {
+ "epoch": 25.05142857142857,
+ "grad_norm": 62.446346282958984,
+ "learning_rate": 2.7720634920634926e-05,
+ "loss": 0.2597,
+ "step": 4384
+ },
+ {
+ "epoch": 25.057142857142857,
+ "grad_norm": 57.022151947021484,
+ "learning_rate": 2.7714285714285716e-05,
+ "loss": 0.3768,
+ "step": 4385
+ },
+ {
+ "epoch": 25.062857142857144,
+ "grad_norm": 29.207191467285156,
+ "learning_rate": 2.770793650793651e-05,
+ "loss": 0.296,
+ "step": 4386
+ },
+ {
+ "epoch": 25.068571428571428,
+ "grad_norm": 67.32698822021484,
+ "learning_rate": 2.77015873015873e-05,
+ "loss": 0.257,
+ "step": 4387
+ },
+ {
+ "epoch": 25.074285714285715,
+ "grad_norm": 73.68763732910156,
+ "learning_rate": 2.7695238095238097e-05,
+ "loss": 0.2113,
+ "step": 4388
+ },
+ {
+ "epoch": 25.08,
+ "grad_norm": 58.43614196777344,
+ "learning_rate": 2.7688888888888893e-05,
+ "loss": 0.2352,
+ "step": 4389
+ },
+ {
+ "epoch": 25.085714285714285,
+ "grad_norm": 25.05832290649414,
+ "learning_rate": 2.7682539682539682e-05,
+ "loss": 0.2948,
+ "step": 4390
+ },
+ {
+ "epoch": 25.091428571428573,
+ "grad_norm": 47.36033630371094,
+ "learning_rate": 2.7676190476190478e-05,
+ "loss": 0.3172,
+ "step": 4391
+ },
+ {
+ "epoch": 25.097142857142856,
+ "grad_norm": 32.92298126220703,
+ "learning_rate": 2.7669841269841274e-05,
+ "loss": 0.3204,
+ "step": 4392
+ },
+ {
+ "epoch": 25.102857142857143,
+ "grad_norm": 51.99428939819336,
+ "learning_rate": 2.7663492063492063e-05,
+ "loss": 0.3066,
+ "step": 4393
+ },
+ {
+ "epoch": 25.10857142857143,
+ "grad_norm": 14.884100914001465,
+ "learning_rate": 2.765714285714286e-05,
+ "loss": 0.3006,
+ "step": 4394
+ },
+ {
+ "epoch": 25.114285714285714,
+ "grad_norm": 25.125812530517578,
+ "learning_rate": 2.765079365079365e-05,
+ "loss": 0.3255,
+ "step": 4395
+ },
+ {
+ "epoch": 25.12,
+ "grad_norm": 51.343177795410156,
+ "learning_rate": 2.7644444444444445e-05,
+ "loss": 0.3462,
+ "step": 4396
+ },
+ {
+ "epoch": 25.125714285714285,
+ "grad_norm": 72.3733901977539,
+ "learning_rate": 2.763809523809524e-05,
+ "loss": 0.621,
+ "step": 4397
+ },
+ {
+ "epoch": 25.13142857142857,
+ "grad_norm": 78.37907409667969,
+ "learning_rate": 2.763174603174603e-05,
+ "loss": 0.4284,
+ "step": 4398
+ },
+ {
+ "epoch": 25.13714285714286,
+ "grad_norm": 144.72216796875,
+ "learning_rate": 2.7625396825396826e-05,
+ "loss": 0.3259,
+ "step": 4399
+ },
+ {
+ "epoch": 25.142857142857142,
+ "grad_norm": 47.657203674316406,
+ "learning_rate": 2.7619047619047622e-05,
+ "loss": 0.3515,
+ "step": 4400
+ },
+ {
+ "epoch": 25.14857142857143,
+ "grad_norm": 29.482498168945312,
+ "learning_rate": 2.761269841269841e-05,
+ "loss": 0.3151,
+ "step": 4401
+ },
+ {
+ "epoch": 25.154285714285713,
+ "grad_norm": 43.94999694824219,
+ "learning_rate": 2.7606349206349207e-05,
+ "loss": 0.2682,
+ "step": 4402
+ },
+ {
+ "epoch": 25.16,
+ "grad_norm": 43.80424880981445,
+ "learning_rate": 2.7600000000000003e-05,
+ "loss": 0.2339,
+ "step": 4403
+ },
+ {
+ "epoch": 25.165714285714287,
+ "grad_norm": 56.889461517333984,
+ "learning_rate": 2.7593650793650793e-05,
+ "loss": 0.3043,
+ "step": 4404
+ },
+ {
+ "epoch": 25.17142857142857,
+ "grad_norm": 378.54022216796875,
+ "learning_rate": 2.758730158730159e-05,
+ "loss": 0.3664,
+ "step": 4405
+ },
+ {
+ "epoch": 25.177142857142858,
+ "grad_norm": 47.72706985473633,
+ "learning_rate": 2.7580952380952378e-05,
+ "loss": 0.2012,
+ "step": 4406
+ },
+ {
+ "epoch": 25.18285714285714,
+ "grad_norm": 116.41458129882812,
+ "learning_rate": 2.7574603174603174e-05,
+ "loss": 0.3051,
+ "step": 4407
+ },
+ {
+ "epoch": 25.18857142857143,
+ "grad_norm": 29.590312957763672,
+ "learning_rate": 2.756825396825397e-05,
+ "loss": 0.2562,
+ "step": 4408
+ },
+ {
+ "epoch": 25.194285714285716,
+ "grad_norm": 36.94352722167969,
+ "learning_rate": 2.7561904761904763e-05,
+ "loss": 0.2394,
+ "step": 4409
+ },
+ {
+ "epoch": 25.2,
+ "grad_norm": 19.118684768676758,
+ "learning_rate": 2.7555555555555555e-05,
+ "loss": 0.1968,
+ "step": 4410
+ },
+ {
+ "epoch": 25.205714285714286,
+ "grad_norm": 23.596872329711914,
+ "learning_rate": 2.754920634920635e-05,
+ "loss": 0.2467,
+ "step": 4411
+ },
+ {
+ "epoch": 25.21142857142857,
+ "grad_norm": 45.40176010131836,
+ "learning_rate": 2.7542857142857144e-05,
+ "loss": 0.3012,
+ "step": 4412
+ },
+ {
+ "epoch": 25.217142857142857,
+ "grad_norm": 132.94227600097656,
+ "learning_rate": 2.7536507936507937e-05,
+ "loss": 0.2807,
+ "step": 4413
+ },
+ {
+ "epoch": 25.222857142857144,
+ "grad_norm": 34.401912689208984,
+ "learning_rate": 2.7530158730158733e-05,
+ "loss": 0.292,
+ "step": 4414
+ },
+ {
+ "epoch": 25.228571428571428,
+ "grad_norm": 24.77764129638672,
+ "learning_rate": 2.7523809523809525e-05,
+ "loss": 0.2501,
+ "step": 4415
+ },
+ {
+ "epoch": 25.234285714285715,
+ "grad_norm": 19.19468116760254,
+ "learning_rate": 2.751746031746032e-05,
+ "loss": 0.3163,
+ "step": 4416
+ },
+ {
+ "epoch": 25.24,
+ "grad_norm": 292.3793029785156,
+ "learning_rate": 2.751111111111111e-05,
+ "loss": 0.2731,
+ "step": 4417
+ },
+ {
+ "epoch": 25.245714285714286,
+ "grad_norm": 52.74235153198242,
+ "learning_rate": 2.7504761904761907e-05,
+ "loss": 0.3278,
+ "step": 4418
+ },
+ {
+ "epoch": 25.251428571428573,
+ "grad_norm": 28.179885864257812,
+ "learning_rate": 2.7498412698412703e-05,
+ "loss": 0.3716,
+ "step": 4419
+ },
+ {
+ "epoch": 25.257142857142856,
+ "grad_norm": 12.870906829833984,
+ "learning_rate": 2.7492063492063492e-05,
+ "loss": 0.307,
+ "step": 4420
+ },
+ {
+ "epoch": 25.262857142857143,
+ "grad_norm": 34.991180419921875,
+ "learning_rate": 2.7485714285714288e-05,
+ "loss": 0.2501,
+ "step": 4421
+ },
+ {
+ "epoch": 25.268571428571427,
+ "grad_norm": 25.55196189880371,
+ "learning_rate": 2.7479365079365084e-05,
+ "loss": 0.2556,
+ "step": 4422
+ },
+ {
+ "epoch": 25.274285714285714,
+ "grad_norm": 38.679012298583984,
+ "learning_rate": 2.7473015873015873e-05,
+ "loss": 0.3446,
+ "step": 4423
+ },
+ {
+ "epoch": 25.28,
+ "grad_norm": 54.33946990966797,
+ "learning_rate": 2.746666666666667e-05,
+ "loss": 0.1951,
+ "step": 4424
+ },
+ {
+ "epoch": 25.285714285714285,
+ "grad_norm": 268.55279541015625,
+ "learning_rate": 2.7460317460317465e-05,
+ "loss": 0.4006,
+ "step": 4425
+ },
+ {
+ "epoch": 25.291428571428572,
+ "grad_norm": 33.12761306762695,
+ "learning_rate": 2.7453968253968254e-05,
+ "loss": 0.3447,
+ "step": 4426
+ },
+ {
+ "epoch": 25.29714285714286,
+ "grad_norm": 35.51462936401367,
+ "learning_rate": 2.744761904761905e-05,
+ "loss": 0.1925,
+ "step": 4427
+ },
+ {
+ "epoch": 25.302857142857142,
+ "grad_norm": 352.35626220703125,
+ "learning_rate": 2.744126984126984e-05,
+ "loss": 0.3144,
+ "step": 4428
+ },
+ {
+ "epoch": 25.30857142857143,
+ "grad_norm": 28.301921844482422,
+ "learning_rate": 2.7434920634920636e-05,
+ "loss": 0.2511,
+ "step": 4429
+ },
+ {
+ "epoch": 25.314285714285713,
+ "grad_norm": 63.250953674316406,
+ "learning_rate": 2.742857142857143e-05,
+ "loss": 0.2418,
+ "step": 4430
+ },
+ {
+ "epoch": 25.32,
+ "grad_norm": 37.64375305175781,
+ "learning_rate": 2.742222222222222e-05,
+ "loss": 0.2395,
+ "step": 4431
+ },
+ {
+ "epoch": 25.325714285714287,
+ "grad_norm": 25.647315979003906,
+ "learning_rate": 2.7415873015873017e-05,
+ "loss": 0.2811,
+ "step": 4432
+ },
+ {
+ "epoch": 25.33142857142857,
+ "grad_norm": 65.68405151367188,
+ "learning_rate": 2.7409523809523813e-05,
+ "loss": 0.3045,
+ "step": 4433
+ },
+ {
+ "epoch": 25.337142857142858,
+ "grad_norm": 56.48481750488281,
+ "learning_rate": 2.7403174603174602e-05,
+ "loss": 0.2342,
+ "step": 4434
+ },
+ {
+ "epoch": 25.34285714285714,
+ "grad_norm": 56.583316802978516,
+ "learning_rate": 2.7396825396825398e-05,
+ "loss": 0.2946,
+ "step": 4435
+ },
+ {
+ "epoch": 25.34857142857143,
+ "grad_norm": 334.7755432128906,
+ "learning_rate": 2.7390476190476194e-05,
+ "loss": 0.3795,
+ "step": 4436
+ },
+ {
+ "epoch": 25.354285714285716,
+ "grad_norm": 28.795318603515625,
+ "learning_rate": 2.7384126984126984e-05,
+ "loss": 0.1844,
+ "step": 4437
+ },
+ {
+ "epoch": 25.36,
+ "grad_norm": 547.7698974609375,
+ "learning_rate": 2.737777777777778e-05,
+ "loss": 0.3293,
+ "step": 4438
+ },
+ {
+ "epoch": 25.365714285714287,
+ "grad_norm": 129.0718536376953,
+ "learning_rate": 2.737142857142857e-05,
+ "loss": 0.3313,
+ "step": 4439
+ },
+ {
+ "epoch": 25.37142857142857,
+ "grad_norm": 98.94517517089844,
+ "learning_rate": 2.7365079365079365e-05,
+ "loss": 0.3156,
+ "step": 4440
+ },
+ {
+ "epoch": 25.377142857142857,
+ "grad_norm": 43.74811935424805,
+ "learning_rate": 2.735873015873016e-05,
+ "loss": 0.3324,
+ "step": 4441
+ },
+ {
+ "epoch": 25.382857142857144,
+ "grad_norm": 68.41603088378906,
+ "learning_rate": 2.735238095238095e-05,
+ "loss": 0.3158,
+ "step": 4442
+ },
+ {
+ "epoch": 25.388571428571428,
+ "grad_norm": 37.616661071777344,
+ "learning_rate": 2.7346031746031746e-05,
+ "loss": 0.22,
+ "step": 4443
+ },
+ {
+ "epoch": 25.394285714285715,
+ "grad_norm": 51.03703689575195,
+ "learning_rate": 2.7339682539682542e-05,
+ "loss": 0.3034,
+ "step": 4444
+ },
+ {
+ "epoch": 25.4,
+ "grad_norm": 13.909053802490234,
+ "learning_rate": 2.733333333333333e-05,
+ "loss": 0.2913,
+ "step": 4445
+ },
+ {
+ "epoch": 25.405714285714286,
+ "grad_norm": 65.43724822998047,
+ "learning_rate": 2.7326984126984127e-05,
+ "loss": 0.215,
+ "step": 4446
+ },
+ {
+ "epoch": 25.411428571428573,
+ "grad_norm": 58.34597396850586,
+ "learning_rate": 2.7320634920634923e-05,
+ "loss": 0.2464,
+ "step": 4447
+ },
+ {
+ "epoch": 25.417142857142856,
+ "grad_norm": 38.75104522705078,
+ "learning_rate": 2.7314285714285716e-05,
+ "loss": 0.3302,
+ "step": 4448
+ },
+ {
+ "epoch": 25.422857142857143,
+ "grad_norm": 19.12841796875,
+ "learning_rate": 2.730793650793651e-05,
+ "loss": 0.276,
+ "step": 4449
+ },
+ {
+ "epoch": 25.428571428571427,
+ "grad_norm": 39.01633834838867,
+ "learning_rate": 2.73015873015873e-05,
+ "loss": 0.3066,
+ "step": 4450
+ },
+ {
+ "epoch": 25.434285714285714,
+ "grad_norm": 29.239784240722656,
+ "learning_rate": 2.7295238095238097e-05,
+ "loss": 0.154,
+ "step": 4451
+ },
+ {
+ "epoch": 25.44,
+ "grad_norm": 69.61752319335938,
+ "learning_rate": 2.728888888888889e-05,
+ "loss": 0.4975,
+ "step": 4452
+ },
+ {
+ "epoch": 25.445714285714285,
+ "grad_norm": 75.92781066894531,
+ "learning_rate": 2.7282539682539683e-05,
+ "loss": 0.2437,
+ "step": 4453
+ },
+ {
+ "epoch": 25.451428571428572,
+ "grad_norm": 86.27715301513672,
+ "learning_rate": 2.727619047619048e-05,
+ "loss": 0.4747,
+ "step": 4454
+ },
+ {
+ "epoch": 25.457142857142856,
+ "grad_norm": 23.397066116333008,
+ "learning_rate": 2.7269841269841275e-05,
+ "loss": 0.3549,
+ "step": 4455
+ },
+ {
+ "epoch": 25.462857142857143,
+ "grad_norm": 24.970991134643555,
+ "learning_rate": 2.7263492063492064e-05,
+ "loss": 0.2626,
+ "step": 4456
+ },
+ {
+ "epoch": 25.46857142857143,
+ "grad_norm": 37.73689651489258,
+ "learning_rate": 2.725714285714286e-05,
+ "loss": 0.2319,
+ "step": 4457
+ },
+ {
+ "epoch": 25.474285714285713,
+ "grad_norm": 46.63981628417969,
+ "learning_rate": 2.7250793650793656e-05,
+ "loss": 0.2903,
+ "step": 4458
+ },
+ {
+ "epoch": 25.48,
+ "grad_norm": 33.05022430419922,
+ "learning_rate": 2.7244444444444445e-05,
+ "loss": 0.2529,
+ "step": 4459
+ },
+ {
+ "epoch": 25.485714285714284,
+ "grad_norm": 52.30170822143555,
+ "learning_rate": 2.723809523809524e-05,
+ "loss": 0.3641,
+ "step": 4460
+ },
+ {
+ "epoch": 25.49142857142857,
+ "grad_norm": 66.10030364990234,
+ "learning_rate": 2.723174603174603e-05,
+ "loss": 0.2233,
+ "step": 4461
+ },
+ {
+ "epoch": 25.497142857142858,
+ "grad_norm": 20.06730079650879,
+ "learning_rate": 2.7225396825396827e-05,
+ "loss": 0.2077,
+ "step": 4462
+ },
+ {
+ "epoch": 25.502857142857142,
+ "grad_norm": 68.0523910522461,
+ "learning_rate": 2.7219047619047623e-05,
+ "loss": 0.2541,
+ "step": 4463
+ },
+ {
+ "epoch": 25.50857142857143,
+ "grad_norm": 46.033531188964844,
+ "learning_rate": 2.7212698412698412e-05,
+ "loss": 0.1659,
+ "step": 4464
+ },
+ {
+ "epoch": 25.514285714285712,
+ "grad_norm": 148.0507049560547,
+ "learning_rate": 2.7206349206349208e-05,
+ "loss": 0.2559,
+ "step": 4465
+ },
+ {
+ "epoch": 25.52,
+ "grad_norm": 211.72640991210938,
+ "learning_rate": 2.7200000000000004e-05,
+ "loss": 0.5573,
+ "step": 4466
+ },
+ {
+ "epoch": 25.525714285714287,
+ "grad_norm": 41.14247512817383,
+ "learning_rate": 2.7193650793650793e-05,
+ "loss": 0.234,
+ "step": 4467
+ },
+ {
+ "epoch": 25.53142857142857,
+ "grad_norm": 40.05872344970703,
+ "learning_rate": 2.718730158730159e-05,
+ "loss": 0.2458,
+ "step": 4468
+ },
+ {
+ "epoch": 25.537142857142857,
+ "grad_norm": 18.45254135131836,
+ "learning_rate": 2.7180952380952385e-05,
+ "loss": 0.2387,
+ "step": 4469
+ },
+ {
+ "epoch": 25.542857142857144,
+ "grad_norm": 75.00344848632812,
+ "learning_rate": 2.7174603174603174e-05,
+ "loss": 0.2262,
+ "step": 4470
+ },
+ {
+ "epoch": 25.548571428571428,
+ "grad_norm": 53.357627868652344,
+ "learning_rate": 2.716825396825397e-05,
+ "loss": 0.2403,
+ "step": 4471
+ },
+ {
+ "epoch": 25.554285714285715,
+ "grad_norm": 57.647972106933594,
+ "learning_rate": 2.716190476190476e-05,
+ "loss": 0.242,
+ "step": 4472
+ },
+ {
+ "epoch": 25.56,
+ "grad_norm": 54.45359420776367,
+ "learning_rate": 2.7155555555555556e-05,
+ "loss": 0.2642,
+ "step": 4473
+ },
+ {
+ "epoch": 25.565714285714286,
+ "grad_norm": 41.4904670715332,
+ "learning_rate": 2.7149206349206352e-05,
+ "loss": 0.2704,
+ "step": 4474
+ },
+ {
+ "epoch": 25.571428571428573,
+ "grad_norm": 29.26909637451172,
+ "learning_rate": 2.714285714285714e-05,
+ "loss": 0.3188,
+ "step": 4475
+ },
+ {
+ "epoch": 25.577142857142857,
+ "grad_norm": 268.87744140625,
+ "learning_rate": 2.7136507936507937e-05,
+ "loss": 0.4228,
+ "step": 4476
+ },
+ {
+ "epoch": 25.582857142857144,
+ "grad_norm": 32.49101638793945,
+ "learning_rate": 2.7130158730158733e-05,
+ "loss": 0.2181,
+ "step": 4477
+ },
+ {
+ "epoch": 25.588571428571427,
+ "grad_norm": 43.50292205810547,
+ "learning_rate": 2.7123809523809522e-05,
+ "loss": 0.2767,
+ "step": 4478
+ },
+ {
+ "epoch": 25.594285714285714,
+ "grad_norm": 41.27511215209961,
+ "learning_rate": 2.7117460317460318e-05,
+ "loss": 0.2836,
+ "step": 4479
+ },
+ {
+ "epoch": 25.6,
+ "grad_norm": 75.72777557373047,
+ "learning_rate": 2.7111111111111114e-05,
+ "loss": 0.3726,
+ "step": 4480
+ },
+ {
+ "epoch": 25.605714285714285,
+ "grad_norm": 25.272321701049805,
+ "learning_rate": 2.7104761904761904e-05,
+ "loss": 0.3072,
+ "step": 4481
+ },
+ {
+ "epoch": 25.611428571428572,
+ "grad_norm": 68.56621551513672,
+ "learning_rate": 2.70984126984127e-05,
+ "loss": 0.2443,
+ "step": 4482
+ },
+ {
+ "epoch": 25.617142857142856,
+ "grad_norm": 27.494661331176758,
+ "learning_rate": 2.7092063492063492e-05,
+ "loss": 0.2897,
+ "step": 4483
+ },
+ {
+ "epoch": 25.622857142857143,
+ "grad_norm": 66.72151184082031,
+ "learning_rate": 2.7085714285714285e-05,
+ "loss": 0.2782,
+ "step": 4484
+ },
+ {
+ "epoch": 25.62857142857143,
+ "grad_norm": 53.125858306884766,
+ "learning_rate": 2.707936507936508e-05,
+ "loss": 0.3291,
+ "step": 4485
+ },
+ {
+ "epoch": 25.634285714285713,
+ "grad_norm": 42.118446350097656,
+ "learning_rate": 2.7073015873015874e-05,
+ "loss": 0.2855,
+ "step": 4486
+ },
+ {
+ "epoch": 25.64,
+ "grad_norm": 25.07311248779297,
+ "learning_rate": 2.706666666666667e-05,
+ "loss": 0.3883,
+ "step": 4487
+ },
+ {
+ "epoch": 25.645714285714284,
+ "grad_norm": 31.401103973388672,
+ "learning_rate": 2.7060317460317462e-05,
+ "loss": 0.3002,
+ "step": 4488
+ },
+ {
+ "epoch": 25.65142857142857,
+ "grad_norm": 28.622156143188477,
+ "learning_rate": 2.7053968253968255e-05,
+ "loss": 0.3207,
+ "step": 4489
+ },
+ {
+ "epoch": 25.65714285714286,
+ "grad_norm": 23.805086135864258,
+ "learning_rate": 2.704761904761905e-05,
+ "loss": 0.208,
+ "step": 4490
+ },
+ {
+ "epoch": 25.662857142857142,
+ "grad_norm": 140.76644897460938,
+ "learning_rate": 2.7041269841269843e-05,
+ "loss": 0.2796,
+ "step": 4491
+ },
+ {
+ "epoch": 25.66857142857143,
+ "grad_norm": 104.07940673828125,
+ "learning_rate": 2.7034920634920636e-05,
+ "loss": 0.2813,
+ "step": 4492
+ },
+ {
+ "epoch": 25.674285714285713,
+ "grad_norm": 985.031982421875,
+ "learning_rate": 2.7028571428571432e-05,
+ "loss": 0.2497,
+ "step": 4493
+ },
+ {
+ "epoch": 25.68,
+ "grad_norm": 36.1199836730957,
+ "learning_rate": 2.702222222222222e-05,
+ "loss": 0.2631,
+ "step": 4494
+ },
+ {
+ "epoch": 25.685714285714287,
+ "grad_norm": 58.97662353515625,
+ "learning_rate": 2.7015873015873017e-05,
+ "loss": 0.3707,
+ "step": 4495
+ },
+ {
+ "epoch": 25.69142857142857,
+ "grad_norm": 24.511669158935547,
+ "learning_rate": 2.7009523809523813e-05,
+ "loss": 0.2219,
+ "step": 4496
+ },
+ {
+ "epoch": 25.697142857142858,
+ "grad_norm": 80.04442596435547,
+ "learning_rate": 2.7003174603174603e-05,
+ "loss": 0.3375,
+ "step": 4497
+ },
+ {
+ "epoch": 25.70285714285714,
+ "grad_norm": 74.19042205810547,
+ "learning_rate": 2.69968253968254e-05,
+ "loss": 0.2039,
+ "step": 4498
+ },
+ {
+ "epoch": 25.708571428571428,
+ "grad_norm": 124.21507263183594,
+ "learning_rate": 2.6990476190476195e-05,
+ "loss": 0.4383,
+ "step": 4499
+ },
+ {
+ "epoch": 25.714285714285715,
+ "grad_norm": 38.0180778503418,
+ "learning_rate": 2.6984126984126984e-05,
+ "loss": 0.3133,
+ "step": 4500
+ },
+ {
+ "epoch": 25.72,
+ "grad_norm": 283.60150146484375,
+ "learning_rate": 2.697777777777778e-05,
+ "loss": 0.3557,
+ "step": 4501
+ },
+ {
+ "epoch": 25.725714285714286,
+ "grad_norm": 33.72235107421875,
+ "learning_rate": 2.6971428571428576e-05,
+ "loss": 0.3229,
+ "step": 4502
+ },
+ {
+ "epoch": 25.731428571428573,
+ "grad_norm": 88.3503189086914,
+ "learning_rate": 2.6965079365079365e-05,
+ "loss": 0.2675,
+ "step": 4503
+ },
+ {
+ "epoch": 25.737142857142857,
+ "grad_norm": 35.932823181152344,
+ "learning_rate": 2.695873015873016e-05,
+ "loss": 0.2786,
+ "step": 4504
+ },
+ {
+ "epoch": 25.742857142857144,
+ "grad_norm": 46.16046142578125,
+ "learning_rate": 2.695238095238095e-05,
+ "loss": 0.304,
+ "step": 4505
+ },
+ {
+ "epoch": 25.748571428571427,
+ "grad_norm": 43.43974685668945,
+ "learning_rate": 2.6946031746031747e-05,
+ "loss": 0.2721,
+ "step": 4506
+ },
+ {
+ "epoch": 25.754285714285714,
+ "grad_norm": 25.076675415039062,
+ "learning_rate": 2.6939682539682543e-05,
+ "loss": 0.3251,
+ "step": 4507
+ },
+ {
+ "epoch": 25.76,
+ "grad_norm": 66.32283020019531,
+ "learning_rate": 2.6933333333333332e-05,
+ "loss": 0.3436,
+ "step": 4508
+ },
+ {
+ "epoch": 25.765714285714285,
+ "grad_norm": 34.4593391418457,
+ "learning_rate": 2.6926984126984128e-05,
+ "loss": 0.3827,
+ "step": 4509
+ },
+ {
+ "epoch": 25.771428571428572,
+ "grad_norm": 41.65052795410156,
+ "learning_rate": 2.6920634920634924e-05,
+ "loss": 0.2946,
+ "step": 4510
+ },
+ {
+ "epoch": 25.777142857142856,
+ "grad_norm": 59.617496490478516,
+ "learning_rate": 2.6914285714285713e-05,
+ "loss": 0.2359,
+ "step": 4511
+ },
+ {
+ "epoch": 25.782857142857143,
+ "grad_norm": 444.38128662109375,
+ "learning_rate": 2.690793650793651e-05,
+ "loss": 0.2877,
+ "step": 4512
+ },
+ {
+ "epoch": 25.78857142857143,
+ "grad_norm": 67.48945617675781,
+ "learning_rate": 2.6901587301587305e-05,
+ "loss": 0.3101,
+ "step": 4513
+ },
+ {
+ "epoch": 25.794285714285714,
+ "grad_norm": 47.88575744628906,
+ "learning_rate": 2.6895238095238094e-05,
+ "loss": 0.4117,
+ "step": 4514
+ },
+ {
+ "epoch": 25.8,
+ "grad_norm": 35.176971435546875,
+ "learning_rate": 2.688888888888889e-05,
+ "loss": 0.3138,
+ "step": 4515
+ },
+ {
+ "epoch": 25.805714285714284,
+ "grad_norm": 69.59241485595703,
+ "learning_rate": 2.688253968253968e-05,
+ "loss": 0.3087,
+ "step": 4516
+ },
+ {
+ "epoch": 25.81142857142857,
+ "grad_norm": 47.38324737548828,
+ "learning_rate": 2.6876190476190476e-05,
+ "loss": 0.3266,
+ "step": 4517
+ },
+ {
+ "epoch": 25.81714285714286,
+ "grad_norm": 66.85245513916016,
+ "learning_rate": 2.6869841269841272e-05,
+ "loss": 0.2838,
+ "step": 4518
+ },
+ {
+ "epoch": 25.822857142857142,
+ "grad_norm": 34.234954833984375,
+ "learning_rate": 2.6863492063492064e-05,
+ "loss": 0.237,
+ "step": 4519
+ },
+ {
+ "epoch": 25.82857142857143,
+ "grad_norm": 37.65780258178711,
+ "learning_rate": 2.6857142857142857e-05,
+ "loss": 0.3527,
+ "step": 4520
+ },
+ {
+ "epoch": 25.834285714285713,
+ "grad_norm": 17.731443405151367,
+ "learning_rate": 2.6850793650793653e-05,
+ "loss": 0.3085,
+ "step": 4521
+ },
+ {
+ "epoch": 25.84,
+ "grad_norm": 54.15608215332031,
+ "learning_rate": 2.6844444444444446e-05,
+ "loss": 0.2654,
+ "step": 4522
+ },
+ {
+ "epoch": 25.845714285714287,
+ "grad_norm": 30.08025550842285,
+ "learning_rate": 2.683809523809524e-05,
+ "loss": 0.2865,
+ "step": 4523
+ },
+ {
+ "epoch": 25.85142857142857,
+ "grad_norm": 105.50086212158203,
+ "learning_rate": 2.6831746031746034e-05,
+ "loss": 0.2484,
+ "step": 4524
+ },
+ {
+ "epoch": 25.857142857142858,
+ "grad_norm": 44.405601501464844,
+ "learning_rate": 2.6825396825396827e-05,
+ "loss": 0.3007,
+ "step": 4525
+ },
+ {
+ "epoch": 25.86285714285714,
+ "grad_norm": 46.13258361816406,
+ "learning_rate": 2.6819047619047623e-05,
+ "loss": 0.2646,
+ "step": 4526
+ },
+ {
+ "epoch": 25.86857142857143,
+ "grad_norm": 32.382591247558594,
+ "learning_rate": 2.6812698412698412e-05,
+ "loss": 0.3584,
+ "step": 4527
+ },
+ {
+ "epoch": 25.874285714285715,
+ "grad_norm": 30.748363494873047,
+ "learning_rate": 2.6806349206349208e-05,
+ "loss": 0.3738,
+ "step": 4528
+ },
+ {
+ "epoch": 25.88,
+ "grad_norm": 180.90086364746094,
+ "learning_rate": 2.6800000000000004e-05,
+ "loss": 0.3811,
+ "step": 4529
+ },
+ {
+ "epoch": 25.885714285714286,
+ "grad_norm": 39.43400192260742,
+ "learning_rate": 2.6793650793650794e-05,
+ "loss": 0.4279,
+ "step": 4530
+ },
+ {
+ "epoch": 25.89142857142857,
+ "grad_norm": 44.61370086669922,
+ "learning_rate": 2.678730158730159e-05,
+ "loss": 0.3784,
+ "step": 4531
+ },
+ {
+ "epoch": 25.897142857142857,
+ "grad_norm": 102.19576263427734,
+ "learning_rate": 2.6780952380952386e-05,
+ "loss": 0.2871,
+ "step": 4532
+ },
+ {
+ "epoch": 25.902857142857144,
+ "grad_norm": 127.11107635498047,
+ "learning_rate": 2.6774603174603175e-05,
+ "loss": 0.457,
+ "step": 4533
+ },
+ {
+ "epoch": 25.908571428571427,
+ "grad_norm": 81.60096740722656,
+ "learning_rate": 2.676825396825397e-05,
+ "loss": 0.3186,
+ "step": 4534
+ },
+ {
+ "epoch": 25.914285714285715,
+ "grad_norm": 721.7100830078125,
+ "learning_rate": 2.6761904761904767e-05,
+ "loss": 0.3064,
+ "step": 4535
+ },
+ {
+ "epoch": 25.92,
+ "grad_norm": 28.72801971435547,
+ "learning_rate": 2.6755555555555556e-05,
+ "loss": 0.4099,
+ "step": 4536
+ },
+ {
+ "epoch": 25.925714285714285,
+ "grad_norm": 44.58869171142578,
+ "learning_rate": 2.6749206349206352e-05,
+ "loss": 0.2749,
+ "step": 4537
+ },
+ {
+ "epoch": 25.931428571428572,
+ "grad_norm": 211.85838317871094,
+ "learning_rate": 2.674285714285714e-05,
+ "loss": 0.329,
+ "step": 4538
+ },
+ {
+ "epoch": 25.937142857142856,
+ "grad_norm": 38.062259674072266,
+ "learning_rate": 2.6736507936507937e-05,
+ "loss": 0.2823,
+ "step": 4539
+ },
+ {
+ "epoch": 25.942857142857143,
+ "grad_norm": 112.61495971679688,
+ "learning_rate": 2.6730158730158733e-05,
+ "loss": 0.2566,
+ "step": 4540
+ },
+ {
+ "epoch": 25.94857142857143,
+ "grad_norm": 40.20820999145508,
+ "learning_rate": 2.6723809523809523e-05,
+ "loss": 0.4612,
+ "step": 4541
+ },
+ {
+ "epoch": 25.954285714285714,
+ "grad_norm": 40.41714096069336,
+ "learning_rate": 2.671746031746032e-05,
+ "loss": 0.297,
+ "step": 4542
+ },
+ {
+ "epoch": 25.96,
+ "grad_norm": 27.733102798461914,
+ "learning_rate": 2.6711111111111115e-05,
+ "loss": 0.3345,
+ "step": 4543
+ },
+ {
+ "epoch": 25.965714285714284,
+ "grad_norm": 36.81207275390625,
+ "learning_rate": 2.6704761904761904e-05,
+ "loss": 0.2943,
+ "step": 4544
+ },
+ {
+ "epoch": 25.97142857142857,
+ "grad_norm": 50.07954025268555,
+ "learning_rate": 2.66984126984127e-05,
+ "loss": 0.3336,
+ "step": 4545
+ },
+ {
+ "epoch": 25.97714285714286,
+ "grad_norm": 62.3792724609375,
+ "learning_rate": 2.6692063492063496e-05,
+ "loss": 0.3614,
+ "step": 4546
+ },
+ {
+ "epoch": 25.982857142857142,
+ "grad_norm": 101.2016830444336,
+ "learning_rate": 2.6685714285714285e-05,
+ "loss": 0.3905,
+ "step": 4547
+ },
+ {
+ "epoch": 25.98857142857143,
+ "grad_norm": 17.869173049926758,
+ "learning_rate": 2.667936507936508e-05,
+ "loss": 0.3525,
+ "step": 4548
+ },
+ {
+ "epoch": 25.994285714285713,
+ "grad_norm": 59.47468948364258,
+ "learning_rate": 2.667301587301587e-05,
+ "loss": 0.2708,
+ "step": 4549
+ },
+ {
+ "epoch": 26.0,
+ "grad_norm": 67.94355010986328,
+ "learning_rate": 2.6666666666666667e-05,
+ "loss": 0.2781,
+ "step": 4550
+ },
+ {
+ "epoch": 26.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6311623454093933,
+ "eval_map": 0.9147,
+ "eval_map_50": 0.9617,
+ "eval_map_75": 0.9475,
+ "eval_map_large": 0.9147,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9147,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7863,
+ "eval_mar_10": 0.9635,
+ "eval_mar_100": 0.9679,
+ "eval_mar_100_per_class": 0.9679,
+ "eval_mar_large": 0.9679,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.6762,
+ "eval_samples_per_second": 21.497,
+ "eval_steps_per_second": 2.705,
+ "step": 4550
+ },
+ {
+ "epoch": 26.005714285714287,
+ "grad_norm": 36.41327667236328,
+ "learning_rate": 2.6660317460317463e-05,
+ "loss": 0.3754,
+ "step": 4551
+ },
+ {
+ "epoch": 26.01142857142857,
+ "grad_norm": 28.15723991394043,
+ "learning_rate": 2.6653968253968252e-05,
+ "loss": 0.3568,
+ "step": 4552
+ },
+ {
+ "epoch": 26.017142857142858,
+ "grad_norm": 24.46324348449707,
+ "learning_rate": 2.6647619047619048e-05,
+ "loss": 0.2012,
+ "step": 4553
+ },
+ {
+ "epoch": 26.02285714285714,
+ "grad_norm": 51.601173400878906,
+ "learning_rate": 2.6641269841269844e-05,
+ "loss": 0.2799,
+ "step": 4554
+ },
+ {
+ "epoch": 26.02857142857143,
+ "grad_norm": 24.85300636291504,
+ "learning_rate": 2.6634920634920633e-05,
+ "loss": 0.3831,
+ "step": 4555
+ },
+ {
+ "epoch": 26.034285714285716,
+ "grad_norm": 36.120201110839844,
+ "learning_rate": 2.662857142857143e-05,
+ "loss": 0.224,
+ "step": 4556
+ },
+ {
+ "epoch": 26.04,
+ "grad_norm": 34.02216339111328,
+ "learning_rate": 2.6622222222222225e-05,
+ "loss": 0.3498,
+ "step": 4557
+ },
+ {
+ "epoch": 26.045714285714286,
+ "grad_norm": 37.88076400756836,
+ "learning_rate": 2.6615873015873018e-05,
+ "loss": 0.2839,
+ "step": 4558
+ },
+ {
+ "epoch": 26.05142857142857,
+ "grad_norm": 39.69442367553711,
+ "learning_rate": 2.660952380952381e-05,
+ "loss": 0.3258,
+ "step": 4559
+ },
+ {
+ "epoch": 26.057142857142857,
+ "grad_norm": 85.76815795898438,
+ "learning_rate": 2.6603174603174603e-05,
+ "loss": 0.3377,
+ "step": 4560
+ },
+ {
+ "epoch": 26.062857142857144,
+ "grad_norm": 49.02735900878906,
+ "learning_rate": 2.65968253968254e-05,
+ "loss": 0.2274,
+ "step": 4561
+ },
+ {
+ "epoch": 26.068571428571428,
+ "grad_norm": 46.21623611450195,
+ "learning_rate": 2.6590476190476192e-05,
+ "loss": 0.3173,
+ "step": 4562
+ },
+ {
+ "epoch": 26.074285714285715,
+ "grad_norm": 19.301591873168945,
+ "learning_rate": 2.6584126984126984e-05,
+ "loss": 0.1969,
+ "step": 4563
+ },
+ {
+ "epoch": 26.08,
+ "grad_norm": 44.840816497802734,
+ "learning_rate": 2.657777777777778e-05,
+ "loss": 0.2008,
+ "step": 4564
+ },
+ {
+ "epoch": 26.085714285714285,
+ "grad_norm": 36.26298141479492,
+ "learning_rate": 2.6571428571428576e-05,
+ "loss": 0.2551,
+ "step": 4565
+ },
+ {
+ "epoch": 26.091428571428573,
+ "grad_norm": 28.671228408813477,
+ "learning_rate": 2.6565079365079366e-05,
+ "loss": 0.2116,
+ "step": 4566
+ },
+ {
+ "epoch": 26.097142857142856,
+ "grad_norm": 30.666757583618164,
+ "learning_rate": 2.6558730158730162e-05,
+ "loss": 0.3992,
+ "step": 4567
+ },
+ {
+ "epoch": 26.102857142857143,
+ "grad_norm": 48.448524475097656,
+ "learning_rate": 2.6552380952380958e-05,
+ "loss": 0.2654,
+ "step": 4568
+ },
+ {
+ "epoch": 26.10857142857143,
+ "grad_norm": 69.3399429321289,
+ "learning_rate": 2.6546031746031747e-05,
+ "loss": 0.2669,
+ "step": 4569
+ },
+ {
+ "epoch": 26.114285714285714,
+ "grad_norm": 55.04347229003906,
+ "learning_rate": 2.6539682539682543e-05,
+ "loss": 0.3934,
+ "step": 4570
+ },
+ {
+ "epoch": 26.12,
+ "grad_norm": 86.17486572265625,
+ "learning_rate": 2.6533333333333332e-05,
+ "loss": 0.3426,
+ "step": 4571
+ },
+ {
+ "epoch": 26.125714285714285,
+ "grad_norm": 61.36354064941406,
+ "learning_rate": 2.6526984126984128e-05,
+ "loss": 0.2632,
+ "step": 4572
+ },
+ {
+ "epoch": 26.13142857142857,
+ "grad_norm": 51.69411087036133,
+ "learning_rate": 2.6520634920634924e-05,
+ "loss": 0.2911,
+ "step": 4573
+ },
+ {
+ "epoch": 26.13714285714286,
+ "grad_norm": 29.80560874938965,
+ "learning_rate": 2.6514285714285714e-05,
+ "loss": 0.2377,
+ "step": 4574
+ },
+ {
+ "epoch": 26.142857142857142,
+ "grad_norm": 70.0278091430664,
+ "learning_rate": 2.650793650793651e-05,
+ "loss": 0.3496,
+ "step": 4575
+ },
+ {
+ "epoch": 26.14857142857143,
+ "grad_norm": 67.60846710205078,
+ "learning_rate": 2.6501587301587306e-05,
+ "loss": 0.3502,
+ "step": 4576
+ },
+ {
+ "epoch": 26.154285714285713,
+ "grad_norm": 67.3626708984375,
+ "learning_rate": 2.6495238095238095e-05,
+ "loss": 0.2504,
+ "step": 4577
+ },
+ {
+ "epoch": 26.16,
+ "grad_norm": 38.45746612548828,
+ "learning_rate": 2.648888888888889e-05,
+ "loss": 0.2712,
+ "step": 4578
+ },
+ {
+ "epoch": 26.165714285714287,
+ "grad_norm": 27.64004135131836,
+ "learning_rate": 2.6482539682539687e-05,
+ "loss": 0.3435,
+ "step": 4579
+ },
+ {
+ "epoch": 26.17142857142857,
+ "grad_norm": 42.48225784301758,
+ "learning_rate": 2.6476190476190476e-05,
+ "loss": 0.3141,
+ "step": 4580
+ },
+ {
+ "epoch": 26.177142857142858,
+ "grad_norm": 35.910362243652344,
+ "learning_rate": 2.6469841269841272e-05,
+ "loss": 0.2417,
+ "step": 4581
+ },
+ {
+ "epoch": 26.18285714285714,
+ "grad_norm": 63.82694625854492,
+ "learning_rate": 2.646349206349206e-05,
+ "loss": 0.353,
+ "step": 4582
+ },
+ {
+ "epoch": 26.18857142857143,
+ "grad_norm": 2869.86474609375,
+ "learning_rate": 2.6457142857142857e-05,
+ "loss": 0.4538,
+ "step": 4583
+ },
+ {
+ "epoch": 26.194285714285716,
+ "grad_norm": 97.55684661865234,
+ "learning_rate": 2.6450793650793653e-05,
+ "loss": 0.2897,
+ "step": 4584
+ },
+ {
+ "epoch": 26.2,
+ "grad_norm": 60.941158294677734,
+ "learning_rate": 2.6444444444444443e-05,
+ "loss": 0.2114,
+ "step": 4585
+ },
+ {
+ "epoch": 26.205714285714286,
+ "grad_norm": 69.35711669921875,
+ "learning_rate": 2.643809523809524e-05,
+ "loss": 0.2418,
+ "step": 4586
+ },
+ {
+ "epoch": 26.21142857142857,
+ "grad_norm": 129.8416290283203,
+ "learning_rate": 2.6431746031746035e-05,
+ "loss": 0.3354,
+ "step": 4587
+ },
+ {
+ "epoch": 26.217142857142857,
+ "grad_norm": 55.6038703918457,
+ "learning_rate": 2.6425396825396824e-05,
+ "loss": 0.2319,
+ "step": 4588
+ },
+ {
+ "epoch": 26.222857142857144,
+ "grad_norm": 30.773601531982422,
+ "learning_rate": 2.641904761904762e-05,
+ "loss": 0.3119,
+ "step": 4589
+ },
+ {
+ "epoch": 26.228571428571428,
+ "grad_norm": 64.36360168457031,
+ "learning_rate": 2.6412698412698416e-05,
+ "loss": 0.257,
+ "step": 4590
+ },
+ {
+ "epoch": 26.234285714285715,
+ "grad_norm": 19.576568603515625,
+ "learning_rate": 2.6406349206349205e-05,
+ "loss": 0.3647,
+ "step": 4591
+ },
+ {
+ "epoch": 26.24,
+ "grad_norm": 52.89550018310547,
+ "learning_rate": 2.64e-05,
+ "loss": 0.2763,
+ "step": 4592
+ },
+ {
+ "epoch": 26.245714285714286,
+ "grad_norm": 51.489906311035156,
+ "learning_rate": 2.6393650793650794e-05,
+ "loss": 0.2594,
+ "step": 4593
+ },
+ {
+ "epoch": 26.251428571428573,
+ "grad_norm": 47.094451904296875,
+ "learning_rate": 2.6387301587301587e-05,
+ "loss": 0.4294,
+ "step": 4594
+ },
+ {
+ "epoch": 26.257142857142856,
+ "grad_norm": 89.28340148925781,
+ "learning_rate": 2.6380952380952383e-05,
+ "loss": 0.3631,
+ "step": 4595
+ },
+ {
+ "epoch": 26.262857142857143,
+ "grad_norm": 49.36043167114258,
+ "learning_rate": 2.6374603174603175e-05,
+ "loss": 0.2873,
+ "step": 4596
+ },
+ {
+ "epoch": 26.268571428571427,
+ "grad_norm": 99.70654296875,
+ "learning_rate": 2.636825396825397e-05,
+ "loss": 0.2918,
+ "step": 4597
+ },
+ {
+ "epoch": 26.274285714285714,
+ "grad_norm": 33.33317565917969,
+ "learning_rate": 2.6361904761904764e-05,
+ "loss": 0.354,
+ "step": 4598
+ },
+ {
+ "epoch": 26.28,
+ "grad_norm": 42.30352020263672,
+ "learning_rate": 2.6355555555555557e-05,
+ "loss": 0.2752,
+ "step": 4599
+ },
+ {
+ "epoch": 26.285714285714285,
+ "grad_norm": 49.61213302612305,
+ "learning_rate": 2.6349206349206353e-05,
+ "loss": 0.2889,
+ "step": 4600
+ },
+ {
+ "epoch": 26.291428571428572,
+ "grad_norm": 46.809486389160156,
+ "learning_rate": 2.6342857142857142e-05,
+ "loss": 0.263,
+ "step": 4601
+ },
+ {
+ "epoch": 26.29714285714286,
+ "grad_norm": 14.17546272277832,
+ "learning_rate": 2.6336507936507938e-05,
+ "loss": 0.222,
+ "step": 4602
+ },
+ {
+ "epoch": 26.302857142857142,
+ "grad_norm": 109.38255310058594,
+ "learning_rate": 2.6330158730158734e-05,
+ "loss": 0.2071,
+ "step": 4603
+ },
+ {
+ "epoch": 26.30857142857143,
+ "grad_norm": 40.216156005859375,
+ "learning_rate": 2.6323809523809523e-05,
+ "loss": 0.2199,
+ "step": 4604
+ },
+ {
+ "epoch": 26.314285714285713,
+ "grad_norm": 69.14410400390625,
+ "learning_rate": 2.631746031746032e-05,
+ "loss": 0.3123,
+ "step": 4605
+ },
+ {
+ "epoch": 26.32,
+ "grad_norm": 55.33283233642578,
+ "learning_rate": 2.6311111111111115e-05,
+ "loss": 0.2798,
+ "step": 4606
+ },
+ {
+ "epoch": 26.325714285714287,
+ "grad_norm": 44.87740707397461,
+ "learning_rate": 2.6304761904761904e-05,
+ "loss": 0.3386,
+ "step": 4607
+ },
+ {
+ "epoch": 26.33142857142857,
+ "grad_norm": 54.66011047363281,
+ "learning_rate": 2.62984126984127e-05,
+ "loss": 0.2192,
+ "step": 4608
+ },
+ {
+ "epoch": 26.337142857142858,
+ "grad_norm": 44.17807388305664,
+ "learning_rate": 2.6292063492063496e-05,
+ "loss": 0.222,
+ "step": 4609
+ },
+ {
+ "epoch": 26.34285714285714,
+ "grad_norm": 61.853721618652344,
+ "learning_rate": 2.6285714285714286e-05,
+ "loss": 0.1909,
+ "step": 4610
+ },
+ {
+ "epoch": 26.34857142857143,
+ "grad_norm": 127.42079162597656,
+ "learning_rate": 2.6279365079365082e-05,
+ "loss": 0.2718,
+ "step": 4611
+ },
+ {
+ "epoch": 26.354285714285716,
+ "grad_norm": 35.26097869873047,
+ "learning_rate": 2.627301587301587e-05,
+ "loss": 0.3606,
+ "step": 4612
+ },
+ {
+ "epoch": 26.36,
+ "grad_norm": 32.439327239990234,
+ "learning_rate": 2.6266666666666667e-05,
+ "loss": 0.2152,
+ "step": 4613
+ },
+ {
+ "epoch": 26.365714285714287,
+ "grad_norm": 60.07107925415039,
+ "learning_rate": 2.6260317460317463e-05,
+ "loss": 0.2751,
+ "step": 4614
+ },
+ {
+ "epoch": 26.37142857142857,
+ "grad_norm": 591.93017578125,
+ "learning_rate": 2.6253968253968252e-05,
+ "loss": 0.4014,
+ "step": 4615
+ },
+ {
+ "epoch": 26.377142857142857,
+ "grad_norm": 45.13702392578125,
+ "learning_rate": 2.624761904761905e-05,
+ "loss": 0.3403,
+ "step": 4616
+ },
+ {
+ "epoch": 26.382857142857144,
+ "grad_norm": 52.521602630615234,
+ "learning_rate": 2.6241269841269844e-05,
+ "loss": 0.2272,
+ "step": 4617
+ },
+ {
+ "epoch": 26.388571428571428,
+ "grad_norm": 90.63419342041016,
+ "learning_rate": 2.6234920634920634e-05,
+ "loss": 0.266,
+ "step": 4618
+ },
+ {
+ "epoch": 26.394285714285715,
+ "grad_norm": 25.051990509033203,
+ "learning_rate": 2.622857142857143e-05,
+ "loss": 0.3204,
+ "step": 4619
+ },
+ {
+ "epoch": 26.4,
+ "grad_norm": 18.78619384765625,
+ "learning_rate": 2.6222222222222226e-05,
+ "loss": 0.4196,
+ "step": 4620
+ },
+ {
+ "epoch": 26.405714285714286,
+ "grad_norm": 35.6497688293457,
+ "learning_rate": 2.6215873015873015e-05,
+ "loss": 0.196,
+ "step": 4621
+ },
+ {
+ "epoch": 26.411428571428573,
+ "grad_norm": 65.88414001464844,
+ "learning_rate": 2.620952380952381e-05,
+ "loss": 0.158,
+ "step": 4622
+ },
+ {
+ "epoch": 26.417142857142856,
+ "grad_norm": 26.555614471435547,
+ "learning_rate": 2.62031746031746e-05,
+ "loss": 0.2567,
+ "step": 4623
+ },
+ {
+ "epoch": 26.422857142857143,
+ "grad_norm": 408.59307861328125,
+ "learning_rate": 2.6196825396825396e-05,
+ "loss": 0.3711,
+ "step": 4624
+ },
+ {
+ "epoch": 26.428571428571427,
+ "grad_norm": 83.31832122802734,
+ "learning_rate": 2.6190476190476192e-05,
+ "loss": 0.3444,
+ "step": 4625
+ },
+ {
+ "epoch": 26.434285714285714,
+ "grad_norm": 50.338050842285156,
+ "learning_rate": 2.6184126984126985e-05,
+ "loss": 0.3384,
+ "step": 4626
+ },
+ {
+ "epoch": 26.44,
+ "grad_norm": 37.324745178222656,
+ "learning_rate": 2.6177777777777777e-05,
+ "loss": 0.3156,
+ "step": 4627
+ },
+ {
+ "epoch": 26.445714285714285,
+ "grad_norm": 60.125518798828125,
+ "learning_rate": 2.6171428571428574e-05,
+ "loss": 0.2744,
+ "step": 4628
+ },
+ {
+ "epoch": 26.451428571428572,
+ "grad_norm": 35.94612503051758,
+ "learning_rate": 2.6165079365079366e-05,
+ "loss": 0.2808,
+ "step": 4629
+ },
+ {
+ "epoch": 26.457142857142856,
+ "grad_norm": 50.03565979003906,
+ "learning_rate": 2.615873015873016e-05,
+ "loss": 0.3164,
+ "step": 4630
+ },
+ {
+ "epoch": 26.462857142857143,
+ "grad_norm": 36.93522262573242,
+ "learning_rate": 2.6152380952380955e-05,
+ "loss": 0.3094,
+ "step": 4631
+ },
+ {
+ "epoch": 26.46857142857143,
+ "grad_norm": 47.484981536865234,
+ "learning_rate": 2.6146031746031747e-05,
+ "loss": 0.1839,
+ "step": 4632
+ },
+ {
+ "epoch": 26.474285714285713,
+ "grad_norm": 35.224239349365234,
+ "learning_rate": 2.613968253968254e-05,
+ "loss": 0.3052,
+ "step": 4633
+ },
+ {
+ "epoch": 26.48,
+ "grad_norm": 41.6689567565918,
+ "learning_rate": 2.6133333333333333e-05,
+ "loss": 0.1908,
+ "step": 4634
+ },
+ {
+ "epoch": 26.485714285714284,
+ "grad_norm": 91.45073699951172,
+ "learning_rate": 2.612698412698413e-05,
+ "loss": 0.232,
+ "step": 4635
+ },
+ {
+ "epoch": 26.49142857142857,
+ "grad_norm": 41.19544982910156,
+ "learning_rate": 2.6120634920634925e-05,
+ "loss": 0.4264,
+ "step": 4636
+ },
+ {
+ "epoch": 26.497142857142858,
+ "grad_norm": 21.225603103637695,
+ "learning_rate": 2.6114285714285714e-05,
+ "loss": 0.2383,
+ "step": 4637
+ },
+ {
+ "epoch": 26.502857142857142,
+ "grad_norm": 72.92811584472656,
+ "learning_rate": 2.610793650793651e-05,
+ "loss": 0.3611,
+ "step": 4638
+ },
+ {
+ "epoch": 26.50857142857143,
+ "grad_norm": 144.8211669921875,
+ "learning_rate": 2.6101587301587306e-05,
+ "loss": 0.3354,
+ "step": 4639
+ },
+ {
+ "epoch": 26.514285714285712,
+ "grad_norm": 232.2416229248047,
+ "learning_rate": 2.6095238095238095e-05,
+ "loss": 0.2467,
+ "step": 4640
+ },
+ {
+ "epoch": 26.52,
+ "grad_norm": 29.52247428894043,
+ "learning_rate": 2.608888888888889e-05,
+ "loss": 0.2913,
+ "step": 4641
+ },
+ {
+ "epoch": 26.525714285714287,
+ "grad_norm": 1025.2799072265625,
+ "learning_rate": 2.6082539682539687e-05,
+ "loss": 0.3663,
+ "step": 4642
+ },
+ {
+ "epoch": 26.53142857142857,
+ "grad_norm": 34.98089599609375,
+ "learning_rate": 2.6076190476190477e-05,
+ "loss": 0.3618,
+ "step": 4643
+ },
+ {
+ "epoch": 26.537142857142857,
+ "grad_norm": 24.808778762817383,
+ "learning_rate": 2.6069841269841273e-05,
+ "loss": 0.4146,
+ "step": 4644
+ },
+ {
+ "epoch": 26.542857142857144,
+ "grad_norm": 39.9876708984375,
+ "learning_rate": 2.6063492063492062e-05,
+ "loss": 0.2713,
+ "step": 4645
+ },
+ {
+ "epoch": 26.548571428571428,
+ "grad_norm": 32.00506591796875,
+ "learning_rate": 2.6057142857142858e-05,
+ "loss": 0.3541,
+ "step": 4646
+ },
+ {
+ "epoch": 26.554285714285715,
+ "grad_norm": 19.72596549987793,
+ "learning_rate": 2.6050793650793654e-05,
+ "loss": 0.2096,
+ "step": 4647
+ },
+ {
+ "epoch": 26.56,
+ "grad_norm": 43.73262023925781,
+ "learning_rate": 2.6044444444444443e-05,
+ "loss": 0.282,
+ "step": 4648
+ },
+ {
+ "epoch": 26.565714285714286,
+ "grad_norm": 47.079261779785156,
+ "learning_rate": 2.603809523809524e-05,
+ "loss": 0.3678,
+ "step": 4649
+ },
+ {
+ "epoch": 26.571428571428573,
+ "grad_norm": 43.06452560424805,
+ "learning_rate": 2.6031746031746035e-05,
+ "loss": 0.3325,
+ "step": 4650
+ },
+ {
+ "epoch": 26.577142857142857,
+ "grad_norm": 69.7562255859375,
+ "learning_rate": 2.6025396825396824e-05,
+ "loss": 0.3022,
+ "step": 4651
+ },
+ {
+ "epoch": 26.582857142857144,
+ "grad_norm": 60.522830963134766,
+ "learning_rate": 2.601904761904762e-05,
+ "loss": 0.325,
+ "step": 4652
+ },
+ {
+ "epoch": 26.588571428571427,
+ "grad_norm": 336.8451843261719,
+ "learning_rate": 2.6012698412698417e-05,
+ "loss": 0.3255,
+ "step": 4653
+ },
+ {
+ "epoch": 26.594285714285714,
+ "grad_norm": 34.48298263549805,
+ "learning_rate": 2.6006349206349206e-05,
+ "loss": 0.4453,
+ "step": 4654
+ },
+ {
+ "epoch": 26.6,
+ "grad_norm": 53.4842529296875,
+ "learning_rate": 2.6000000000000002e-05,
+ "loss": 0.3246,
+ "step": 4655
+ },
+ {
+ "epoch": 26.605714285714285,
+ "grad_norm": 36.113494873046875,
+ "learning_rate": 2.599365079365079e-05,
+ "loss": 0.3546,
+ "step": 4656
+ },
+ {
+ "epoch": 26.611428571428572,
+ "grad_norm": 86.10785675048828,
+ "learning_rate": 2.5987301587301587e-05,
+ "loss": 0.2551,
+ "step": 4657
+ },
+ {
+ "epoch": 26.617142857142856,
+ "grad_norm": 98.80118560791016,
+ "learning_rate": 2.5980952380952383e-05,
+ "loss": 0.3381,
+ "step": 4658
+ },
+ {
+ "epoch": 26.622857142857143,
+ "grad_norm": 46.648921966552734,
+ "learning_rate": 2.5974603174603172e-05,
+ "loss": 0.28,
+ "step": 4659
+ },
+ {
+ "epoch": 26.62857142857143,
+ "grad_norm": 59.09379577636719,
+ "learning_rate": 2.596825396825397e-05,
+ "loss": 0.2914,
+ "step": 4660
+ },
+ {
+ "epoch": 26.634285714285713,
+ "grad_norm": 166.9357147216797,
+ "learning_rate": 2.5961904761904764e-05,
+ "loss": 0.356,
+ "step": 4661
+ },
+ {
+ "epoch": 26.64,
+ "grad_norm": 119.84436798095703,
+ "learning_rate": 2.5955555555555554e-05,
+ "loss": 0.2871,
+ "step": 4662
+ },
+ {
+ "epoch": 26.645714285714284,
+ "grad_norm": 21.690603256225586,
+ "learning_rate": 2.594920634920635e-05,
+ "loss": 0.2786,
+ "step": 4663
+ },
+ {
+ "epoch": 26.65142857142857,
+ "grad_norm": 77.54032135009766,
+ "learning_rate": 2.5942857142857146e-05,
+ "loss": 0.3975,
+ "step": 4664
+ },
+ {
+ "epoch": 26.65714285714286,
+ "grad_norm": 54.16943359375,
+ "learning_rate": 2.593650793650794e-05,
+ "loss": 0.2498,
+ "step": 4665
+ },
+ {
+ "epoch": 26.662857142857142,
+ "grad_norm": 30.922557830810547,
+ "learning_rate": 2.593015873015873e-05,
+ "loss": 0.2723,
+ "step": 4666
+ },
+ {
+ "epoch": 26.66857142857143,
+ "grad_norm": 67.56193542480469,
+ "learning_rate": 2.5923809523809524e-05,
+ "loss": 0.3129,
+ "step": 4667
+ },
+ {
+ "epoch": 26.674285714285713,
+ "grad_norm": 38.84308624267578,
+ "learning_rate": 2.591746031746032e-05,
+ "loss": 0.4121,
+ "step": 4668
+ },
+ {
+ "epoch": 26.68,
+ "grad_norm": 21.277053833007812,
+ "learning_rate": 2.5911111111111112e-05,
+ "loss": 0.2938,
+ "step": 4669
+ },
+ {
+ "epoch": 26.685714285714287,
+ "grad_norm": 45.40497589111328,
+ "learning_rate": 2.5904761904761905e-05,
+ "loss": 0.3152,
+ "step": 4670
+ },
+ {
+ "epoch": 26.69142857142857,
+ "grad_norm": 42.738285064697266,
+ "learning_rate": 2.58984126984127e-05,
+ "loss": 0.2402,
+ "step": 4671
+ },
+ {
+ "epoch": 26.697142857142858,
+ "grad_norm": 26.55962562561035,
+ "learning_rate": 2.5892063492063497e-05,
+ "loss": 0.4168,
+ "step": 4672
+ },
+ {
+ "epoch": 26.70285714285714,
+ "grad_norm": 68.99372863769531,
+ "learning_rate": 2.5885714285714286e-05,
+ "loss": 0.1987,
+ "step": 4673
+ },
+ {
+ "epoch": 26.708571428571428,
+ "grad_norm": 30.963071823120117,
+ "learning_rate": 2.5879365079365082e-05,
+ "loss": 0.2378,
+ "step": 4674
+ },
+ {
+ "epoch": 26.714285714285715,
+ "grad_norm": 127.34288787841797,
+ "learning_rate": 2.5873015873015878e-05,
+ "loss": 0.2533,
+ "step": 4675
+ },
+ {
+ "epoch": 26.72,
+ "grad_norm": 41.56283950805664,
+ "learning_rate": 2.5866666666666667e-05,
+ "loss": 0.2984,
+ "step": 4676
+ },
+ {
+ "epoch": 26.725714285714286,
+ "grad_norm": 36.975067138671875,
+ "learning_rate": 2.5860317460317463e-05,
+ "loss": 0.3104,
+ "step": 4677
+ },
+ {
+ "epoch": 26.731428571428573,
+ "grad_norm": 29.948862075805664,
+ "learning_rate": 2.5853968253968253e-05,
+ "loss": 0.2908,
+ "step": 4678
+ },
+ {
+ "epoch": 26.737142857142857,
+ "grad_norm": 88.34810638427734,
+ "learning_rate": 2.584761904761905e-05,
+ "loss": 0.276,
+ "step": 4679
+ },
+ {
+ "epoch": 26.742857142857144,
+ "grad_norm": 72.71587371826172,
+ "learning_rate": 2.5841269841269845e-05,
+ "loss": 0.2923,
+ "step": 4680
+ },
+ {
+ "epoch": 26.748571428571427,
+ "grad_norm": 219.08258056640625,
+ "learning_rate": 2.5834920634920634e-05,
+ "loss": 0.3756,
+ "step": 4681
+ },
+ {
+ "epoch": 26.754285714285714,
+ "grad_norm": 60.98505783081055,
+ "learning_rate": 2.582857142857143e-05,
+ "loss": 0.2535,
+ "step": 4682
+ },
+ {
+ "epoch": 26.76,
+ "grad_norm": 25.74429702758789,
+ "learning_rate": 2.5822222222222226e-05,
+ "loss": 0.2586,
+ "step": 4683
+ },
+ {
+ "epoch": 26.765714285714285,
+ "grad_norm": 33.48048782348633,
+ "learning_rate": 2.5815873015873015e-05,
+ "loss": 0.3317,
+ "step": 4684
+ },
+ {
+ "epoch": 26.771428571428572,
+ "grad_norm": 25.574949264526367,
+ "learning_rate": 2.580952380952381e-05,
+ "loss": 0.447,
+ "step": 4685
+ },
+ {
+ "epoch": 26.777142857142856,
+ "grad_norm": 341.3732604980469,
+ "learning_rate": 2.5803174603174607e-05,
+ "loss": 0.2951,
+ "step": 4686
+ },
+ {
+ "epoch": 26.782857142857143,
+ "grad_norm": 131.79299926757812,
+ "learning_rate": 2.5796825396825397e-05,
+ "loss": 0.3094,
+ "step": 4687
+ },
+ {
+ "epoch": 26.78857142857143,
+ "grad_norm": 56.57413864135742,
+ "learning_rate": 2.5790476190476193e-05,
+ "loss": 0.2468,
+ "step": 4688
+ },
+ {
+ "epoch": 26.794285714285714,
+ "grad_norm": 47.77552795410156,
+ "learning_rate": 2.5784126984126982e-05,
+ "loss": 0.3021,
+ "step": 4689
+ },
+ {
+ "epoch": 26.8,
+ "grad_norm": 29.353164672851562,
+ "learning_rate": 2.5777777777777778e-05,
+ "loss": 0.2925,
+ "step": 4690
+ },
+ {
+ "epoch": 26.805714285714284,
+ "grad_norm": 54.12332534790039,
+ "learning_rate": 2.5771428571428574e-05,
+ "loss": 0.2763,
+ "step": 4691
+ },
+ {
+ "epoch": 26.81142857142857,
+ "grad_norm": 48.31441116333008,
+ "learning_rate": 2.5765079365079363e-05,
+ "loss": 0.3282,
+ "step": 4692
+ },
+ {
+ "epoch": 26.81714285714286,
+ "grad_norm": 55.70726776123047,
+ "learning_rate": 2.575873015873016e-05,
+ "loss": 0.3505,
+ "step": 4693
+ },
+ {
+ "epoch": 26.822857142857142,
+ "grad_norm": 18.00375747680664,
+ "learning_rate": 2.5752380952380955e-05,
+ "loss": 0.2845,
+ "step": 4694
+ },
+ {
+ "epoch": 26.82857142857143,
+ "grad_norm": 19.55474281311035,
+ "learning_rate": 2.5746031746031744e-05,
+ "loss": 0.3077,
+ "step": 4695
+ },
+ {
+ "epoch": 26.834285714285713,
+ "grad_norm": 57.00762176513672,
+ "learning_rate": 2.573968253968254e-05,
+ "loss": 0.3885,
+ "step": 4696
+ },
+ {
+ "epoch": 26.84,
+ "grad_norm": 437.8184814453125,
+ "learning_rate": 2.5733333333333337e-05,
+ "loss": 0.3292,
+ "step": 4697
+ },
+ {
+ "epoch": 26.845714285714287,
+ "grad_norm": 22.36211395263672,
+ "learning_rate": 2.5726984126984126e-05,
+ "loss": 0.2513,
+ "step": 4698
+ },
+ {
+ "epoch": 26.85142857142857,
+ "grad_norm": 71.45623016357422,
+ "learning_rate": 2.5720634920634922e-05,
+ "loss": 0.3406,
+ "step": 4699
+ },
+ {
+ "epoch": 26.857142857142858,
+ "grad_norm": 43.11255645751953,
+ "learning_rate": 2.5714285714285714e-05,
+ "loss": 0.2955,
+ "step": 4700
+ },
+ {
+ "epoch": 26.86285714285714,
+ "grad_norm": 30.780973434448242,
+ "learning_rate": 2.5707936507936507e-05,
+ "loss": 0.2453,
+ "step": 4701
+ },
+ {
+ "epoch": 26.86857142857143,
+ "grad_norm": 23.691740036010742,
+ "learning_rate": 2.5701587301587303e-05,
+ "loss": 0.2932,
+ "step": 4702
+ },
+ {
+ "epoch": 26.874285714285715,
+ "grad_norm": 30.1962890625,
+ "learning_rate": 2.5695238095238096e-05,
+ "loss": 0.2652,
+ "step": 4703
+ },
+ {
+ "epoch": 26.88,
+ "grad_norm": 60.63984680175781,
+ "learning_rate": 2.5688888888888892e-05,
+ "loss": 0.2017,
+ "step": 4704
+ },
+ {
+ "epoch": 26.885714285714286,
+ "grad_norm": 27.217247009277344,
+ "learning_rate": 2.5682539682539684e-05,
+ "loss": 0.3772,
+ "step": 4705
+ },
+ {
+ "epoch": 26.89142857142857,
+ "grad_norm": 35.93013000488281,
+ "learning_rate": 2.5676190476190477e-05,
+ "loss": 0.2693,
+ "step": 4706
+ },
+ {
+ "epoch": 26.897142857142857,
+ "grad_norm": 230.4045867919922,
+ "learning_rate": 2.5669841269841273e-05,
+ "loss": 0.6108,
+ "step": 4707
+ },
+ {
+ "epoch": 26.902857142857144,
+ "grad_norm": 40.505592346191406,
+ "learning_rate": 2.5663492063492066e-05,
+ "loss": 0.3169,
+ "step": 4708
+ },
+ {
+ "epoch": 26.908571428571427,
+ "grad_norm": 20.808183670043945,
+ "learning_rate": 2.565714285714286e-05,
+ "loss": 0.3663,
+ "step": 4709
+ },
+ {
+ "epoch": 26.914285714285715,
+ "grad_norm": 71.28199768066406,
+ "learning_rate": 2.5650793650793654e-05,
+ "loss": 0.2616,
+ "step": 4710
+ },
+ {
+ "epoch": 26.92,
+ "grad_norm": 46.240116119384766,
+ "learning_rate": 2.5644444444444444e-05,
+ "loss": 0.1703,
+ "step": 4711
+ },
+ {
+ "epoch": 26.925714285714285,
+ "grad_norm": 58.91728591918945,
+ "learning_rate": 2.563809523809524e-05,
+ "loss": 0.2939,
+ "step": 4712
+ },
+ {
+ "epoch": 26.931428571428572,
+ "grad_norm": 24.967037200927734,
+ "learning_rate": 2.5631746031746036e-05,
+ "loss": 0.2953,
+ "step": 4713
+ },
+ {
+ "epoch": 26.937142857142856,
+ "grad_norm": 62.452796936035156,
+ "learning_rate": 2.5625396825396825e-05,
+ "loss": 0.2894,
+ "step": 4714
+ },
+ {
+ "epoch": 26.942857142857143,
+ "grad_norm": 41.568992614746094,
+ "learning_rate": 2.561904761904762e-05,
+ "loss": 0.2523,
+ "step": 4715
+ },
+ {
+ "epoch": 26.94857142857143,
+ "grad_norm": 29.147098541259766,
+ "learning_rate": 2.5612698412698417e-05,
+ "loss": 0.2588,
+ "step": 4716
+ },
+ {
+ "epoch": 26.954285714285714,
+ "grad_norm": 52.80727005004883,
+ "learning_rate": 2.5606349206349206e-05,
+ "loss": 0.1472,
+ "step": 4717
+ },
+ {
+ "epoch": 26.96,
+ "grad_norm": 28.822208404541016,
+ "learning_rate": 2.5600000000000002e-05,
+ "loss": 0.3417,
+ "step": 4718
+ },
+ {
+ "epoch": 26.965714285714284,
+ "grad_norm": 103.94974517822266,
+ "learning_rate": 2.5593650793650798e-05,
+ "loss": 0.2922,
+ "step": 4719
+ },
+ {
+ "epoch": 26.97142857142857,
+ "grad_norm": 45.41557693481445,
+ "learning_rate": 2.5587301587301588e-05,
+ "loss": 0.2904,
+ "step": 4720
+ },
+ {
+ "epoch": 26.97714285714286,
+ "grad_norm": 53.98274612426758,
+ "learning_rate": 2.5580952380952384e-05,
+ "loss": 0.309,
+ "step": 4721
+ },
+ {
+ "epoch": 26.982857142857142,
+ "grad_norm": 56.60956573486328,
+ "learning_rate": 2.5574603174603173e-05,
+ "loss": 0.3258,
+ "step": 4722
+ },
+ {
+ "epoch": 26.98857142857143,
+ "grad_norm": 34.62663269042969,
+ "learning_rate": 2.556825396825397e-05,
+ "loss": 0.2096,
+ "step": 4723
+ },
+ {
+ "epoch": 26.994285714285713,
+ "grad_norm": 62.96559143066406,
+ "learning_rate": 2.5561904761904765e-05,
+ "loss": 0.3225,
+ "step": 4724
+ },
+ {
+ "epoch": 27.0,
+ "grad_norm": 43.35622024536133,
+ "learning_rate": 2.5555555555555554e-05,
+ "loss": 0.26,
+ "step": 4725
+ },
+ {
+ "epoch": 27.0,
+ "eval_classes": 0,
+ "eval_loss": 0.617247998714447,
+ "eval_map": 0.9077,
+ "eval_map_50": 0.9546,
+ "eval_map_75": 0.9384,
+ "eval_map_large": 0.9083,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9077,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7781,
+ "eval_mar_10": 0.9587,
+ "eval_mar_100": 0.9663,
+ "eval_mar_100_per_class": 0.9663,
+ "eval_mar_large": 0.9663,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.9606,
+ "eval_samples_per_second": 21.059,
+ "eval_steps_per_second": 2.65,
+ "step": 4725
+ },
+ {
+ "epoch": 27.005714285714287,
+ "grad_norm": 264.7518005371094,
+ "learning_rate": 2.554920634920635e-05,
+ "loss": 0.2647,
+ "step": 4726
+ },
+ {
+ "epoch": 27.01142857142857,
+ "grad_norm": 61.20986557006836,
+ "learning_rate": 2.5542857142857146e-05,
+ "loss": 0.2843,
+ "step": 4727
+ },
+ {
+ "epoch": 27.017142857142858,
+ "grad_norm": 46.27261734008789,
+ "learning_rate": 2.5536507936507935e-05,
+ "loss": 0.4267,
+ "step": 4728
+ },
+ {
+ "epoch": 27.02285714285714,
+ "grad_norm": 36.558563232421875,
+ "learning_rate": 2.553015873015873e-05,
+ "loss": 0.2965,
+ "step": 4729
+ },
+ {
+ "epoch": 27.02857142857143,
+ "grad_norm": 39.53204345703125,
+ "learning_rate": 2.5523809523809527e-05,
+ "loss": 0.3747,
+ "step": 4730
+ },
+ {
+ "epoch": 27.034285714285716,
+ "grad_norm": 16.22458839416504,
+ "learning_rate": 2.5517460317460317e-05,
+ "loss": 0.2887,
+ "step": 4731
+ },
+ {
+ "epoch": 27.04,
+ "grad_norm": 81.15853881835938,
+ "learning_rate": 2.5511111111111113e-05,
+ "loss": 0.3398,
+ "step": 4732
+ },
+ {
+ "epoch": 27.045714285714286,
+ "grad_norm": 110.32965087890625,
+ "learning_rate": 2.5504761904761902e-05,
+ "loss": 0.3053,
+ "step": 4733
+ },
+ {
+ "epoch": 27.05142857142857,
+ "grad_norm": 47.38296127319336,
+ "learning_rate": 2.5498412698412698e-05,
+ "loss": 0.2774,
+ "step": 4734
+ },
+ {
+ "epoch": 27.057142857142857,
+ "grad_norm": 1103.1126708984375,
+ "learning_rate": 2.5492063492063494e-05,
+ "loss": 0.3639,
+ "step": 4735
+ },
+ {
+ "epoch": 27.062857142857144,
+ "grad_norm": 128.23031616210938,
+ "learning_rate": 2.5485714285714287e-05,
+ "loss": 0.2693,
+ "step": 4736
+ },
+ {
+ "epoch": 27.068571428571428,
+ "grad_norm": 51.42526626586914,
+ "learning_rate": 2.547936507936508e-05,
+ "loss": 0.297,
+ "step": 4737
+ },
+ {
+ "epoch": 27.074285714285715,
+ "grad_norm": 43.38787841796875,
+ "learning_rate": 2.5473015873015875e-05,
+ "loss": 0.2194,
+ "step": 4738
+ },
+ {
+ "epoch": 27.08,
+ "grad_norm": 108.1216812133789,
+ "learning_rate": 2.5466666666666668e-05,
+ "loss": 0.4258,
+ "step": 4739
+ },
+ {
+ "epoch": 27.085714285714285,
+ "grad_norm": 42.919464111328125,
+ "learning_rate": 2.546031746031746e-05,
+ "loss": 0.3384,
+ "step": 4740
+ },
+ {
+ "epoch": 27.091428571428573,
+ "grad_norm": 42.97194290161133,
+ "learning_rate": 2.5453968253968257e-05,
+ "loss": 0.2781,
+ "step": 4741
+ },
+ {
+ "epoch": 27.097142857142856,
+ "grad_norm": 50.67507553100586,
+ "learning_rate": 2.544761904761905e-05,
+ "loss": 0.3613,
+ "step": 4742
+ },
+ {
+ "epoch": 27.102857142857143,
+ "grad_norm": 1063.8197021484375,
+ "learning_rate": 2.5441269841269845e-05,
+ "loss": 0.4082,
+ "step": 4743
+ },
+ {
+ "epoch": 27.10857142857143,
+ "grad_norm": 39.627647399902344,
+ "learning_rate": 2.5434920634920634e-05,
+ "loss": 0.4711,
+ "step": 4744
+ },
+ {
+ "epoch": 27.114285714285714,
+ "grad_norm": 92.78875732421875,
+ "learning_rate": 2.542857142857143e-05,
+ "loss": 0.3292,
+ "step": 4745
+ },
+ {
+ "epoch": 27.12,
+ "grad_norm": 25.53900718688965,
+ "learning_rate": 2.5422222222222227e-05,
+ "loss": 0.2453,
+ "step": 4746
+ },
+ {
+ "epoch": 27.125714285714285,
+ "grad_norm": 221.59703063964844,
+ "learning_rate": 2.5415873015873016e-05,
+ "loss": 0.253,
+ "step": 4747
+ },
+ {
+ "epoch": 27.13142857142857,
+ "grad_norm": 83.64018249511719,
+ "learning_rate": 2.5409523809523812e-05,
+ "loss": 0.2473,
+ "step": 4748
+ },
+ {
+ "epoch": 27.13714285714286,
+ "grad_norm": 205.5132293701172,
+ "learning_rate": 2.5403174603174608e-05,
+ "loss": 0.2953,
+ "step": 4749
+ },
+ {
+ "epoch": 27.142857142857142,
+ "grad_norm": 50.870399475097656,
+ "learning_rate": 2.5396825396825397e-05,
+ "loss": 0.3832,
+ "step": 4750
+ },
+ {
+ "epoch": 27.14857142857143,
+ "grad_norm": 62.747032165527344,
+ "learning_rate": 2.5390476190476193e-05,
+ "loss": 0.2428,
+ "step": 4751
+ },
+ {
+ "epoch": 27.154285714285713,
+ "grad_norm": 212.9983367919922,
+ "learning_rate": 2.538412698412699e-05,
+ "loss": 0.2652,
+ "step": 4752
+ },
+ {
+ "epoch": 27.16,
+ "grad_norm": 72.1238784790039,
+ "learning_rate": 2.537777777777778e-05,
+ "loss": 0.289,
+ "step": 4753
+ },
+ {
+ "epoch": 27.165714285714287,
+ "grad_norm": 52.40901184082031,
+ "learning_rate": 2.5371428571428574e-05,
+ "loss": 0.2059,
+ "step": 4754
+ },
+ {
+ "epoch": 27.17142857142857,
+ "grad_norm": 74.59566497802734,
+ "learning_rate": 2.5365079365079364e-05,
+ "loss": 0.4205,
+ "step": 4755
+ },
+ {
+ "epoch": 27.177142857142858,
+ "grad_norm": 584.8218383789062,
+ "learning_rate": 2.535873015873016e-05,
+ "loss": 0.332,
+ "step": 4756
+ },
+ {
+ "epoch": 27.18285714285714,
+ "grad_norm": 49.488468170166016,
+ "learning_rate": 2.5352380952380956e-05,
+ "loss": 0.2476,
+ "step": 4757
+ },
+ {
+ "epoch": 27.18857142857143,
+ "grad_norm": 34.9415397644043,
+ "learning_rate": 2.5346031746031745e-05,
+ "loss": 0.3947,
+ "step": 4758
+ },
+ {
+ "epoch": 27.194285714285716,
+ "grad_norm": 36.94355392456055,
+ "learning_rate": 2.533968253968254e-05,
+ "loss": 0.3157,
+ "step": 4759
+ },
+ {
+ "epoch": 27.2,
+ "grad_norm": 66.73297119140625,
+ "learning_rate": 2.5333333333333337e-05,
+ "loss": 0.2973,
+ "step": 4760
+ },
+ {
+ "epoch": 27.205714285714286,
+ "grad_norm": 38.81161117553711,
+ "learning_rate": 2.5326984126984126e-05,
+ "loss": 0.3355,
+ "step": 4761
+ },
+ {
+ "epoch": 27.21142857142857,
+ "grad_norm": 42.80949401855469,
+ "learning_rate": 2.5320634920634922e-05,
+ "loss": 0.2284,
+ "step": 4762
+ },
+ {
+ "epoch": 27.217142857142857,
+ "grad_norm": 69.96595764160156,
+ "learning_rate": 2.5314285714285718e-05,
+ "loss": 0.2513,
+ "step": 4763
+ },
+ {
+ "epoch": 27.222857142857144,
+ "grad_norm": 81.86923217773438,
+ "learning_rate": 2.5307936507936508e-05,
+ "loss": 0.2305,
+ "step": 4764
+ },
+ {
+ "epoch": 27.228571428571428,
+ "grad_norm": 26.642152786254883,
+ "learning_rate": 2.5301587301587304e-05,
+ "loss": 0.2809,
+ "step": 4765
+ },
+ {
+ "epoch": 27.234285714285715,
+ "grad_norm": 41.40145492553711,
+ "learning_rate": 2.5295238095238093e-05,
+ "loss": 0.1789,
+ "step": 4766
+ },
+ {
+ "epoch": 27.24,
+ "grad_norm": 41.42937088012695,
+ "learning_rate": 2.528888888888889e-05,
+ "loss": 0.2544,
+ "step": 4767
+ },
+ {
+ "epoch": 27.245714285714286,
+ "grad_norm": 70.97283935546875,
+ "learning_rate": 2.5282539682539685e-05,
+ "loss": 0.3121,
+ "step": 4768
+ },
+ {
+ "epoch": 27.251428571428573,
+ "grad_norm": 116.86566925048828,
+ "learning_rate": 2.5276190476190474e-05,
+ "loss": 0.2777,
+ "step": 4769
+ },
+ {
+ "epoch": 27.257142857142856,
+ "grad_norm": 34.88475036621094,
+ "learning_rate": 2.526984126984127e-05,
+ "loss": 0.309,
+ "step": 4770
+ },
+ {
+ "epoch": 27.262857142857143,
+ "grad_norm": 146.3785400390625,
+ "learning_rate": 2.5263492063492066e-05,
+ "loss": 0.3011,
+ "step": 4771
+ },
+ {
+ "epoch": 27.268571428571427,
+ "grad_norm": 118.60247802734375,
+ "learning_rate": 2.5257142857142855e-05,
+ "loss": 0.2069,
+ "step": 4772
+ },
+ {
+ "epoch": 27.274285714285714,
+ "grad_norm": 22.586828231811523,
+ "learning_rate": 2.525079365079365e-05,
+ "loss": 0.3537,
+ "step": 4773
+ },
+ {
+ "epoch": 27.28,
+ "grad_norm": 50.62820816040039,
+ "learning_rate": 2.5244444444444447e-05,
+ "loss": 0.208,
+ "step": 4774
+ },
+ {
+ "epoch": 27.285714285714285,
+ "grad_norm": 56.44573974609375,
+ "learning_rate": 2.523809523809524e-05,
+ "loss": 0.3518,
+ "step": 4775
+ },
+ {
+ "epoch": 27.291428571428572,
+ "grad_norm": 371.9031982421875,
+ "learning_rate": 2.5231746031746033e-05,
+ "loss": 0.2639,
+ "step": 4776
+ },
+ {
+ "epoch": 27.29714285714286,
+ "grad_norm": 23.763046264648438,
+ "learning_rate": 2.5225396825396825e-05,
+ "loss": 0.312,
+ "step": 4777
+ },
+ {
+ "epoch": 27.302857142857142,
+ "grad_norm": 21.81557846069336,
+ "learning_rate": 2.521904761904762e-05,
+ "loss": 0.2612,
+ "step": 4778
+ },
+ {
+ "epoch": 27.30857142857143,
+ "grad_norm": 41.35017776489258,
+ "learning_rate": 2.5212698412698414e-05,
+ "loss": 0.2606,
+ "step": 4779
+ },
+ {
+ "epoch": 27.314285714285713,
+ "grad_norm": 82.43899536132812,
+ "learning_rate": 2.5206349206349207e-05,
+ "loss": 0.3153,
+ "step": 4780
+ },
+ {
+ "epoch": 27.32,
+ "grad_norm": 61.49275207519531,
+ "learning_rate": 2.5200000000000003e-05,
+ "loss": 0.2535,
+ "step": 4781
+ },
+ {
+ "epoch": 27.325714285714287,
+ "grad_norm": 20.077241897583008,
+ "learning_rate": 2.51936507936508e-05,
+ "loss": 0.2586,
+ "step": 4782
+ },
+ {
+ "epoch": 27.33142857142857,
+ "grad_norm": 31.274147033691406,
+ "learning_rate": 2.5187301587301588e-05,
+ "loss": 0.3254,
+ "step": 4783
+ },
+ {
+ "epoch": 27.337142857142858,
+ "grad_norm": 414.2837829589844,
+ "learning_rate": 2.5180952380952384e-05,
+ "loss": 0.3036,
+ "step": 4784
+ },
+ {
+ "epoch": 27.34285714285714,
+ "grad_norm": 60.34440612792969,
+ "learning_rate": 2.517460317460318e-05,
+ "loss": 0.2722,
+ "step": 4785
+ },
+ {
+ "epoch": 27.34857142857143,
+ "grad_norm": 56.29039764404297,
+ "learning_rate": 2.516825396825397e-05,
+ "loss": 0.4124,
+ "step": 4786
+ },
+ {
+ "epoch": 27.354285714285716,
+ "grad_norm": 90.10543823242188,
+ "learning_rate": 2.5161904761904765e-05,
+ "loss": 0.2382,
+ "step": 4787
+ },
+ {
+ "epoch": 27.36,
+ "grad_norm": 49.912086486816406,
+ "learning_rate": 2.5155555555555555e-05,
+ "loss": 0.2819,
+ "step": 4788
+ },
+ {
+ "epoch": 27.365714285714287,
+ "grad_norm": 66.22119903564453,
+ "learning_rate": 2.514920634920635e-05,
+ "loss": 0.2414,
+ "step": 4789
+ },
+ {
+ "epoch": 27.37142857142857,
+ "grad_norm": 53.957550048828125,
+ "learning_rate": 2.5142857142857147e-05,
+ "loss": 0.3447,
+ "step": 4790
+ },
+ {
+ "epoch": 27.377142857142857,
+ "grad_norm": 51.856266021728516,
+ "learning_rate": 2.5136507936507936e-05,
+ "loss": 0.2213,
+ "step": 4791
+ },
+ {
+ "epoch": 27.382857142857144,
+ "grad_norm": 33.7740478515625,
+ "learning_rate": 2.5130158730158732e-05,
+ "loss": 0.2832,
+ "step": 4792
+ },
+ {
+ "epoch": 27.388571428571428,
+ "grad_norm": 36.47516632080078,
+ "learning_rate": 2.5123809523809528e-05,
+ "loss": 0.288,
+ "step": 4793
+ },
+ {
+ "epoch": 27.394285714285715,
+ "grad_norm": 43.95291519165039,
+ "learning_rate": 2.5117460317460317e-05,
+ "loss": 0.2727,
+ "step": 4794
+ },
+ {
+ "epoch": 27.4,
+ "grad_norm": 54.80048370361328,
+ "learning_rate": 2.5111111111111113e-05,
+ "loss": 0.276,
+ "step": 4795
+ },
+ {
+ "epoch": 27.405714285714286,
+ "grad_norm": 231.54710388183594,
+ "learning_rate": 2.510476190476191e-05,
+ "loss": 0.2558,
+ "step": 4796
+ },
+ {
+ "epoch": 27.411428571428573,
+ "grad_norm": 56.0648307800293,
+ "learning_rate": 2.50984126984127e-05,
+ "loss": 0.2427,
+ "step": 4797
+ },
+ {
+ "epoch": 27.417142857142856,
+ "grad_norm": 75.80994415283203,
+ "learning_rate": 2.5092063492063494e-05,
+ "loss": 0.3139,
+ "step": 4798
+ },
+ {
+ "epoch": 27.422857142857143,
+ "grad_norm": 34.60862731933594,
+ "learning_rate": 2.5085714285714284e-05,
+ "loss": 0.2255,
+ "step": 4799
+ },
+ {
+ "epoch": 27.428571428571427,
+ "grad_norm": 30.5380859375,
+ "learning_rate": 2.507936507936508e-05,
+ "loss": 0.3061,
+ "step": 4800
+ },
+ {
+ "epoch": 27.434285714285714,
+ "grad_norm": 25.800336837768555,
+ "learning_rate": 2.5073015873015876e-05,
+ "loss": 0.2475,
+ "step": 4801
+ },
+ {
+ "epoch": 27.44,
+ "grad_norm": 31.043546676635742,
+ "learning_rate": 2.5066666666666665e-05,
+ "loss": 0.3676,
+ "step": 4802
+ },
+ {
+ "epoch": 27.445714285714285,
+ "grad_norm": 45.71298599243164,
+ "learning_rate": 2.506031746031746e-05,
+ "loss": 0.2559,
+ "step": 4803
+ },
+ {
+ "epoch": 27.451428571428572,
+ "grad_norm": 24.04212760925293,
+ "learning_rate": 2.5053968253968257e-05,
+ "loss": 0.3555,
+ "step": 4804
+ },
+ {
+ "epoch": 27.457142857142856,
+ "grad_norm": 49.58759689331055,
+ "learning_rate": 2.5047619047619046e-05,
+ "loss": 0.3161,
+ "step": 4805
+ },
+ {
+ "epoch": 27.462857142857143,
+ "grad_norm": 33.77734375,
+ "learning_rate": 2.5041269841269842e-05,
+ "loss": 0.2111,
+ "step": 4806
+ },
+ {
+ "epoch": 27.46857142857143,
+ "grad_norm": 31.39830780029297,
+ "learning_rate": 2.503492063492064e-05,
+ "loss": 0.2874,
+ "step": 4807
+ },
+ {
+ "epoch": 27.474285714285713,
+ "grad_norm": 55.34870529174805,
+ "learning_rate": 2.5028571428571428e-05,
+ "loss": 0.2791,
+ "step": 4808
+ },
+ {
+ "epoch": 27.48,
+ "grad_norm": 239.04185485839844,
+ "learning_rate": 2.5022222222222224e-05,
+ "loss": 0.3815,
+ "step": 4809
+ },
+ {
+ "epoch": 27.485714285714284,
+ "grad_norm": 43.9395637512207,
+ "learning_rate": 2.5015873015873016e-05,
+ "loss": 0.2651,
+ "step": 4810
+ },
+ {
+ "epoch": 27.49142857142857,
+ "grad_norm": 74.31682586669922,
+ "learning_rate": 2.500952380952381e-05,
+ "loss": 0.3032,
+ "step": 4811
+ },
+ {
+ "epoch": 27.497142857142858,
+ "grad_norm": 49.19131851196289,
+ "learning_rate": 2.5003174603174605e-05,
+ "loss": 0.3221,
+ "step": 4812
+ },
+ {
+ "epoch": 27.502857142857142,
+ "grad_norm": 30.429489135742188,
+ "learning_rate": 2.4996825396825398e-05,
+ "loss": 0.2028,
+ "step": 4813
+ },
+ {
+ "epoch": 27.50857142857143,
+ "grad_norm": 235.2591094970703,
+ "learning_rate": 2.4990476190476194e-05,
+ "loss": 0.2108,
+ "step": 4814
+ },
+ {
+ "epoch": 27.514285714285712,
+ "grad_norm": 46.36361312866211,
+ "learning_rate": 2.4984126984126986e-05,
+ "loss": 0.4535,
+ "step": 4815
+ },
+ {
+ "epoch": 27.52,
+ "grad_norm": 46.341285705566406,
+ "learning_rate": 2.497777777777778e-05,
+ "loss": 0.2184,
+ "step": 4816
+ },
+ {
+ "epoch": 27.525714285714287,
+ "grad_norm": 31.94393539428711,
+ "learning_rate": 2.4971428571428575e-05,
+ "loss": 0.2327,
+ "step": 4817
+ },
+ {
+ "epoch": 27.53142857142857,
+ "grad_norm": 39.17048645019531,
+ "learning_rate": 2.4965079365079367e-05,
+ "loss": 0.3632,
+ "step": 4818
+ },
+ {
+ "epoch": 27.537142857142857,
+ "grad_norm": 22.573932647705078,
+ "learning_rate": 2.495873015873016e-05,
+ "loss": 0.2425,
+ "step": 4819
+ },
+ {
+ "epoch": 27.542857142857144,
+ "grad_norm": 172.38751220703125,
+ "learning_rate": 2.4952380952380956e-05,
+ "loss": 0.2537,
+ "step": 4820
+ },
+ {
+ "epoch": 27.548571428571428,
+ "grad_norm": 55.52573013305664,
+ "learning_rate": 2.494603174603175e-05,
+ "loss": 0.3031,
+ "step": 4821
+ },
+ {
+ "epoch": 27.554285714285715,
+ "grad_norm": 48.40682601928711,
+ "learning_rate": 2.493968253968254e-05,
+ "loss": 0.2625,
+ "step": 4822
+ },
+ {
+ "epoch": 27.56,
+ "grad_norm": 30.03643035888672,
+ "learning_rate": 2.4933333333333334e-05,
+ "loss": 0.3392,
+ "step": 4823
+ },
+ {
+ "epoch": 27.565714285714286,
+ "grad_norm": 49.23202896118164,
+ "learning_rate": 2.492698412698413e-05,
+ "loss": 0.3327,
+ "step": 4824
+ },
+ {
+ "epoch": 27.571428571428573,
+ "grad_norm": 36.13849639892578,
+ "learning_rate": 2.4920634920634923e-05,
+ "loss": 0.2898,
+ "step": 4825
+ },
+ {
+ "epoch": 27.577142857142857,
+ "grad_norm": 46.84010314941406,
+ "learning_rate": 2.4914285714285715e-05,
+ "loss": 0.3819,
+ "step": 4826
+ },
+ {
+ "epoch": 27.582857142857144,
+ "grad_norm": 82.81731414794922,
+ "learning_rate": 2.4907936507936508e-05,
+ "loss": 0.2455,
+ "step": 4827
+ },
+ {
+ "epoch": 27.588571428571427,
+ "grad_norm": 50.32649612426758,
+ "learning_rate": 2.4901587301587304e-05,
+ "loss": 0.4102,
+ "step": 4828
+ },
+ {
+ "epoch": 27.594285714285714,
+ "grad_norm": 53.72205352783203,
+ "learning_rate": 2.4895238095238097e-05,
+ "loss": 0.2229,
+ "step": 4829
+ },
+ {
+ "epoch": 27.6,
+ "grad_norm": 23.845911026000977,
+ "learning_rate": 2.488888888888889e-05,
+ "loss": 0.2732,
+ "step": 4830
+ },
+ {
+ "epoch": 27.605714285714285,
+ "grad_norm": 40.13404846191406,
+ "learning_rate": 2.4882539682539685e-05,
+ "loss": 0.1945,
+ "step": 4831
+ },
+ {
+ "epoch": 27.611428571428572,
+ "grad_norm": 22.85449981689453,
+ "learning_rate": 2.4876190476190478e-05,
+ "loss": 0.2219,
+ "step": 4832
+ },
+ {
+ "epoch": 27.617142857142856,
+ "grad_norm": 20.945064544677734,
+ "learning_rate": 2.486984126984127e-05,
+ "loss": 0.2263,
+ "step": 4833
+ },
+ {
+ "epoch": 27.622857142857143,
+ "grad_norm": 173.3744354248047,
+ "learning_rate": 2.4863492063492063e-05,
+ "loss": 0.248,
+ "step": 4834
+ },
+ {
+ "epoch": 27.62857142857143,
+ "grad_norm": 36.34626007080078,
+ "learning_rate": 2.485714285714286e-05,
+ "loss": 0.5044,
+ "step": 4835
+ },
+ {
+ "epoch": 27.634285714285713,
+ "grad_norm": 89.77456665039062,
+ "learning_rate": 2.4850793650793652e-05,
+ "loss": 0.2702,
+ "step": 4836
+ },
+ {
+ "epoch": 27.64,
+ "grad_norm": 72.74186706542969,
+ "learning_rate": 2.4844444444444444e-05,
+ "loss": 0.2642,
+ "step": 4837
+ },
+ {
+ "epoch": 27.645714285714284,
+ "grad_norm": 48.650089263916016,
+ "learning_rate": 2.4838095238095237e-05,
+ "loss": 0.253,
+ "step": 4838
+ },
+ {
+ "epoch": 27.65142857142857,
+ "grad_norm": 43.67725372314453,
+ "learning_rate": 2.4831746031746033e-05,
+ "loss": 0.2165,
+ "step": 4839
+ },
+ {
+ "epoch": 27.65714285714286,
+ "grad_norm": 20.58785629272461,
+ "learning_rate": 2.4825396825396826e-05,
+ "loss": 0.2718,
+ "step": 4840
+ },
+ {
+ "epoch": 27.662857142857142,
+ "grad_norm": 45.65858840942383,
+ "learning_rate": 2.481904761904762e-05,
+ "loss": 0.2948,
+ "step": 4841
+ },
+ {
+ "epoch": 27.66857142857143,
+ "grad_norm": 74.41572570800781,
+ "learning_rate": 2.4812698412698414e-05,
+ "loss": 0.3391,
+ "step": 4842
+ },
+ {
+ "epoch": 27.674285714285713,
+ "grad_norm": 38.63113021850586,
+ "learning_rate": 2.4806349206349207e-05,
+ "loss": 0.3389,
+ "step": 4843
+ },
+ {
+ "epoch": 27.68,
+ "grad_norm": 76.9261245727539,
+ "learning_rate": 2.48e-05,
+ "loss": 0.3061,
+ "step": 4844
+ },
+ {
+ "epoch": 27.685714285714287,
+ "grad_norm": 84.7331771850586,
+ "learning_rate": 2.4793650793650792e-05,
+ "loss": 0.2617,
+ "step": 4845
+ },
+ {
+ "epoch": 27.69142857142857,
+ "grad_norm": 17.409889221191406,
+ "learning_rate": 2.478730158730159e-05,
+ "loss": 0.2637,
+ "step": 4846
+ },
+ {
+ "epoch": 27.697142857142858,
+ "grad_norm": 88.08624267578125,
+ "learning_rate": 2.478095238095238e-05,
+ "loss": 0.3734,
+ "step": 4847
+ },
+ {
+ "epoch": 27.70285714285714,
+ "grad_norm": 56.591800689697266,
+ "learning_rate": 2.4774603174603177e-05,
+ "loss": 0.2685,
+ "step": 4848
+ },
+ {
+ "epoch": 27.708571428571428,
+ "grad_norm": 31.55167579650879,
+ "learning_rate": 2.476825396825397e-05,
+ "loss": 0.2221,
+ "step": 4849
+ },
+ {
+ "epoch": 27.714285714285715,
+ "grad_norm": 40.55548095703125,
+ "learning_rate": 2.4761904761904762e-05,
+ "loss": 0.2963,
+ "step": 4850
+ },
+ {
+ "epoch": 27.72,
+ "grad_norm": 99.02445220947266,
+ "learning_rate": 2.475555555555556e-05,
+ "loss": 0.2521,
+ "step": 4851
+ },
+ {
+ "epoch": 27.725714285714286,
+ "grad_norm": 246.45144653320312,
+ "learning_rate": 2.474920634920635e-05,
+ "loss": 0.2875,
+ "step": 4852
+ },
+ {
+ "epoch": 27.731428571428573,
+ "grad_norm": 39.322391510009766,
+ "learning_rate": 2.4742857142857147e-05,
+ "loss": 0.3158,
+ "step": 4853
+ },
+ {
+ "epoch": 27.737142857142857,
+ "grad_norm": 44.78469467163086,
+ "learning_rate": 2.473650793650794e-05,
+ "loss": 0.3035,
+ "step": 4854
+ },
+ {
+ "epoch": 27.742857142857144,
+ "grad_norm": 349.8539733886719,
+ "learning_rate": 2.4730158730158732e-05,
+ "loss": 0.3249,
+ "step": 4855
+ },
+ {
+ "epoch": 27.748571428571427,
+ "grad_norm": 51.817283630371094,
+ "learning_rate": 2.4723809523809525e-05,
+ "loss": 0.3091,
+ "step": 4856
+ },
+ {
+ "epoch": 27.754285714285714,
+ "grad_norm": 18.263761520385742,
+ "learning_rate": 2.471746031746032e-05,
+ "loss": 0.2652,
+ "step": 4857
+ },
+ {
+ "epoch": 27.76,
+ "grad_norm": 94.6199722290039,
+ "learning_rate": 2.4711111111111114e-05,
+ "loss": 0.2096,
+ "step": 4858
+ },
+ {
+ "epoch": 27.765714285714285,
+ "grad_norm": 621.2186889648438,
+ "learning_rate": 2.4704761904761906e-05,
+ "loss": 0.3711,
+ "step": 4859
+ },
+ {
+ "epoch": 27.771428571428572,
+ "grad_norm": 162.09405517578125,
+ "learning_rate": 2.46984126984127e-05,
+ "loss": 0.4606,
+ "step": 4860
+ },
+ {
+ "epoch": 27.777142857142856,
+ "grad_norm": 92.1196060180664,
+ "learning_rate": 2.4692063492063495e-05,
+ "loss": 0.2604,
+ "step": 4861
+ },
+ {
+ "epoch": 27.782857142857143,
+ "grad_norm": 52.80637741088867,
+ "learning_rate": 2.4685714285714288e-05,
+ "loss": 0.2786,
+ "step": 4862
+ },
+ {
+ "epoch": 27.78857142857143,
+ "grad_norm": 59.52845001220703,
+ "learning_rate": 2.467936507936508e-05,
+ "loss": 0.2649,
+ "step": 4863
+ },
+ {
+ "epoch": 27.794285714285714,
+ "grad_norm": 19.578981399536133,
+ "learning_rate": 2.4673015873015876e-05,
+ "loss": 0.2346,
+ "step": 4864
+ },
+ {
+ "epoch": 27.8,
+ "grad_norm": 35.54555892944336,
+ "learning_rate": 2.466666666666667e-05,
+ "loss": 0.4155,
+ "step": 4865
+ },
+ {
+ "epoch": 27.805714285714284,
+ "grad_norm": 93.74425506591797,
+ "learning_rate": 2.466031746031746e-05,
+ "loss": 0.3929,
+ "step": 4866
+ },
+ {
+ "epoch": 27.81142857142857,
+ "grad_norm": 34.482975006103516,
+ "learning_rate": 2.4653968253968254e-05,
+ "loss": 0.2819,
+ "step": 4867
+ },
+ {
+ "epoch": 27.81714285714286,
+ "grad_norm": 30.5424747467041,
+ "learning_rate": 2.464761904761905e-05,
+ "loss": 0.2795,
+ "step": 4868
+ },
+ {
+ "epoch": 27.822857142857142,
+ "grad_norm": 32.831085205078125,
+ "learning_rate": 2.4641269841269843e-05,
+ "loss": 0.2125,
+ "step": 4869
+ },
+ {
+ "epoch": 27.82857142857143,
+ "grad_norm": 95.80289459228516,
+ "learning_rate": 2.4634920634920635e-05,
+ "loss": 0.3558,
+ "step": 4870
+ },
+ {
+ "epoch": 27.834285714285713,
+ "grad_norm": 97.45088958740234,
+ "learning_rate": 2.4628571428571428e-05,
+ "loss": 0.327,
+ "step": 4871
+ },
+ {
+ "epoch": 27.84,
+ "grad_norm": 27.350614547729492,
+ "learning_rate": 2.4622222222222224e-05,
+ "loss": 0.3236,
+ "step": 4872
+ },
+ {
+ "epoch": 27.845714285714287,
+ "grad_norm": 52.39337158203125,
+ "learning_rate": 2.4615873015873017e-05,
+ "loss": 0.376,
+ "step": 4873
+ },
+ {
+ "epoch": 27.85142857142857,
+ "grad_norm": 27.236066818237305,
+ "learning_rate": 2.460952380952381e-05,
+ "loss": 0.5718,
+ "step": 4874
+ },
+ {
+ "epoch": 27.857142857142858,
+ "grad_norm": 34.752994537353516,
+ "learning_rate": 2.4603174603174602e-05,
+ "loss": 0.2736,
+ "step": 4875
+ },
+ {
+ "epoch": 27.86285714285714,
+ "grad_norm": 23.44194793701172,
+ "learning_rate": 2.4596825396825398e-05,
+ "loss": 0.3231,
+ "step": 4876
+ },
+ {
+ "epoch": 27.86857142857143,
+ "grad_norm": 23.610010147094727,
+ "learning_rate": 2.459047619047619e-05,
+ "loss": 0.2362,
+ "step": 4877
+ },
+ {
+ "epoch": 27.874285714285715,
+ "grad_norm": 18.690441131591797,
+ "learning_rate": 2.4584126984126983e-05,
+ "loss": 0.2454,
+ "step": 4878
+ },
+ {
+ "epoch": 27.88,
+ "grad_norm": 14.173318862915039,
+ "learning_rate": 2.457777777777778e-05,
+ "loss": 0.2281,
+ "step": 4879
+ },
+ {
+ "epoch": 27.885714285714286,
+ "grad_norm": 55.35274887084961,
+ "learning_rate": 2.4571428571428572e-05,
+ "loss": 0.4128,
+ "step": 4880
+ },
+ {
+ "epoch": 27.89142857142857,
+ "grad_norm": 25.49809455871582,
+ "learning_rate": 2.4565079365079365e-05,
+ "loss": 0.2281,
+ "step": 4881
+ },
+ {
+ "epoch": 27.897142857142857,
+ "grad_norm": 32.44559097290039,
+ "learning_rate": 2.4558730158730157e-05,
+ "loss": 0.3497,
+ "step": 4882
+ },
+ {
+ "epoch": 27.902857142857144,
+ "grad_norm": 68.36529541015625,
+ "learning_rate": 2.4552380952380953e-05,
+ "loss": 0.3238,
+ "step": 4883
+ },
+ {
+ "epoch": 27.908571428571427,
+ "grad_norm": 58.09840774536133,
+ "learning_rate": 2.4546031746031746e-05,
+ "loss": 0.4048,
+ "step": 4884
+ },
+ {
+ "epoch": 27.914285714285715,
+ "grad_norm": 98.97122955322266,
+ "learning_rate": 2.4539682539682542e-05,
+ "loss": 0.279,
+ "step": 4885
+ },
+ {
+ "epoch": 27.92,
+ "grad_norm": 35.524574279785156,
+ "learning_rate": 2.4533333333333334e-05,
+ "loss": 0.2158,
+ "step": 4886
+ },
+ {
+ "epoch": 27.925714285714285,
+ "grad_norm": 86.26341247558594,
+ "learning_rate": 2.452698412698413e-05,
+ "loss": 0.3683,
+ "step": 4887
+ },
+ {
+ "epoch": 27.931428571428572,
+ "grad_norm": 73.19541931152344,
+ "learning_rate": 2.4520634920634923e-05,
+ "loss": 0.4275,
+ "step": 4888
+ },
+ {
+ "epoch": 27.937142857142856,
+ "grad_norm": 113.84541320800781,
+ "learning_rate": 2.4514285714285716e-05,
+ "loss": 0.2033,
+ "step": 4889
+ },
+ {
+ "epoch": 27.942857142857143,
+ "grad_norm": 37.84712600708008,
+ "learning_rate": 2.4507936507936512e-05,
+ "loss": 0.2811,
+ "step": 4890
+ },
+ {
+ "epoch": 27.94857142857143,
+ "grad_norm": 35.4206428527832,
+ "learning_rate": 2.4501587301587304e-05,
+ "loss": 0.2498,
+ "step": 4891
+ },
+ {
+ "epoch": 27.954285714285714,
+ "grad_norm": 56.23537826538086,
+ "learning_rate": 2.4495238095238097e-05,
+ "loss": 0.2603,
+ "step": 4892
+ },
+ {
+ "epoch": 27.96,
+ "grad_norm": 92.81861114501953,
+ "learning_rate": 2.448888888888889e-05,
+ "loss": 0.304,
+ "step": 4893
+ },
+ {
+ "epoch": 27.965714285714284,
+ "grad_norm": 48.27447509765625,
+ "learning_rate": 2.4482539682539686e-05,
+ "loss": 0.284,
+ "step": 4894
+ },
+ {
+ "epoch": 27.97142857142857,
+ "grad_norm": 49.307498931884766,
+ "learning_rate": 2.447619047619048e-05,
+ "loss": 0.296,
+ "step": 4895
+ },
+ {
+ "epoch": 27.97714285714286,
+ "grad_norm": 52.156822204589844,
+ "learning_rate": 2.446984126984127e-05,
+ "loss": 0.283,
+ "step": 4896
+ },
+ {
+ "epoch": 27.982857142857142,
+ "grad_norm": 37.40717315673828,
+ "learning_rate": 2.4463492063492064e-05,
+ "loss": 0.3139,
+ "step": 4897
+ },
+ {
+ "epoch": 27.98857142857143,
+ "grad_norm": 29.659826278686523,
+ "learning_rate": 2.445714285714286e-05,
+ "loss": 0.2711,
+ "step": 4898
+ },
+ {
+ "epoch": 27.994285714285713,
+ "grad_norm": 84.78431701660156,
+ "learning_rate": 2.4450793650793652e-05,
+ "loss": 0.2837,
+ "step": 4899
+ },
+ {
+ "epoch": 28.0,
+ "grad_norm": 43.500694274902344,
+ "learning_rate": 2.4444444444444445e-05,
+ "loss": 0.2073,
+ "step": 4900
+ },
+ {
+ "epoch": 28.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6610374450683594,
+ "eval_map": 0.9033,
+ "eval_map_50": 0.9511,
+ "eval_map_75": 0.9406,
+ "eval_map_large": 0.9038,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9033,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7727,
+ "eval_mar_10": 0.9635,
+ "eval_mar_100": 0.9676,
+ "eval_mar_100_per_class": 0.9676,
+ "eval_mar_large": 0.9676,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 14.5982,
+ "eval_samples_per_second": 20.139,
+ "eval_steps_per_second": 2.535,
+ "step": 4900
+ },
+ {
+ "epoch": 28.005714285714287,
+ "grad_norm": 25.908740997314453,
+ "learning_rate": 2.443809523809524e-05,
+ "loss": 0.2257,
+ "step": 4901
+ },
+ {
+ "epoch": 28.01142857142857,
+ "grad_norm": 45.944671630859375,
+ "learning_rate": 2.4431746031746034e-05,
+ "loss": 0.302,
+ "step": 4902
+ },
+ {
+ "epoch": 28.017142857142858,
+ "grad_norm": 33.82231903076172,
+ "learning_rate": 2.4425396825396826e-05,
+ "loss": 0.2747,
+ "step": 4903
+ },
+ {
+ "epoch": 28.02285714285714,
+ "grad_norm": 40.96854019165039,
+ "learning_rate": 2.441904761904762e-05,
+ "loss": 0.3091,
+ "step": 4904
+ },
+ {
+ "epoch": 28.02857142857143,
+ "grad_norm": 66.51483154296875,
+ "learning_rate": 2.4412698412698415e-05,
+ "loss": 0.3371,
+ "step": 4905
+ },
+ {
+ "epoch": 28.034285714285716,
+ "grad_norm": 73.85417175292969,
+ "learning_rate": 2.4406349206349208e-05,
+ "loss": 0.339,
+ "step": 4906
+ },
+ {
+ "epoch": 28.04,
+ "grad_norm": 92.99559783935547,
+ "learning_rate": 2.44e-05,
+ "loss": 0.4866,
+ "step": 4907
+ },
+ {
+ "epoch": 28.045714285714286,
+ "grad_norm": 90.50003814697266,
+ "learning_rate": 2.4393650793650793e-05,
+ "loss": 0.2672,
+ "step": 4908
+ },
+ {
+ "epoch": 28.05142857142857,
+ "grad_norm": 245.05653381347656,
+ "learning_rate": 2.438730158730159e-05,
+ "loss": 0.2963,
+ "step": 4909
+ },
+ {
+ "epoch": 28.057142857142857,
+ "grad_norm": 45.6023063659668,
+ "learning_rate": 2.438095238095238e-05,
+ "loss": 0.2084,
+ "step": 4910
+ },
+ {
+ "epoch": 28.062857142857144,
+ "grad_norm": 36.97957992553711,
+ "learning_rate": 2.4374603174603174e-05,
+ "loss": 0.3108,
+ "step": 4911
+ },
+ {
+ "epoch": 28.068571428571428,
+ "grad_norm": 62.7725830078125,
+ "learning_rate": 2.436825396825397e-05,
+ "loss": 0.3325,
+ "step": 4912
+ },
+ {
+ "epoch": 28.074285714285715,
+ "grad_norm": 163.5262908935547,
+ "learning_rate": 2.4361904761904763e-05,
+ "loss": 0.185,
+ "step": 4913
+ },
+ {
+ "epoch": 28.08,
+ "grad_norm": 46.41025161743164,
+ "learning_rate": 2.4355555555555555e-05,
+ "loss": 0.3088,
+ "step": 4914
+ },
+ {
+ "epoch": 28.085714285714285,
+ "grad_norm": 131.6428680419922,
+ "learning_rate": 2.4349206349206348e-05,
+ "loss": 0.2327,
+ "step": 4915
+ },
+ {
+ "epoch": 28.091428571428573,
+ "grad_norm": 67.80531311035156,
+ "learning_rate": 2.4342857142857144e-05,
+ "loss": 0.4335,
+ "step": 4916
+ },
+ {
+ "epoch": 28.097142857142856,
+ "grad_norm": 174.15245056152344,
+ "learning_rate": 2.4336507936507937e-05,
+ "loss": 0.2031,
+ "step": 4917
+ },
+ {
+ "epoch": 28.102857142857143,
+ "grad_norm": 39.03656768798828,
+ "learning_rate": 2.433015873015873e-05,
+ "loss": 0.1855,
+ "step": 4918
+ },
+ {
+ "epoch": 28.10857142857143,
+ "grad_norm": 265.5483703613281,
+ "learning_rate": 2.4323809523809525e-05,
+ "loss": 0.2976,
+ "step": 4919
+ },
+ {
+ "epoch": 28.114285714285714,
+ "grad_norm": 35.574119567871094,
+ "learning_rate": 2.4317460317460318e-05,
+ "loss": 0.2286,
+ "step": 4920
+ },
+ {
+ "epoch": 28.12,
+ "grad_norm": 25.896102905273438,
+ "learning_rate": 2.431111111111111e-05,
+ "loss": 0.2065,
+ "step": 4921
+ },
+ {
+ "epoch": 28.125714285714285,
+ "grad_norm": 60.17288589477539,
+ "learning_rate": 2.4304761904761907e-05,
+ "loss": 0.233,
+ "step": 4922
+ },
+ {
+ "epoch": 28.13142857142857,
+ "grad_norm": 61.1347541809082,
+ "learning_rate": 2.42984126984127e-05,
+ "loss": 0.2688,
+ "step": 4923
+ },
+ {
+ "epoch": 28.13714285714286,
+ "grad_norm": 24.536046981811523,
+ "learning_rate": 2.4292063492063495e-05,
+ "loss": 0.2074,
+ "step": 4924
+ },
+ {
+ "epoch": 28.142857142857142,
+ "grad_norm": 48.766456604003906,
+ "learning_rate": 2.4285714285714288e-05,
+ "loss": 0.1893,
+ "step": 4925
+ },
+ {
+ "epoch": 28.14857142857143,
+ "grad_norm": 77.26349639892578,
+ "learning_rate": 2.427936507936508e-05,
+ "loss": 0.2512,
+ "step": 4926
+ },
+ {
+ "epoch": 28.154285714285713,
+ "grad_norm": 37.16670608520508,
+ "learning_rate": 2.4273015873015877e-05,
+ "loss": 0.248,
+ "step": 4927
+ },
+ {
+ "epoch": 28.16,
+ "grad_norm": 34.46220016479492,
+ "learning_rate": 2.426666666666667e-05,
+ "loss": 0.2669,
+ "step": 4928
+ },
+ {
+ "epoch": 28.165714285714287,
+ "grad_norm": 367.3511962890625,
+ "learning_rate": 2.4260317460317462e-05,
+ "loss": 0.2103,
+ "step": 4929
+ },
+ {
+ "epoch": 28.17142857142857,
+ "grad_norm": 38.58537292480469,
+ "learning_rate": 2.4253968253968255e-05,
+ "loss": 0.2382,
+ "step": 4930
+ },
+ {
+ "epoch": 28.177142857142858,
+ "grad_norm": 58.70418167114258,
+ "learning_rate": 2.424761904761905e-05,
+ "loss": 0.329,
+ "step": 4931
+ },
+ {
+ "epoch": 28.18285714285714,
+ "grad_norm": 561.0505981445312,
+ "learning_rate": 2.4241269841269843e-05,
+ "loss": 0.3132,
+ "step": 4932
+ },
+ {
+ "epoch": 28.18857142857143,
+ "grad_norm": 12.026835441589355,
+ "learning_rate": 2.4234920634920636e-05,
+ "loss": 0.2756,
+ "step": 4933
+ },
+ {
+ "epoch": 28.194285714285716,
+ "grad_norm": 80.02447509765625,
+ "learning_rate": 2.4228571428571432e-05,
+ "loss": 0.2718,
+ "step": 4934
+ },
+ {
+ "epoch": 28.2,
+ "grad_norm": 71.53707885742188,
+ "learning_rate": 2.4222222222222224e-05,
+ "loss": 0.3472,
+ "step": 4935
+ },
+ {
+ "epoch": 28.205714285714286,
+ "grad_norm": 41.7224235534668,
+ "learning_rate": 2.4215873015873017e-05,
+ "loss": 0.3375,
+ "step": 4936
+ },
+ {
+ "epoch": 28.21142857142857,
+ "grad_norm": 33.52546310424805,
+ "learning_rate": 2.420952380952381e-05,
+ "loss": 0.227,
+ "step": 4937
+ },
+ {
+ "epoch": 28.217142857142857,
+ "grad_norm": 52.87554168701172,
+ "learning_rate": 2.4203174603174606e-05,
+ "loss": 0.3054,
+ "step": 4938
+ },
+ {
+ "epoch": 28.222857142857144,
+ "grad_norm": 21.825029373168945,
+ "learning_rate": 2.41968253968254e-05,
+ "loss": 0.3198,
+ "step": 4939
+ },
+ {
+ "epoch": 28.228571428571428,
+ "grad_norm": 53.98521041870117,
+ "learning_rate": 2.419047619047619e-05,
+ "loss": 0.2784,
+ "step": 4940
+ },
+ {
+ "epoch": 28.234285714285715,
+ "grad_norm": 47.195152282714844,
+ "learning_rate": 2.4184126984126984e-05,
+ "loss": 0.2392,
+ "step": 4941
+ },
+ {
+ "epoch": 28.24,
+ "grad_norm": 46.09539031982422,
+ "learning_rate": 2.417777777777778e-05,
+ "loss": 0.4016,
+ "step": 4942
+ },
+ {
+ "epoch": 28.245714285714286,
+ "grad_norm": 32.95344543457031,
+ "learning_rate": 2.4171428571428572e-05,
+ "loss": 0.2238,
+ "step": 4943
+ },
+ {
+ "epoch": 28.251428571428573,
+ "grad_norm": 44.7630500793457,
+ "learning_rate": 2.4165079365079365e-05,
+ "loss": 0.2587,
+ "step": 4944
+ },
+ {
+ "epoch": 28.257142857142856,
+ "grad_norm": 28.06485939025879,
+ "learning_rate": 2.415873015873016e-05,
+ "loss": 0.2784,
+ "step": 4945
+ },
+ {
+ "epoch": 28.262857142857143,
+ "grad_norm": 24.325355529785156,
+ "learning_rate": 2.4152380952380954e-05,
+ "loss": 0.3475,
+ "step": 4946
+ },
+ {
+ "epoch": 28.268571428571427,
+ "grad_norm": 75.55956268310547,
+ "learning_rate": 2.4146031746031746e-05,
+ "loss": 0.3422,
+ "step": 4947
+ },
+ {
+ "epoch": 28.274285714285714,
+ "grad_norm": 50.63954162597656,
+ "learning_rate": 2.413968253968254e-05,
+ "loss": 0.3834,
+ "step": 4948
+ },
+ {
+ "epoch": 28.28,
+ "grad_norm": 62.6453857421875,
+ "learning_rate": 2.4133333333333335e-05,
+ "loss": 0.2165,
+ "step": 4949
+ },
+ {
+ "epoch": 28.285714285714285,
+ "grad_norm": 24.08328628540039,
+ "learning_rate": 2.4126984126984128e-05,
+ "loss": 0.2537,
+ "step": 4950
+ },
+ {
+ "epoch": 28.291428571428572,
+ "grad_norm": 69.02284240722656,
+ "learning_rate": 2.412063492063492e-05,
+ "loss": 0.3462,
+ "step": 4951
+ },
+ {
+ "epoch": 28.29714285714286,
+ "grad_norm": 51.43291473388672,
+ "learning_rate": 2.4114285714285713e-05,
+ "loss": 0.2596,
+ "step": 4952
+ },
+ {
+ "epoch": 28.302857142857142,
+ "grad_norm": 30.58786964416504,
+ "learning_rate": 2.410793650793651e-05,
+ "loss": 0.3577,
+ "step": 4953
+ },
+ {
+ "epoch": 28.30857142857143,
+ "grad_norm": 278.1788330078125,
+ "learning_rate": 2.41015873015873e-05,
+ "loss": 0.249,
+ "step": 4954
+ },
+ {
+ "epoch": 28.314285714285713,
+ "grad_norm": 100.6025390625,
+ "learning_rate": 2.4095238095238094e-05,
+ "loss": 0.389,
+ "step": 4955
+ },
+ {
+ "epoch": 28.32,
+ "grad_norm": 37.11484146118164,
+ "learning_rate": 2.408888888888889e-05,
+ "loss": 0.2978,
+ "step": 4956
+ },
+ {
+ "epoch": 28.325714285714287,
+ "grad_norm": 30.713354110717773,
+ "learning_rate": 2.4082539682539683e-05,
+ "loss": 0.2905,
+ "step": 4957
+ },
+ {
+ "epoch": 28.33142857142857,
+ "grad_norm": 41.22504425048828,
+ "learning_rate": 2.407619047619048e-05,
+ "loss": 0.3737,
+ "step": 4958
+ },
+ {
+ "epoch": 28.337142857142858,
+ "grad_norm": 33.79350280761719,
+ "learning_rate": 2.406984126984127e-05,
+ "loss": 0.3313,
+ "step": 4959
+ },
+ {
+ "epoch": 28.34285714285714,
+ "grad_norm": 29.72348403930664,
+ "learning_rate": 2.4063492063492064e-05,
+ "loss": 0.3027,
+ "step": 4960
+ },
+ {
+ "epoch": 28.34857142857143,
+ "grad_norm": 81.20671081542969,
+ "learning_rate": 2.405714285714286e-05,
+ "loss": 0.2993,
+ "step": 4961
+ },
+ {
+ "epoch": 28.354285714285716,
+ "grad_norm": 53.03126525878906,
+ "learning_rate": 2.4050793650793653e-05,
+ "loss": 0.3504,
+ "step": 4962
+ },
+ {
+ "epoch": 28.36,
+ "grad_norm": 36.0410041809082,
+ "learning_rate": 2.4044444444444445e-05,
+ "loss": 0.231,
+ "step": 4963
+ },
+ {
+ "epoch": 28.365714285714287,
+ "grad_norm": 44.50819396972656,
+ "learning_rate": 2.403809523809524e-05,
+ "loss": 0.2163,
+ "step": 4964
+ },
+ {
+ "epoch": 28.37142857142857,
+ "grad_norm": 49.101707458496094,
+ "learning_rate": 2.4031746031746034e-05,
+ "loss": 0.2293,
+ "step": 4965
+ },
+ {
+ "epoch": 28.377142857142857,
+ "grad_norm": 36.19911575317383,
+ "learning_rate": 2.4025396825396827e-05,
+ "loss": 0.2537,
+ "step": 4966
+ },
+ {
+ "epoch": 28.382857142857144,
+ "grad_norm": 87.96337890625,
+ "learning_rate": 2.4019047619047623e-05,
+ "loss": 0.2773,
+ "step": 4967
+ },
+ {
+ "epoch": 28.388571428571428,
+ "grad_norm": 32.72904586791992,
+ "learning_rate": 2.4012698412698415e-05,
+ "loss": 0.3971,
+ "step": 4968
+ },
+ {
+ "epoch": 28.394285714285715,
+ "grad_norm": 36.359352111816406,
+ "learning_rate": 2.4006349206349208e-05,
+ "loss": 0.2817,
+ "step": 4969
+ },
+ {
+ "epoch": 28.4,
+ "grad_norm": 36.13983917236328,
+ "learning_rate": 2.4e-05,
+ "loss": 0.235,
+ "step": 4970
+ },
+ {
+ "epoch": 28.405714285714286,
+ "grad_norm": 32.429683685302734,
+ "learning_rate": 2.3993650793650797e-05,
+ "loss": 0.2048,
+ "step": 4971
+ },
+ {
+ "epoch": 28.411428571428573,
+ "grad_norm": 285.6448974609375,
+ "learning_rate": 2.398730158730159e-05,
+ "loss": 0.275,
+ "step": 4972
+ },
+ {
+ "epoch": 28.417142857142856,
+ "grad_norm": 24.912200927734375,
+ "learning_rate": 2.3980952380952382e-05,
+ "loss": 0.3147,
+ "step": 4973
+ },
+ {
+ "epoch": 28.422857142857143,
+ "grad_norm": 42.56909942626953,
+ "learning_rate": 2.3974603174603175e-05,
+ "loss": 0.2346,
+ "step": 4974
+ },
+ {
+ "epoch": 28.428571428571427,
+ "grad_norm": 22.605735778808594,
+ "learning_rate": 2.396825396825397e-05,
+ "loss": 0.3097,
+ "step": 4975
+ },
+ {
+ "epoch": 28.434285714285714,
+ "grad_norm": 29.61728286743164,
+ "learning_rate": 2.3961904761904763e-05,
+ "loss": 0.2481,
+ "step": 4976
+ },
+ {
+ "epoch": 28.44,
+ "grad_norm": 81.6629638671875,
+ "learning_rate": 2.3955555555555556e-05,
+ "loss": 0.2466,
+ "step": 4977
+ },
+ {
+ "epoch": 28.445714285714285,
+ "grad_norm": 54.739471435546875,
+ "learning_rate": 2.394920634920635e-05,
+ "loss": 0.3339,
+ "step": 4978
+ },
+ {
+ "epoch": 28.451428571428572,
+ "grad_norm": 41.38652420043945,
+ "learning_rate": 2.3942857142857144e-05,
+ "loss": 0.2028,
+ "step": 4979
+ },
+ {
+ "epoch": 28.457142857142856,
+ "grad_norm": 33.170894622802734,
+ "learning_rate": 2.3936507936507937e-05,
+ "loss": 0.2143,
+ "step": 4980
+ },
+ {
+ "epoch": 28.462857142857143,
+ "grad_norm": 39.60905456542969,
+ "learning_rate": 2.393015873015873e-05,
+ "loss": 0.2176,
+ "step": 4981
+ },
+ {
+ "epoch": 28.46857142857143,
+ "grad_norm": 36.18077850341797,
+ "learning_rate": 2.3923809523809526e-05,
+ "loss": 0.402,
+ "step": 4982
+ },
+ {
+ "epoch": 28.474285714285713,
+ "grad_norm": 29.43824005126953,
+ "learning_rate": 2.391746031746032e-05,
+ "loss": 0.2801,
+ "step": 4983
+ },
+ {
+ "epoch": 28.48,
+ "grad_norm": 109.75287628173828,
+ "learning_rate": 2.391111111111111e-05,
+ "loss": 0.2771,
+ "step": 4984
+ },
+ {
+ "epoch": 28.485714285714284,
+ "grad_norm": 51.55800247192383,
+ "learning_rate": 2.3904761904761904e-05,
+ "loss": 0.2601,
+ "step": 4985
+ },
+ {
+ "epoch": 28.49142857142857,
+ "grad_norm": 46.78274154663086,
+ "learning_rate": 2.38984126984127e-05,
+ "loss": 0.2106,
+ "step": 4986
+ },
+ {
+ "epoch": 28.497142857142858,
+ "grad_norm": 81.14583587646484,
+ "learning_rate": 2.3892063492063492e-05,
+ "loss": 0.2288,
+ "step": 4987
+ },
+ {
+ "epoch": 28.502857142857142,
+ "grad_norm": 421.8680114746094,
+ "learning_rate": 2.3885714285714285e-05,
+ "loss": 0.3071,
+ "step": 4988
+ },
+ {
+ "epoch": 28.50857142857143,
+ "grad_norm": 43.99879455566406,
+ "learning_rate": 2.3879365079365078e-05,
+ "loss": 0.2079,
+ "step": 4989
+ },
+ {
+ "epoch": 28.514285714285712,
+ "grad_norm": 50.826026916503906,
+ "learning_rate": 2.3873015873015874e-05,
+ "loss": 0.2747,
+ "step": 4990
+ },
+ {
+ "epoch": 28.52,
+ "grad_norm": 129.66259765625,
+ "learning_rate": 2.3866666666666666e-05,
+ "loss": 0.3396,
+ "step": 4991
+ },
+ {
+ "epoch": 28.525714285714287,
+ "grad_norm": 36.57542037963867,
+ "learning_rate": 2.3860317460317462e-05,
+ "loss": 0.2043,
+ "step": 4992
+ },
+ {
+ "epoch": 28.53142857142857,
+ "grad_norm": 26.30794334411621,
+ "learning_rate": 2.3853968253968255e-05,
+ "loss": 0.1988,
+ "step": 4993
+ },
+ {
+ "epoch": 28.537142857142857,
+ "grad_norm": 851.5133056640625,
+ "learning_rate": 2.3847619047619048e-05,
+ "loss": 0.2133,
+ "step": 4994
+ },
+ {
+ "epoch": 28.542857142857144,
+ "grad_norm": 84.00727844238281,
+ "learning_rate": 2.3841269841269844e-05,
+ "loss": 0.3999,
+ "step": 4995
+ },
+ {
+ "epoch": 28.548571428571428,
+ "grad_norm": 49.521583557128906,
+ "learning_rate": 2.3834920634920636e-05,
+ "loss": 0.2687,
+ "step": 4996
+ },
+ {
+ "epoch": 28.554285714285715,
+ "grad_norm": 38.404232025146484,
+ "learning_rate": 2.3828571428571432e-05,
+ "loss": 0.3111,
+ "step": 4997
+ },
+ {
+ "epoch": 28.56,
+ "grad_norm": 81.65354919433594,
+ "learning_rate": 2.3822222222222225e-05,
+ "loss": 0.3128,
+ "step": 4998
+ },
+ {
+ "epoch": 28.565714285714286,
+ "grad_norm": 139.422607421875,
+ "learning_rate": 2.3815873015873018e-05,
+ "loss": 0.3024,
+ "step": 4999
+ },
+ {
+ "epoch": 28.571428571428573,
+ "grad_norm": 64.09517669677734,
+ "learning_rate": 2.380952380952381e-05,
+ "loss": 0.2531,
+ "step": 5000
+ },
+ {
+ "epoch": 28.577142857142857,
+ "grad_norm": 725.398681640625,
+ "learning_rate": 2.3803174603174606e-05,
+ "loss": 0.3155,
+ "step": 5001
+ },
+ {
+ "epoch": 28.582857142857144,
+ "grad_norm": 62.54762268066406,
+ "learning_rate": 2.37968253968254e-05,
+ "loss": 0.2865,
+ "step": 5002
+ },
+ {
+ "epoch": 28.588571428571427,
+ "grad_norm": 52.35954284667969,
+ "learning_rate": 2.379047619047619e-05,
+ "loss": 0.1661,
+ "step": 5003
+ },
+ {
+ "epoch": 28.594285714285714,
+ "grad_norm": 21.684301376342773,
+ "learning_rate": 2.3784126984126988e-05,
+ "loss": 0.1934,
+ "step": 5004
+ },
+ {
+ "epoch": 28.6,
+ "grad_norm": 68.8770751953125,
+ "learning_rate": 2.377777777777778e-05,
+ "loss": 0.2781,
+ "step": 5005
+ },
+ {
+ "epoch": 28.605714285714285,
+ "grad_norm": 55.42689514160156,
+ "learning_rate": 2.3771428571428573e-05,
+ "loss": 0.1953,
+ "step": 5006
+ },
+ {
+ "epoch": 28.611428571428572,
+ "grad_norm": 60.84716796875,
+ "learning_rate": 2.3765079365079365e-05,
+ "loss": 0.2181,
+ "step": 5007
+ },
+ {
+ "epoch": 28.617142857142856,
+ "grad_norm": 39.095890045166016,
+ "learning_rate": 2.375873015873016e-05,
+ "loss": 0.2337,
+ "step": 5008
+ },
+ {
+ "epoch": 28.622857142857143,
+ "grad_norm": 43.01543426513672,
+ "learning_rate": 2.3752380952380954e-05,
+ "loss": 0.304,
+ "step": 5009
+ },
+ {
+ "epoch": 28.62857142857143,
+ "grad_norm": 45.74905776977539,
+ "learning_rate": 2.3746031746031747e-05,
+ "loss": 0.2976,
+ "step": 5010
+ },
+ {
+ "epoch": 28.634285714285713,
+ "grad_norm": 34.543521881103516,
+ "learning_rate": 2.373968253968254e-05,
+ "loss": 0.2022,
+ "step": 5011
+ },
+ {
+ "epoch": 28.64,
+ "grad_norm": 35.3597526550293,
+ "learning_rate": 2.3733333333333335e-05,
+ "loss": 0.4038,
+ "step": 5012
+ },
+ {
+ "epoch": 28.645714285714284,
+ "grad_norm": 133.12742614746094,
+ "learning_rate": 2.3726984126984128e-05,
+ "loss": 0.3324,
+ "step": 5013
+ },
+ {
+ "epoch": 28.65142857142857,
+ "grad_norm": 49.119483947753906,
+ "learning_rate": 2.372063492063492e-05,
+ "loss": 0.2856,
+ "step": 5014
+ },
+ {
+ "epoch": 28.65714285714286,
+ "grad_norm": 34.71833038330078,
+ "learning_rate": 2.3714285714285717e-05,
+ "loss": 0.1847,
+ "step": 5015
+ },
+ {
+ "epoch": 28.662857142857142,
+ "grad_norm": 32.97146987915039,
+ "learning_rate": 2.370793650793651e-05,
+ "loss": 0.235,
+ "step": 5016
+ },
+ {
+ "epoch": 28.66857142857143,
+ "grad_norm": 304.8446960449219,
+ "learning_rate": 2.3701587301587302e-05,
+ "loss": 0.2851,
+ "step": 5017
+ },
+ {
+ "epoch": 28.674285714285713,
+ "grad_norm": 37.80881881713867,
+ "learning_rate": 2.3695238095238095e-05,
+ "loss": 0.2828,
+ "step": 5018
+ },
+ {
+ "epoch": 28.68,
+ "grad_norm": 64.08672332763672,
+ "learning_rate": 2.368888888888889e-05,
+ "loss": 0.2789,
+ "step": 5019
+ },
+ {
+ "epoch": 28.685714285714287,
+ "grad_norm": 47.080753326416016,
+ "learning_rate": 2.3682539682539683e-05,
+ "loss": 0.3367,
+ "step": 5020
+ },
+ {
+ "epoch": 28.69142857142857,
+ "grad_norm": 248.08181762695312,
+ "learning_rate": 2.3676190476190476e-05,
+ "loss": 0.369,
+ "step": 5021
+ },
+ {
+ "epoch": 28.697142857142858,
+ "grad_norm": 58.87960433959961,
+ "learning_rate": 2.366984126984127e-05,
+ "loss": 0.3662,
+ "step": 5022
+ },
+ {
+ "epoch": 28.70285714285714,
+ "grad_norm": 69.46220397949219,
+ "learning_rate": 2.3663492063492065e-05,
+ "loss": 0.2592,
+ "step": 5023
+ },
+ {
+ "epoch": 28.708571428571428,
+ "grad_norm": 30.98453140258789,
+ "learning_rate": 2.3657142857142857e-05,
+ "loss": 0.2933,
+ "step": 5024
+ },
+ {
+ "epoch": 28.714285714285715,
+ "grad_norm": 54.25831604003906,
+ "learning_rate": 2.365079365079365e-05,
+ "loss": 0.3409,
+ "step": 5025
+ },
+ {
+ "epoch": 28.72,
+ "grad_norm": 41.06281280517578,
+ "learning_rate": 2.3644444444444446e-05,
+ "loss": 0.2852,
+ "step": 5026
+ },
+ {
+ "epoch": 28.725714285714286,
+ "grad_norm": 17.57217788696289,
+ "learning_rate": 2.363809523809524e-05,
+ "loss": 0.2236,
+ "step": 5027
+ },
+ {
+ "epoch": 28.731428571428573,
+ "grad_norm": 57.584651947021484,
+ "learning_rate": 2.363174603174603e-05,
+ "loss": 0.2076,
+ "step": 5028
+ },
+ {
+ "epoch": 28.737142857142857,
+ "grad_norm": 36.87367248535156,
+ "learning_rate": 2.3625396825396827e-05,
+ "loss": 0.2198,
+ "step": 5029
+ },
+ {
+ "epoch": 28.742857142857144,
+ "grad_norm": 134.8672332763672,
+ "learning_rate": 2.361904761904762e-05,
+ "loss": 0.3409,
+ "step": 5030
+ },
+ {
+ "epoch": 28.748571428571427,
+ "grad_norm": 102.65425872802734,
+ "learning_rate": 2.3612698412698416e-05,
+ "loss": 0.2485,
+ "step": 5031
+ },
+ {
+ "epoch": 28.754285714285714,
+ "grad_norm": 43.4425163269043,
+ "learning_rate": 2.360634920634921e-05,
+ "loss": 0.2658,
+ "step": 5032
+ },
+ {
+ "epoch": 28.76,
+ "grad_norm": 72.87287902832031,
+ "learning_rate": 2.36e-05,
+ "loss": 0.307,
+ "step": 5033
+ },
+ {
+ "epoch": 28.765714285714285,
+ "grad_norm": 70.9742431640625,
+ "learning_rate": 2.3593650793650797e-05,
+ "loss": 0.2784,
+ "step": 5034
+ },
+ {
+ "epoch": 28.771428571428572,
+ "grad_norm": 21.395336151123047,
+ "learning_rate": 2.358730158730159e-05,
+ "loss": 0.2039,
+ "step": 5035
+ },
+ {
+ "epoch": 28.777142857142856,
+ "grad_norm": 38.85607147216797,
+ "learning_rate": 2.3580952380952382e-05,
+ "loss": 0.2805,
+ "step": 5036
+ },
+ {
+ "epoch": 28.782857142857143,
+ "grad_norm": 35.8581657409668,
+ "learning_rate": 2.357460317460318e-05,
+ "loss": 0.3868,
+ "step": 5037
+ },
+ {
+ "epoch": 28.78857142857143,
+ "grad_norm": 36.439613342285156,
+ "learning_rate": 2.356825396825397e-05,
+ "loss": 0.2175,
+ "step": 5038
+ },
+ {
+ "epoch": 28.794285714285714,
+ "grad_norm": 40.9402961730957,
+ "learning_rate": 2.3561904761904764e-05,
+ "loss": 0.2621,
+ "step": 5039
+ },
+ {
+ "epoch": 28.8,
+ "grad_norm": 31.58989715576172,
+ "learning_rate": 2.3555555555555556e-05,
+ "loss": 0.2656,
+ "step": 5040
+ },
+ {
+ "epoch": 28.805714285714284,
+ "grad_norm": 28.273897171020508,
+ "learning_rate": 2.3549206349206352e-05,
+ "loss": 0.3005,
+ "step": 5041
+ },
+ {
+ "epoch": 28.81142857142857,
+ "grad_norm": 31.848737716674805,
+ "learning_rate": 2.3542857142857145e-05,
+ "loss": 0.2608,
+ "step": 5042
+ },
+ {
+ "epoch": 28.81714285714286,
+ "grad_norm": 26.318984985351562,
+ "learning_rate": 2.3536507936507938e-05,
+ "loss": 0.2807,
+ "step": 5043
+ },
+ {
+ "epoch": 28.822857142857142,
+ "grad_norm": 27.732038497924805,
+ "learning_rate": 2.353015873015873e-05,
+ "loss": 0.3175,
+ "step": 5044
+ },
+ {
+ "epoch": 28.82857142857143,
+ "grad_norm": 159.66786193847656,
+ "learning_rate": 2.3523809523809526e-05,
+ "loss": 0.2746,
+ "step": 5045
+ },
+ {
+ "epoch": 28.834285714285713,
+ "grad_norm": 30.27684783935547,
+ "learning_rate": 2.351746031746032e-05,
+ "loss": 0.31,
+ "step": 5046
+ },
+ {
+ "epoch": 28.84,
+ "grad_norm": 179.499755859375,
+ "learning_rate": 2.351111111111111e-05,
+ "loss": 0.2647,
+ "step": 5047
+ },
+ {
+ "epoch": 28.845714285714287,
+ "grad_norm": 37.019981384277344,
+ "learning_rate": 2.3504761904761908e-05,
+ "loss": 0.2639,
+ "step": 5048
+ },
+ {
+ "epoch": 28.85142857142857,
+ "grad_norm": 25.458894729614258,
+ "learning_rate": 2.34984126984127e-05,
+ "loss": 0.2163,
+ "step": 5049
+ },
+ {
+ "epoch": 28.857142857142858,
+ "grad_norm": 28.230152130126953,
+ "learning_rate": 2.3492063492063493e-05,
+ "loss": 0.2093,
+ "step": 5050
+ },
+ {
+ "epoch": 28.86285714285714,
+ "grad_norm": 60.67851257324219,
+ "learning_rate": 2.3485714285714285e-05,
+ "loss": 0.3026,
+ "step": 5051
+ },
+ {
+ "epoch": 28.86857142857143,
+ "grad_norm": 17.1420955657959,
+ "learning_rate": 2.347936507936508e-05,
+ "loss": 0.2783,
+ "step": 5052
+ },
+ {
+ "epoch": 28.874285714285715,
+ "grad_norm": 23.672748565673828,
+ "learning_rate": 2.3473015873015874e-05,
+ "loss": 0.2812,
+ "step": 5053
+ },
+ {
+ "epoch": 28.88,
+ "grad_norm": 41.617652893066406,
+ "learning_rate": 2.3466666666666667e-05,
+ "loss": 0.2867,
+ "step": 5054
+ },
+ {
+ "epoch": 28.885714285714286,
+ "grad_norm": 256.7421875,
+ "learning_rate": 2.346031746031746e-05,
+ "loss": 0.3205,
+ "step": 5055
+ },
+ {
+ "epoch": 28.89142857142857,
+ "grad_norm": 70.80400085449219,
+ "learning_rate": 2.3453968253968255e-05,
+ "loss": 0.3938,
+ "step": 5056
+ },
+ {
+ "epoch": 28.897142857142857,
+ "grad_norm": 35.83601379394531,
+ "learning_rate": 2.3447619047619048e-05,
+ "loss": 0.215,
+ "step": 5057
+ },
+ {
+ "epoch": 28.902857142857144,
+ "grad_norm": 64.71305084228516,
+ "learning_rate": 2.344126984126984e-05,
+ "loss": 0.2704,
+ "step": 5058
+ },
+ {
+ "epoch": 28.908571428571427,
+ "grad_norm": 2356.586181640625,
+ "learning_rate": 2.3434920634920637e-05,
+ "loss": 0.3223,
+ "step": 5059
+ },
+ {
+ "epoch": 28.914285714285715,
+ "grad_norm": 122.84352111816406,
+ "learning_rate": 2.342857142857143e-05,
+ "loss": 0.2599,
+ "step": 5060
+ },
+ {
+ "epoch": 28.92,
+ "grad_norm": 47.86798858642578,
+ "learning_rate": 2.3422222222222222e-05,
+ "loss": 0.4071,
+ "step": 5061
+ },
+ {
+ "epoch": 28.925714285714285,
+ "grad_norm": 75.26738739013672,
+ "learning_rate": 2.3415873015873015e-05,
+ "loss": 0.2682,
+ "step": 5062
+ },
+ {
+ "epoch": 28.931428571428572,
+ "grad_norm": 46.6579704284668,
+ "learning_rate": 2.340952380952381e-05,
+ "loss": 0.3293,
+ "step": 5063
+ },
+ {
+ "epoch": 28.937142857142856,
+ "grad_norm": 56.65286636352539,
+ "learning_rate": 2.3403174603174603e-05,
+ "loss": 0.2709,
+ "step": 5064
+ },
+ {
+ "epoch": 28.942857142857143,
+ "grad_norm": 42.67585372924805,
+ "learning_rate": 2.3396825396825396e-05,
+ "loss": 0.2689,
+ "step": 5065
+ },
+ {
+ "epoch": 28.94857142857143,
+ "grad_norm": 74.5224838256836,
+ "learning_rate": 2.3390476190476192e-05,
+ "loss": 0.3419,
+ "step": 5066
+ },
+ {
+ "epoch": 28.954285714285714,
+ "grad_norm": 54.574241638183594,
+ "learning_rate": 2.3384126984126985e-05,
+ "loss": 0.2683,
+ "step": 5067
+ },
+ {
+ "epoch": 28.96,
+ "grad_norm": 56.5332145690918,
+ "learning_rate": 2.337777777777778e-05,
+ "loss": 0.3041,
+ "step": 5068
+ },
+ {
+ "epoch": 28.965714285714284,
+ "grad_norm": 26.72911262512207,
+ "learning_rate": 2.3371428571428573e-05,
+ "loss": 0.259,
+ "step": 5069
+ },
+ {
+ "epoch": 28.97142857142857,
+ "grad_norm": 55.12527084350586,
+ "learning_rate": 2.336507936507937e-05,
+ "loss": 0.2944,
+ "step": 5070
+ },
+ {
+ "epoch": 28.97714285714286,
+ "grad_norm": 1067.4193115234375,
+ "learning_rate": 2.3358730158730162e-05,
+ "loss": 0.2764,
+ "step": 5071
+ },
+ {
+ "epoch": 28.982857142857142,
+ "grad_norm": 32.64695739746094,
+ "learning_rate": 2.3352380952380955e-05,
+ "loss": 0.2097,
+ "step": 5072
+ },
+ {
+ "epoch": 28.98857142857143,
+ "grad_norm": 53.39537048339844,
+ "learning_rate": 2.3346031746031747e-05,
+ "loss": 0.1862,
+ "step": 5073
+ },
+ {
+ "epoch": 28.994285714285713,
+ "grad_norm": 46.71490478515625,
+ "learning_rate": 2.3339682539682543e-05,
+ "loss": 0.2427,
+ "step": 5074
+ },
+ {
+ "epoch": 29.0,
+ "grad_norm": 23.4241886138916,
+ "learning_rate": 2.3333333333333336e-05,
+ "loss": 0.2403,
+ "step": 5075
+ },
+ {
+ "epoch": 29.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6370882987976074,
+ "eval_map": 0.9197,
+ "eval_map_50": 0.96,
+ "eval_map_75": 0.9496,
+ "eval_map_large": 0.9198,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9197,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7803,
+ "eval_mar_10": 0.9714,
+ "eval_mar_100": 0.9737,
+ "eval_mar_100_per_class": 0.9737,
+ "eval_mar_large": 0.9737,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.2092,
+ "eval_samples_per_second": 22.257,
+ "eval_steps_per_second": 2.801,
+ "step": 5075
+ },
+ {
+ "epoch": 29.005714285714287,
+ "grad_norm": 62.46238327026367,
+ "learning_rate": 2.332698412698413e-05,
+ "loss": 0.2222,
+ "step": 5076
+ },
+ {
+ "epoch": 29.01142857142857,
+ "grad_norm": 28.127099990844727,
+ "learning_rate": 2.332063492063492e-05,
+ "loss": 0.2054,
+ "step": 5077
+ },
+ {
+ "epoch": 29.017142857142858,
+ "grad_norm": 21.029979705810547,
+ "learning_rate": 2.3314285714285717e-05,
+ "loss": 0.2697,
+ "step": 5078
+ },
+ {
+ "epoch": 29.02285714285714,
+ "grad_norm": 34.36430740356445,
+ "learning_rate": 2.330793650793651e-05,
+ "loss": 0.3093,
+ "step": 5079
+ },
+ {
+ "epoch": 29.02857142857143,
+ "grad_norm": 53.51395034790039,
+ "learning_rate": 2.3301587301587302e-05,
+ "loss": 0.2443,
+ "step": 5080
+ },
+ {
+ "epoch": 29.034285714285716,
+ "grad_norm": 153.634521484375,
+ "learning_rate": 2.3295238095238095e-05,
+ "loss": 0.2986,
+ "step": 5081
+ },
+ {
+ "epoch": 29.04,
+ "grad_norm": 27.302949905395508,
+ "learning_rate": 2.328888888888889e-05,
+ "loss": 0.178,
+ "step": 5082
+ },
+ {
+ "epoch": 29.045714285714286,
+ "grad_norm": 99.04785919189453,
+ "learning_rate": 2.3282539682539684e-05,
+ "loss": 0.3069,
+ "step": 5083
+ },
+ {
+ "epoch": 29.05142857142857,
+ "grad_norm": 26.07029914855957,
+ "learning_rate": 2.3276190476190476e-05,
+ "loss": 0.1895,
+ "step": 5084
+ },
+ {
+ "epoch": 29.057142857142857,
+ "grad_norm": 41.6248664855957,
+ "learning_rate": 2.3269841269841272e-05,
+ "loss": 0.5042,
+ "step": 5085
+ },
+ {
+ "epoch": 29.062857142857144,
+ "grad_norm": 36.2380485534668,
+ "learning_rate": 2.3263492063492065e-05,
+ "loss": 0.1864,
+ "step": 5086
+ },
+ {
+ "epoch": 29.068571428571428,
+ "grad_norm": 164.3536376953125,
+ "learning_rate": 2.3257142857142858e-05,
+ "loss": 0.1958,
+ "step": 5087
+ },
+ {
+ "epoch": 29.074285714285715,
+ "grad_norm": 51.7596549987793,
+ "learning_rate": 2.325079365079365e-05,
+ "loss": 0.2627,
+ "step": 5088
+ },
+ {
+ "epoch": 29.08,
+ "grad_norm": 48.146697998046875,
+ "learning_rate": 2.3244444444444446e-05,
+ "loss": 0.2056,
+ "step": 5089
+ },
+ {
+ "epoch": 29.085714285714285,
+ "grad_norm": 104.46647644042969,
+ "learning_rate": 2.323809523809524e-05,
+ "loss": 0.2248,
+ "step": 5090
+ },
+ {
+ "epoch": 29.091428571428573,
+ "grad_norm": 54.64583206176758,
+ "learning_rate": 2.323174603174603e-05,
+ "loss": 0.2388,
+ "step": 5091
+ },
+ {
+ "epoch": 29.097142857142856,
+ "grad_norm": 41.416770935058594,
+ "learning_rate": 2.3225396825396824e-05,
+ "loss": 0.2371,
+ "step": 5092
+ },
+ {
+ "epoch": 29.102857142857143,
+ "grad_norm": 49.938232421875,
+ "learning_rate": 2.321904761904762e-05,
+ "loss": 0.2131,
+ "step": 5093
+ },
+ {
+ "epoch": 29.10857142857143,
+ "grad_norm": 49.01100158691406,
+ "learning_rate": 2.3212698412698413e-05,
+ "loss": 0.252,
+ "step": 5094
+ },
+ {
+ "epoch": 29.114285714285714,
+ "grad_norm": 50.47200012207031,
+ "learning_rate": 2.3206349206349205e-05,
+ "loss": 0.2117,
+ "step": 5095
+ },
+ {
+ "epoch": 29.12,
+ "grad_norm": 28.55891227722168,
+ "learning_rate": 2.32e-05,
+ "loss": 0.2452,
+ "step": 5096
+ },
+ {
+ "epoch": 29.125714285714285,
+ "grad_norm": 63.32829666137695,
+ "learning_rate": 2.3193650793650794e-05,
+ "loss": 0.2514,
+ "step": 5097
+ },
+ {
+ "epoch": 29.13142857142857,
+ "grad_norm": 48.11371994018555,
+ "learning_rate": 2.3187301587301587e-05,
+ "loss": 0.1662,
+ "step": 5098
+ },
+ {
+ "epoch": 29.13714285714286,
+ "grad_norm": 71.61554718017578,
+ "learning_rate": 2.318095238095238e-05,
+ "loss": 0.2542,
+ "step": 5099
+ },
+ {
+ "epoch": 29.142857142857142,
+ "grad_norm": 40.359100341796875,
+ "learning_rate": 2.3174603174603175e-05,
+ "loss": 0.2401,
+ "step": 5100
+ },
+ {
+ "epoch": 29.14857142857143,
+ "grad_norm": 65.17253112792969,
+ "learning_rate": 2.3168253968253968e-05,
+ "loss": 0.2548,
+ "step": 5101
+ },
+ {
+ "epoch": 29.154285714285713,
+ "grad_norm": 28.41598129272461,
+ "learning_rate": 2.3161904761904764e-05,
+ "loss": 0.3215,
+ "step": 5102
+ },
+ {
+ "epoch": 29.16,
+ "grad_norm": 52.9326286315918,
+ "learning_rate": 2.3155555555555557e-05,
+ "loss": 0.2229,
+ "step": 5103
+ },
+ {
+ "epoch": 29.165714285714287,
+ "grad_norm": 53.673404693603516,
+ "learning_rate": 2.314920634920635e-05,
+ "loss": 0.2258,
+ "step": 5104
+ },
+ {
+ "epoch": 29.17142857142857,
+ "grad_norm": 73.0558090209961,
+ "learning_rate": 2.3142857142857145e-05,
+ "loss": 0.2291,
+ "step": 5105
+ },
+ {
+ "epoch": 29.177142857142858,
+ "grad_norm": 91.16373443603516,
+ "learning_rate": 2.3136507936507938e-05,
+ "loss": 0.249,
+ "step": 5106
+ },
+ {
+ "epoch": 29.18285714285714,
+ "grad_norm": 50.14714813232422,
+ "learning_rate": 2.3130158730158734e-05,
+ "loss": 0.2104,
+ "step": 5107
+ },
+ {
+ "epoch": 29.18857142857143,
+ "grad_norm": 26.575700759887695,
+ "learning_rate": 2.3123809523809527e-05,
+ "loss": 0.1729,
+ "step": 5108
+ },
+ {
+ "epoch": 29.194285714285716,
+ "grad_norm": 20.4130916595459,
+ "learning_rate": 2.311746031746032e-05,
+ "loss": 0.2506,
+ "step": 5109
+ },
+ {
+ "epoch": 29.2,
+ "grad_norm": 50.21626663208008,
+ "learning_rate": 2.3111111111111112e-05,
+ "loss": 0.2938,
+ "step": 5110
+ },
+ {
+ "epoch": 29.205714285714286,
+ "grad_norm": 76.35237121582031,
+ "learning_rate": 2.3104761904761908e-05,
+ "loss": 0.1744,
+ "step": 5111
+ },
+ {
+ "epoch": 29.21142857142857,
+ "grad_norm": 56.98535919189453,
+ "learning_rate": 2.30984126984127e-05,
+ "loss": 0.308,
+ "step": 5112
+ },
+ {
+ "epoch": 29.217142857142857,
+ "grad_norm": 25.252614974975586,
+ "learning_rate": 2.3092063492063493e-05,
+ "loss": 0.2666,
+ "step": 5113
+ },
+ {
+ "epoch": 29.222857142857144,
+ "grad_norm": 92.06158447265625,
+ "learning_rate": 2.3085714285714286e-05,
+ "loss": 0.2411,
+ "step": 5114
+ },
+ {
+ "epoch": 29.228571428571428,
+ "grad_norm": 34.30237579345703,
+ "learning_rate": 2.3079365079365082e-05,
+ "loss": 0.2165,
+ "step": 5115
+ },
+ {
+ "epoch": 29.234285714285715,
+ "grad_norm": 46.90911865234375,
+ "learning_rate": 2.3073015873015875e-05,
+ "loss": 0.2352,
+ "step": 5116
+ },
+ {
+ "epoch": 29.24,
+ "grad_norm": 51.809452056884766,
+ "learning_rate": 2.3066666666666667e-05,
+ "loss": 0.1986,
+ "step": 5117
+ },
+ {
+ "epoch": 29.245714285714286,
+ "grad_norm": 65.37468719482422,
+ "learning_rate": 2.3060317460317463e-05,
+ "loss": 0.3478,
+ "step": 5118
+ },
+ {
+ "epoch": 29.251428571428573,
+ "grad_norm": 64.15426635742188,
+ "learning_rate": 2.3053968253968256e-05,
+ "loss": 0.1796,
+ "step": 5119
+ },
+ {
+ "epoch": 29.257142857142856,
+ "grad_norm": 28.12679100036621,
+ "learning_rate": 2.304761904761905e-05,
+ "loss": 0.2485,
+ "step": 5120
+ },
+ {
+ "epoch": 29.262857142857143,
+ "grad_norm": 66.80747985839844,
+ "learning_rate": 2.304126984126984e-05,
+ "loss": 0.4543,
+ "step": 5121
+ },
+ {
+ "epoch": 29.268571428571427,
+ "grad_norm": 271.1167297363281,
+ "learning_rate": 2.3034920634920637e-05,
+ "loss": 0.3339,
+ "step": 5122
+ },
+ {
+ "epoch": 29.274285714285714,
+ "grad_norm": 40.10055160522461,
+ "learning_rate": 2.302857142857143e-05,
+ "loss": 0.2452,
+ "step": 5123
+ },
+ {
+ "epoch": 29.28,
+ "grad_norm": 84.37879180908203,
+ "learning_rate": 2.3022222222222222e-05,
+ "loss": 0.3137,
+ "step": 5124
+ },
+ {
+ "epoch": 29.285714285714285,
+ "grad_norm": 73.4004898071289,
+ "learning_rate": 2.3015873015873015e-05,
+ "loss": 0.4139,
+ "step": 5125
+ },
+ {
+ "epoch": 29.291428571428572,
+ "grad_norm": 53.298362731933594,
+ "learning_rate": 2.300952380952381e-05,
+ "loss": 0.4182,
+ "step": 5126
+ },
+ {
+ "epoch": 29.29714285714286,
+ "grad_norm": 46.19538879394531,
+ "learning_rate": 2.3003174603174604e-05,
+ "loss": 0.2312,
+ "step": 5127
+ },
+ {
+ "epoch": 29.302857142857142,
+ "grad_norm": 27.84966278076172,
+ "learning_rate": 2.2996825396825396e-05,
+ "loss": 0.295,
+ "step": 5128
+ },
+ {
+ "epoch": 29.30857142857143,
+ "grad_norm": 29.424104690551758,
+ "learning_rate": 2.2990476190476192e-05,
+ "loss": 0.1956,
+ "step": 5129
+ },
+ {
+ "epoch": 29.314285714285713,
+ "grad_norm": 52.50281524658203,
+ "learning_rate": 2.2984126984126985e-05,
+ "loss": 0.2747,
+ "step": 5130
+ },
+ {
+ "epoch": 29.32,
+ "grad_norm": 22.580923080444336,
+ "learning_rate": 2.2977777777777778e-05,
+ "loss": 0.2007,
+ "step": 5131
+ },
+ {
+ "epoch": 29.325714285714287,
+ "grad_norm": 71.85227966308594,
+ "learning_rate": 2.297142857142857e-05,
+ "loss": 0.2285,
+ "step": 5132
+ },
+ {
+ "epoch": 29.33142857142857,
+ "grad_norm": 32.25730514526367,
+ "learning_rate": 2.2965079365079366e-05,
+ "loss": 0.2697,
+ "step": 5133
+ },
+ {
+ "epoch": 29.337142857142858,
+ "grad_norm": 73.0450210571289,
+ "learning_rate": 2.295873015873016e-05,
+ "loss": 0.2458,
+ "step": 5134
+ },
+ {
+ "epoch": 29.34285714285714,
+ "grad_norm": 29.31700325012207,
+ "learning_rate": 2.295238095238095e-05,
+ "loss": 0.2876,
+ "step": 5135
+ },
+ {
+ "epoch": 29.34857142857143,
+ "grad_norm": 47.174705505371094,
+ "learning_rate": 2.2946031746031744e-05,
+ "loss": 0.2977,
+ "step": 5136
+ },
+ {
+ "epoch": 29.354285714285716,
+ "grad_norm": 46.58511734008789,
+ "learning_rate": 2.293968253968254e-05,
+ "loss": 0.211,
+ "step": 5137
+ },
+ {
+ "epoch": 29.36,
+ "grad_norm": 625.805908203125,
+ "learning_rate": 2.2933333333333333e-05,
+ "loss": 0.3245,
+ "step": 5138
+ },
+ {
+ "epoch": 29.365714285714287,
+ "grad_norm": 286.7671813964844,
+ "learning_rate": 2.292698412698413e-05,
+ "loss": 0.2697,
+ "step": 5139
+ },
+ {
+ "epoch": 29.37142857142857,
+ "grad_norm": 64.6842041015625,
+ "learning_rate": 2.292063492063492e-05,
+ "loss": 0.402,
+ "step": 5140
+ },
+ {
+ "epoch": 29.377142857142857,
+ "grad_norm": 17.133848190307617,
+ "learning_rate": 2.2914285714285718e-05,
+ "loss": 0.2594,
+ "step": 5141
+ },
+ {
+ "epoch": 29.382857142857144,
+ "grad_norm": 22.38144302368164,
+ "learning_rate": 2.290793650793651e-05,
+ "loss": 0.3667,
+ "step": 5142
+ },
+ {
+ "epoch": 29.388571428571428,
+ "grad_norm": 1273.5078125,
+ "learning_rate": 2.2901587301587303e-05,
+ "loss": 0.3604,
+ "step": 5143
+ },
+ {
+ "epoch": 29.394285714285715,
+ "grad_norm": 188.72727966308594,
+ "learning_rate": 2.28952380952381e-05,
+ "loss": 0.2807,
+ "step": 5144
+ },
+ {
+ "epoch": 29.4,
+ "grad_norm": 29.680089950561523,
+ "learning_rate": 2.288888888888889e-05,
+ "loss": 0.2622,
+ "step": 5145
+ },
+ {
+ "epoch": 29.405714285714286,
+ "grad_norm": 87.98640441894531,
+ "learning_rate": 2.2882539682539684e-05,
+ "loss": 0.3601,
+ "step": 5146
+ },
+ {
+ "epoch": 29.411428571428573,
+ "grad_norm": 108.64775085449219,
+ "learning_rate": 2.2876190476190477e-05,
+ "loss": 0.4437,
+ "step": 5147
+ },
+ {
+ "epoch": 29.417142857142856,
+ "grad_norm": 25.7600040435791,
+ "learning_rate": 2.2869841269841273e-05,
+ "loss": 0.241,
+ "step": 5148
+ },
+ {
+ "epoch": 29.422857142857143,
+ "grad_norm": 23.64268684387207,
+ "learning_rate": 2.2863492063492065e-05,
+ "loss": 0.2116,
+ "step": 5149
+ },
+ {
+ "epoch": 29.428571428571427,
+ "grad_norm": 426.9701232910156,
+ "learning_rate": 2.2857142857142858e-05,
+ "loss": 0.2912,
+ "step": 5150
+ },
+ {
+ "epoch": 29.434285714285714,
+ "grad_norm": 51.756980895996094,
+ "learning_rate": 2.2850793650793654e-05,
+ "loss": 0.2115,
+ "step": 5151
+ },
+ {
+ "epoch": 29.44,
+ "grad_norm": 26.336313247680664,
+ "learning_rate": 2.2844444444444447e-05,
+ "loss": 0.1632,
+ "step": 5152
+ },
+ {
+ "epoch": 29.445714285714285,
+ "grad_norm": 91.51924896240234,
+ "learning_rate": 2.283809523809524e-05,
+ "loss": 0.2748,
+ "step": 5153
+ },
+ {
+ "epoch": 29.451428571428572,
+ "grad_norm": 60.591732025146484,
+ "learning_rate": 2.2831746031746032e-05,
+ "loss": 0.2213,
+ "step": 5154
+ },
+ {
+ "epoch": 29.457142857142856,
+ "grad_norm": 83.90245819091797,
+ "learning_rate": 2.2825396825396828e-05,
+ "loss": 0.2334,
+ "step": 5155
+ },
+ {
+ "epoch": 29.462857142857143,
+ "grad_norm": 51.06547927856445,
+ "learning_rate": 2.281904761904762e-05,
+ "loss": 0.2837,
+ "step": 5156
+ },
+ {
+ "epoch": 29.46857142857143,
+ "grad_norm": 66.79014587402344,
+ "learning_rate": 2.2812698412698413e-05,
+ "loss": 0.3387,
+ "step": 5157
+ },
+ {
+ "epoch": 29.474285714285713,
+ "grad_norm": 36.439510345458984,
+ "learning_rate": 2.2806349206349206e-05,
+ "loss": 0.2813,
+ "step": 5158
+ },
+ {
+ "epoch": 29.48,
+ "grad_norm": 51.553646087646484,
+ "learning_rate": 2.2800000000000002e-05,
+ "loss": 0.3421,
+ "step": 5159
+ },
+ {
+ "epoch": 29.485714285714284,
+ "grad_norm": 33.91830825805664,
+ "learning_rate": 2.2793650793650795e-05,
+ "loss": 0.2587,
+ "step": 5160
+ },
+ {
+ "epoch": 29.49142857142857,
+ "grad_norm": 47.36806106567383,
+ "learning_rate": 2.2787301587301587e-05,
+ "loss": 0.1622,
+ "step": 5161
+ },
+ {
+ "epoch": 29.497142857142858,
+ "grad_norm": 63.804405212402344,
+ "learning_rate": 2.2780952380952383e-05,
+ "loss": 0.2065,
+ "step": 5162
+ },
+ {
+ "epoch": 29.502857142857142,
+ "grad_norm": 147.74241638183594,
+ "learning_rate": 2.2774603174603176e-05,
+ "loss": 0.2365,
+ "step": 5163
+ },
+ {
+ "epoch": 29.50857142857143,
+ "grad_norm": 33.29631423950195,
+ "learning_rate": 2.276825396825397e-05,
+ "loss": 0.2639,
+ "step": 5164
+ },
+ {
+ "epoch": 29.514285714285712,
+ "grad_norm": 43.6353759765625,
+ "learning_rate": 2.276190476190476e-05,
+ "loss": 0.2283,
+ "step": 5165
+ },
+ {
+ "epoch": 29.52,
+ "grad_norm": 96.89825439453125,
+ "learning_rate": 2.2755555555555557e-05,
+ "loss": 0.2651,
+ "step": 5166
+ },
+ {
+ "epoch": 29.525714285714287,
+ "grad_norm": 72.50611114501953,
+ "learning_rate": 2.274920634920635e-05,
+ "loss": 0.2528,
+ "step": 5167
+ },
+ {
+ "epoch": 29.53142857142857,
+ "grad_norm": 40.47755813598633,
+ "learning_rate": 2.2742857142857142e-05,
+ "loss": 0.2446,
+ "step": 5168
+ },
+ {
+ "epoch": 29.537142857142857,
+ "grad_norm": 35.787445068359375,
+ "learning_rate": 2.2736507936507935e-05,
+ "loss": 0.3317,
+ "step": 5169
+ },
+ {
+ "epoch": 29.542857142857144,
+ "grad_norm": 62.62869644165039,
+ "learning_rate": 2.273015873015873e-05,
+ "loss": 0.2892,
+ "step": 5170
+ },
+ {
+ "epoch": 29.548571428571428,
+ "grad_norm": 65.2412338256836,
+ "learning_rate": 2.2723809523809524e-05,
+ "loss": 0.3481,
+ "step": 5171
+ },
+ {
+ "epoch": 29.554285714285715,
+ "grad_norm": 37.17427062988281,
+ "learning_rate": 2.2717460317460316e-05,
+ "loss": 0.3309,
+ "step": 5172
+ },
+ {
+ "epoch": 29.56,
+ "grad_norm": 34.58698654174805,
+ "learning_rate": 2.2711111111111112e-05,
+ "loss": 0.2199,
+ "step": 5173
+ },
+ {
+ "epoch": 29.565714285714286,
+ "grad_norm": 46.027435302734375,
+ "learning_rate": 2.2704761904761905e-05,
+ "loss": 0.3143,
+ "step": 5174
+ },
+ {
+ "epoch": 29.571428571428573,
+ "grad_norm": 68.04759216308594,
+ "learning_rate": 2.2698412698412698e-05,
+ "loss": 0.2608,
+ "step": 5175
+ },
+ {
+ "epoch": 29.577142857142857,
+ "grad_norm": 82.01683807373047,
+ "learning_rate": 2.2692063492063494e-05,
+ "loss": 0.2682,
+ "step": 5176
+ },
+ {
+ "epoch": 29.582857142857144,
+ "grad_norm": 40.78395080566406,
+ "learning_rate": 2.2685714285714286e-05,
+ "loss": 0.2364,
+ "step": 5177
+ },
+ {
+ "epoch": 29.588571428571427,
+ "grad_norm": 69.27226257324219,
+ "learning_rate": 2.2679365079365082e-05,
+ "loss": 0.2444,
+ "step": 5178
+ },
+ {
+ "epoch": 29.594285714285714,
+ "grad_norm": 59.43379211425781,
+ "learning_rate": 2.2673015873015875e-05,
+ "loss": 0.2642,
+ "step": 5179
+ },
+ {
+ "epoch": 29.6,
+ "grad_norm": 100.76307678222656,
+ "learning_rate": 2.2666666666666668e-05,
+ "loss": 0.4777,
+ "step": 5180
+ },
+ {
+ "epoch": 29.605714285714285,
+ "grad_norm": 19.478042602539062,
+ "learning_rate": 2.2660317460317464e-05,
+ "loss": 0.2047,
+ "step": 5181
+ },
+ {
+ "epoch": 29.611428571428572,
+ "grad_norm": 58.325008392333984,
+ "learning_rate": 2.2653968253968256e-05,
+ "loss": 0.1958,
+ "step": 5182
+ },
+ {
+ "epoch": 29.617142857142856,
+ "grad_norm": 45.153602600097656,
+ "learning_rate": 2.264761904761905e-05,
+ "loss": 0.2607,
+ "step": 5183
+ },
+ {
+ "epoch": 29.622857142857143,
+ "grad_norm": 75.98762512207031,
+ "learning_rate": 2.2641269841269845e-05,
+ "loss": 0.317,
+ "step": 5184
+ },
+ {
+ "epoch": 29.62857142857143,
+ "grad_norm": 34.11820602416992,
+ "learning_rate": 2.2634920634920638e-05,
+ "loss": 0.2393,
+ "step": 5185
+ },
+ {
+ "epoch": 29.634285714285713,
+ "grad_norm": 77.3637466430664,
+ "learning_rate": 2.262857142857143e-05,
+ "loss": 0.2647,
+ "step": 5186
+ },
+ {
+ "epoch": 29.64,
+ "grad_norm": 69.17243957519531,
+ "learning_rate": 2.2622222222222223e-05,
+ "loss": 0.2659,
+ "step": 5187
+ },
+ {
+ "epoch": 29.645714285714284,
+ "grad_norm": 54.292015075683594,
+ "learning_rate": 2.261587301587302e-05,
+ "loss": 0.2226,
+ "step": 5188
+ },
+ {
+ "epoch": 29.65142857142857,
+ "grad_norm": 46.84638214111328,
+ "learning_rate": 2.260952380952381e-05,
+ "loss": 0.2499,
+ "step": 5189
+ },
+ {
+ "epoch": 29.65714285714286,
+ "grad_norm": 44.82379150390625,
+ "learning_rate": 2.2603174603174604e-05,
+ "loss": 0.253,
+ "step": 5190
+ },
+ {
+ "epoch": 29.662857142857142,
+ "grad_norm": 675.0682373046875,
+ "learning_rate": 2.2596825396825397e-05,
+ "loss": 0.2743,
+ "step": 5191
+ },
+ {
+ "epoch": 29.66857142857143,
+ "grad_norm": 65.07813262939453,
+ "learning_rate": 2.2590476190476193e-05,
+ "loss": 0.2476,
+ "step": 5192
+ },
+ {
+ "epoch": 29.674285714285713,
+ "grad_norm": 62.987884521484375,
+ "learning_rate": 2.2584126984126985e-05,
+ "loss": 0.2061,
+ "step": 5193
+ },
+ {
+ "epoch": 29.68,
+ "grad_norm": 41.408973693847656,
+ "learning_rate": 2.2577777777777778e-05,
+ "loss": 0.2269,
+ "step": 5194
+ },
+ {
+ "epoch": 29.685714285714287,
+ "grad_norm": 60.94583511352539,
+ "learning_rate": 2.257142857142857e-05,
+ "loss": 0.2377,
+ "step": 5195
+ },
+ {
+ "epoch": 29.69142857142857,
+ "grad_norm": 41.704437255859375,
+ "learning_rate": 2.2565079365079367e-05,
+ "loss": 0.2191,
+ "step": 5196
+ },
+ {
+ "epoch": 29.697142857142858,
+ "grad_norm": 26.715286254882812,
+ "learning_rate": 2.255873015873016e-05,
+ "loss": 0.3247,
+ "step": 5197
+ },
+ {
+ "epoch": 29.70285714285714,
+ "grad_norm": 44.722381591796875,
+ "learning_rate": 2.2552380952380952e-05,
+ "loss": 0.2171,
+ "step": 5198
+ },
+ {
+ "epoch": 29.708571428571428,
+ "grad_norm": 28.22305679321289,
+ "learning_rate": 2.2546031746031748e-05,
+ "loss": 0.2707,
+ "step": 5199
+ },
+ {
+ "epoch": 29.714285714285715,
+ "grad_norm": 39.18876266479492,
+ "learning_rate": 2.253968253968254e-05,
+ "loss": 0.329,
+ "step": 5200
+ },
+ {
+ "epoch": 29.72,
+ "grad_norm": 304.74371337890625,
+ "learning_rate": 2.2533333333333333e-05,
+ "loss": 0.2566,
+ "step": 5201
+ },
+ {
+ "epoch": 29.725714285714286,
+ "grad_norm": 40.92671203613281,
+ "learning_rate": 2.2526984126984126e-05,
+ "loss": 0.2433,
+ "step": 5202
+ },
+ {
+ "epoch": 29.731428571428573,
+ "grad_norm": 31.22627830505371,
+ "learning_rate": 2.2520634920634922e-05,
+ "loss": 0.2306,
+ "step": 5203
+ },
+ {
+ "epoch": 29.737142857142857,
+ "grad_norm": 22.06302833557129,
+ "learning_rate": 2.2514285714285715e-05,
+ "loss": 0.3377,
+ "step": 5204
+ },
+ {
+ "epoch": 29.742857142857144,
+ "grad_norm": 84.18092346191406,
+ "learning_rate": 2.2507936507936507e-05,
+ "loss": 0.3718,
+ "step": 5205
+ },
+ {
+ "epoch": 29.748571428571427,
+ "grad_norm": 19.86476707458496,
+ "learning_rate": 2.25015873015873e-05,
+ "loss": 0.285,
+ "step": 5206
+ },
+ {
+ "epoch": 29.754285714285714,
+ "grad_norm": 32.38277053833008,
+ "learning_rate": 2.2495238095238096e-05,
+ "loss": 0.3177,
+ "step": 5207
+ },
+ {
+ "epoch": 29.76,
+ "grad_norm": 28.177043914794922,
+ "learning_rate": 2.248888888888889e-05,
+ "loss": 0.2743,
+ "step": 5208
+ },
+ {
+ "epoch": 29.765714285714285,
+ "grad_norm": 72.46790313720703,
+ "learning_rate": 2.248253968253968e-05,
+ "loss": 0.2154,
+ "step": 5209
+ },
+ {
+ "epoch": 29.771428571428572,
+ "grad_norm": 32.09682083129883,
+ "learning_rate": 2.2476190476190477e-05,
+ "loss": 0.1605,
+ "step": 5210
+ },
+ {
+ "epoch": 29.777142857142856,
+ "grad_norm": 32.612701416015625,
+ "learning_rate": 2.246984126984127e-05,
+ "loss": 0.1922,
+ "step": 5211
+ },
+ {
+ "epoch": 29.782857142857143,
+ "grad_norm": 23.944807052612305,
+ "learning_rate": 2.2463492063492066e-05,
+ "loss": 0.2776,
+ "step": 5212
+ },
+ {
+ "epoch": 29.78857142857143,
+ "grad_norm": 771.417236328125,
+ "learning_rate": 2.245714285714286e-05,
+ "loss": 0.3039,
+ "step": 5213
+ },
+ {
+ "epoch": 29.794285714285714,
+ "grad_norm": 67.97403717041016,
+ "learning_rate": 2.2450793650793655e-05,
+ "loss": 0.3022,
+ "step": 5214
+ },
+ {
+ "epoch": 29.8,
+ "grad_norm": 278.3428955078125,
+ "learning_rate": 2.2444444444444447e-05,
+ "loss": 0.2148,
+ "step": 5215
+ },
+ {
+ "epoch": 29.805714285714284,
+ "grad_norm": 131.3963623046875,
+ "learning_rate": 2.243809523809524e-05,
+ "loss": 0.2432,
+ "step": 5216
+ },
+ {
+ "epoch": 29.81142857142857,
+ "grad_norm": 36.8978271484375,
+ "learning_rate": 2.2431746031746032e-05,
+ "loss": 0.2134,
+ "step": 5217
+ },
+ {
+ "epoch": 29.81714285714286,
+ "grad_norm": 21.065776824951172,
+ "learning_rate": 2.242539682539683e-05,
+ "loss": 0.2727,
+ "step": 5218
+ },
+ {
+ "epoch": 29.822857142857142,
+ "grad_norm": 24.778491973876953,
+ "learning_rate": 2.241904761904762e-05,
+ "loss": 0.2979,
+ "step": 5219
+ },
+ {
+ "epoch": 29.82857142857143,
+ "grad_norm": 30.23557472229004,
+ "learning_rate": 2.2412698412698414e-05,
+ "loss": 0.2456,
+ "step": 5220
+ },
+ {
+ "epoch": 29.834285714285713,
+ "grad_norm": 59.7723388671875,
+ "learning_rate": 2.240634920634921e-05,
+ "loss": 0.2358,
+ "step": 5221
+ },
+ {
+ "epoch": 29.84,
+ "grad_norm": 22.08249282836914,
+ "learning_rate": 2.2400000000000002e-05,
+ "loss": 0.1699,
+ "step": 5222
+ },
+ {
+ "epoch": 29.845714285714287,
+ "grad_norm": 156.52500915527344,
+ "learning_rate": 2.2393650793650795e-05,
+ "loss": 0.2667,
+ "step": 5223
+ },
+ {
+ "epoch": 29.85142857142857,
+ "grad_norm": 26.32693862915039,
+ "learning_rate": 2.2387301587301588e-05,
+ "loss": 0.4322,
+ "step": 5224
+ },
+ {
+ "epoch": 29.857142857142858,
+ "grad_norm": 56.269779205322266,
+ "learning_rate": 2.2380952380952384e-05,
+ "loss": 0.2021,
+ "step": 5225
+ },
+ {
+ "epoch": 29.86285714285714,
+ "grad_norm": 27.565061569213867,
+ "learning_rate": 2.2374603174603176e-05,
+ "loss": 0.3126,
+ "step": 5226
+ },
+ {
+ "epoch": 29.86857142857143,
+ "grad_norm": 303.22210693359375,
+ "learning_rate": 2.236825396825397e-05,
+ "loss": 0.2455,
+ "step": 5227
+ },
+ {
+ "epoch": 29.874285714285715,
+ "grad_norm": 112.21163177490234,
+ "learning_rate": 2.236190476190476e-05,
+ "loss": 0.3081,
+ "step": 5228
+ },
+ {
+ "epoch": 29.88,
+ "grad_norm": 50.15737533569336,
+ "learning_rate": 2.2355555555555558e-05,
+ "loss": 0.2953,
+ "step": 5229
+ },
+ {
+ "epoch": 29.885714285714286,
+ "grad_norm": 169.55072021484375,
+ "learning_rate": 2.234920634920635e-05,
+ "loss": 0.3248,
+ "step": 5230
+ },
+ {
+ "epoch": 29.89142857142857,
+ "grad_norm": 71.83511352539062,
+ "learning_rate": 2.2342857142857143e-05,
+ "loss": 0.2653,
+ "step": 5231
+ },
+ {
+ "epoch": 29.897142857142857,
+ "grad_norm": 51.96732711791992,
+ "learning_rate": 2.233650793650794e-05,
+ "loss": 0.2534,
+ "step": 5232
+ },
+ {
+ "epoch": 29.902857142857144,
+ "grad_norm": 38.780067443847656,
+ "learning_rate": 2.233015873015873e-05,
+ "loss": 0.2196,
+ "step": 5233
+ },
+ {
+ "epoch": 29.908571428571427,
+ "grad_norm": 19.927183151245117,
+ "learning_rate": 2.2323809523809524e-05,
+ "loss": 0.2076,
+ "step": 5234
+ },
+ {
+ "epoch": 29.914285714285715,
+ "grad_norm": 28.219009399414062,
+ "learning_rate": 2.2317460317460317e-05,
+ "loss": 0.2898,
+ "step": 5235
+ },
+ {
+ "epoch": 29.92,
+ "grad_norm": 31.704973220825195,
+ "learning_rate": 2.2311111111111113e-05,
+ "loss": 0.2539,
+ "step": 5236
+ },
+ {
+ "epoch": 29.925714285714285,
+ "grad_norm": 70.12614440917969,
+ "learning_rate": 2.2304761904761905e-05,
+ "loss": 0.2214,
+ "step": 5237
+ },
+ {
+ "epoch": 29.931428571428572,
+ "grad_norm": 23.82673454284668,
+ "learning_rate": 2.2298412698412698e-05,
+ "loss": 0.4413,
+ "step": 5238
+ },
+ {
+ "epoch": 29.937142857142856,
+ "grad_norm": 33.32120895385742,
+ "learning_rate": 2.229206349206349e-05,
+ "loss": 0.2039,
+ "step": 5239
+ },
+ {
+ "epoch": 29.942857142857143,
+ "grad_norm": 72.25456237792969,
+ "learning_rate": 2.2285714285714287e-05,
+ "loss": 0.267,
+ "step": 5240
+ },
+ {
+ "epoch": 29.94857142857143,
+ "grad_norm": 46.91978454589844,
+ "learning_rate": 2.227936507936508e-05,
+ "loss": 0.2319,
+ "step": 5241
+ },
+ {
+ "epoch": 29.954285714285714,
+ "grad_norm": 26.755491256713867,
+ "learning_rate": 2.2273015873015872e-05,
+ "loss": 0.1912,
+ "step": 5242
+ },
+ {
+ "epoch": 29.96,
+ "grad_norm": 77.56159973144531,
+ "learning_rate": 2.2266666666666668e-05,
+ "loss": 0.192,
+ "step": 5243
+ },
+ {
+ "epoch": 29.965714285714284,
+ "grad_norm": 65.45929718017578,
+ "learning_rate": 2.226031746031746e-05,
+ "loss": 0.2842,
+ "step": 5244
+ },
+ {
+ "epoch": 29.97142857142857,
+ "grad_norm": 46.151546478271484,
+ "learning_rate": 2.2253968253968253e-05,
+ "loss": 0.289,
+ "step": 5245
+ },
+ {
+ "epoch": 29.97714285714286,
+ "grad_norm": 61.48402786254883,
+ "learning_rate": 2.224761904761905e-05,
+ "loss": 0.2239,
+ "step": 5246
+ },
+ {
+ "epoch": 29.982857142857142,
+ "grad_norm": 42.47544479370117,
+ "learning_rate": 2.2241269841269842e-05,
+ "loss": 0.3292,
+ "step": 5247
+ },
+ {
+ "epoch": 29.98857142857143,
+ "grad_norm": 16.77855110168457,
+ "learning_rate": 2.2234920634920635e-05,
+ "loss": 0.2999,
+ "step": 5248
+ },
+ {
+ "epoch": 29.994285714285713,
+ "grad_norm": 27.23601531982422,
+ "learning_rate": 2.222857142857143e-05,
+ "loss": 0.2653,
+ "step": 5249
+ },
+ {
+ "epoch": 30.0,
+ "grad_norm": 66.7364730834961,
+ "learning_rate": 2.2222222222222223e-05,
+ "loss": 0.3852,
+ "step": 5250
+ },
+ {
+ "epoch": 30.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6095895767211914,
+ "eval_map": 0.9311,
+ "eval_map_50": 0.9653,
+ "eval_map_75": 0.9597,
+ "eval_map_large": 0.9312,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9311,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7902,
+ "eval_mar_10": 0.9762,
+ "eval_mar_100": 0.9794,
+ "eval_mar_100_per_class": 0.9794,
+ "eval_mar_large": 0.9794,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.7648,
+ "eval_samples_per_second": 21.359,
+ "eval_steps_per_second": 2.688,
+ "step": 5250
+ },
+ {
+ "epoch": 30.005714285714287,
+ "grad_norm": 57.8994255065918,
+ "learning_rate": 2.221587301587302e-05,
+ "loss": 0.2644,
+ "step": 5251
+ },
+ {
+ "epoch": 30.01142857142857,
+ "grad_norm": 38.62663650512695,
+ "learning_rate": 2.2209523809523812e-05,
+ "loss": 0.3136,
+ "step": 5252
+ },
+ {
+ "epoch": 30.017142857142858,
+ "grad_norm": 44.23182678222656,
+ "learning_rate": 2.2203174603174605e-05,
+ "loss": 0.2108,
+ "step": 5253
+ },
+ {
+ "epoch": 30.02285714285714,
+ "grad_norm": 86.72383117675781,
+ "learning_rate": 2.21968253968254e-05,
+ "loss": 0.2607,
+ "step": 5254
+ },
+ {
+ "epoch": 30.02857142857143,
+ "grad_norm": 26.44148826599121,
+ "learning_rate": 2.2190476190476193e-05,
+ "loss": 0.2828,
+ "step": 5255
+ },
+ {
+ "epoch": 30.034285714285716,
+ "grad_norm": 54.37052536010742,
+ "learning_rate": 2.2184126984126986e-05,
+ "loss": 0.3152,
+ "step": 5256
+ },
+ {
+ "epoch": 30.04,
+ "grad_norm": 970.4421997070312,
+ "learning_rate": 2.217777777777778e-05,
+ "loss": 0.2307,
+ "step": 5257
+ },
+ {
+ "epoch": 30.045714285714286,
+ "grad_norm": 74.14012908935547,
+ "learning_rate": 2.2171428571428575e-05,
+ "loss": 0.2416,
+ "step": 5258
+ },
+ {
+ "epoch": 30.05142857142857,
+ "grad_norm": 65.60440063476562,
+ "learning_rate": 2.2165079365079367e-05,
+ "loss": 0.2617,
+ "step": 5259
+ },
+ {
+ "epoch": 30.057142857142857,
+ "grad_norm": 32.856510162353516,
+ "learning_rate": 2.215873015873016e-05,
+ "loss": 0.3224,
+ "step": 5260
+ },
+ {
+ "epoch": 30.062857142857144,
+ "grad_norm": 36.577796936035156,
+ "learning_rate": 2.2152380952380952e-05,
+ "loss": 0.2311,
+ "step": 5261
+ },
+ {
+ "epoch": 30.068571428571428,
+ "grad_norm": 37.82815933227539,
+ "learning_rate": 2.214603174603175e-05,
+ "loss": 0.255,
+ "step": 5262
+ },
+ {
+ "epoch": 30.074285714285715,
+ "grad_norm": 319.6885070800781,
+ "learning_rate": 2.213968253968254e-05,
+ "loss": 0.3182,
+ "step": 5263
+ },
+ {
+ "epoch": 30.08,
+ "grad_norm": 53.454898834228516,
+ "learning_rate": 2.2133333333333334e-05,
+ "loss": 0.2069,
+ "step": 5264
+ },
+ {
+ "epoch": 30.085714285714285,
+ "grad_norm": 52.656856536865234,
+ "learning_rate": 2.212698412698413e-05,
+ "loss": 0.268,
+ "step": 5265
+ },
+ {
+ "epoch": 30.091428571428573,
+ "grad_norm": 59.526424407958984,
+ "learning_rate": 2.2120634920634922e-05,
+ "loss": 0.3191,
+ "step": 5266
+ },
+ {
+ "epoch": 30.097142857142856,
+ "grad_norm": 95.74357604980469,
+ "learning_rate": 2.2114285714285715e-05,
+ "loss": 0.2975,
+ "step": 5267
+ },
+ {
+ "epoch": 30.102857142857143,
+ "grad_norm": 60.69863510131836,
+ "learning_rate": 2.2107936507936508e-05,
+ "loss": 0.3128,
+ "step": 5268
+ },
+ {
+ "epoch": 30.10857142857143,
+ "grad_norm": 403.5923767089844,
+ "learning_rate": 2.2101587301587304e-05,
+ "loss": 0.3187,
+ "step": 5269
+ },
+ {
+ "epoch": 30.114285714285714,
+ "grad_norm": 37.859458923339844,
+ "learning_rate": 2.2095238095238096e-05,
+ "loss": 0.2386,
+ "step": 5270
+ },
+ {
+ "epoch": 30.12,
+ "grad_norm": 43.76021957397461,
+ "learning_rate": 2.208888888888889e-05,
+ "loss": 0.2183,
+ "step": 5271
+ },
+ {
+ "epoch": 30.125714285714285,
+ "grad_norm": 60.64640808105469,
+ "learning_rate": 2.208253968253968e-05,
+ "loss": 0.2306,
+ "step": 5272
+ },
+ {
+ "epoch": 30.13142857142857,
+ "grad_norm": 55.451297760009766,
+ "learning_rate": 2.2076190476190478e-05,
+ "loss": 0.2518,
+ "step": 5273
+ },
+ {
+ "epoch": 30.13714285714286,
+ "grad_norm": 51.61478805541992,
+ "learning_rate": 2.206984126984127e-05,
+ "loss": 0.1972,
+ "step": 5274
+ },
+ {
+ "epoch": 30.142857142857142,
+ "grad_norm": 47.890682220458984,
+ "learning_rate": 2.2063492063492063e-05,
+ "loss": 0.1798,
+ "step": 5275
+ },
+ {
+ "epoch": 30.14857142857143,
+ "grad_norm": 86.82106018066406,
+ "learning_rate": 2.205714285714286e-05,
+ "loss": 0.2597,
+ "step": 5276
+ },
+ {
+ "epoch": 30.154285714285713,
+ "grad_norm": 26.615827560424805,
+ "learning_rate": 2.205079365079365e-05,
+ "loss": 0.2647,
+ "step": 5277
+ },
+ {
+ "epoch": 30.16,
+ "grad_norm": 28.28487205505371,
+ "learning_rate": 2.2044444444444444e-05,
+ "loss": 0.2985,
+ "step": 5278
+ },
+ {
+ "epoch": 30.165714285714287,
+ "grad_norm": 46.122684478759766,
+ "learning_rate": 2.2038095238095237e-05,
+ "loss": 0.3281,
+ "step": 5279
+ },
+ {
+ "epoch": 30.17142857142857,
+ "grad_norm": 19.797990798950195,
+ "learning_rate": 2.2031746031746033e-05,
+ "loss": 0.2221,
+ "step": 5280
+ },
+ {
+ "epoch": 30.177142857142858,
+ "grad_norm": 32.67826843261719,
+ "learning_rate": 2.2025396825396825e-05,
+ "loss": 0.1973,
+ "step": 5281
+ },
+ {
+ "epoch": 30.18285714285714,
+ "grad_norm": 42.12900924682617,
+ "learning_rate": 2.2019047619047618e-05,
+ "loss": 0.2547,
+ "step": 5282
+ },
+ {
+ "epoch": 30.18857142857143,
+ "grad_norm": 29.2160701751709,
+ "learning_rate": 2.2012698412698414e-05,
+ "loss": 0.2711,
+ "step": 5283
+ },
+ {
+ "epoch": 30.194285714285716,
+ "grad_norm": 43.39205551147461,
+ "learning_rate": 2.2006349206349207e-05,
+ "loss": 0.2193,
+ "step": 5284
+ },
+ {
+ "epoch": 30.2,
+ "grad_norm": 60.42385482788086,
+ "learning_rate": 2.2000000000000003e-05,
+ "loss": 0.221,
+ "step": 5285
+ },
+ {
+ "epoch": 30.205714285714286,
+ "grad_norm": 31.63413429260254,
+ "learning_rate": 2.1993650793650795e-05,
+ "loss": 0.2456,
+ "step": 5286
+ },
+ {
+ "epoch": 30.21142857142857,
+ "grad_norm": 56.024932861328125,
+ "learning_rate": 2.1987301587301588e-05,
+ "loss": 0.2272,
+ "step": 5287
+ },
+ {
+ "epoch": 30.217142857142857,
+ "grad_norm": 40.43967819213867,
+ "learning_rate": 2.1980952380952384e-05,
+ "loss": 0.2552,
+ "step": 5288
+ },
+ {
+ "epoch": 30.222857142857144,
+ "grad_norm": 138.42617797851562,
+ "learning_rate": 2.1974603174603177e-05,
+ "loss": 0.3272,
+ "step": 5289
+ },
+ {
+ "epoch": 30.228571428571428,
+ "grad_norm": 52.62572479248047,
+ "learning_rate": 2.196825396825397e-05,
+ "loss": 0.1945,
+ "step": 5290
+ },
+ {
+ "epoch": 30.234285714285715,
+ "grad_norm": 25.761674880981445,
+ "learning_rate": 2.1961904761904765e-05,
+ "loss": 0.2854,
+ "step": 5291
+ },
+ {
+ "epoch": 30.24,
+ "grad_norm": 24.410228729248047,
+ "learning_rate": 2.1955555555555558e-05,
+ "loss": 0.1844,
+ "step": 5292
+ },
+ {
+ "epoch": 30.245714285714286,
+ "grad_norm": 24.37540626525879,
+ "learning_rate": 2.194920634920635e-05,
+ "loss": 0.3114,
+ "step": 5293
+ },
+ {
+ "epoch": 30.251428571428573,
+ "grad_norm": 32.545597076416016,
+ "learning_rate": 2.1942857142857143e-05,
+ "loss": 0.1947,
+ "step": 5294
+ },
+ {
+ "epoch": 30.257142857142856,
+ "grad_norm": 26.83466148376465,
+ "learning_rate": 2.193650793650794e-05,
+ "loss": 0.2757,
+ "step": 5295
+ },
+ {
+ "epoch": 30.262857142857143,
+ "grad_norm": 114.49763488769531,
+ "learning_rate": 2.1930158730158732e-05,
+ "loss": 0.2074,
+ "step": 5296
+ },
+ {
+ "epoch": 30.268571428571427,
+ "grad_norm": 80.31643676757812,
+ "learning_rate": 2.1923809523809525e-05,
+ "loss": 0.2124,
+ "step": 5297
+ },
+ {
+ "epoch": 30.274285714285714,
+ "grad_norm": 74.36065673828125,
+ "learning_rate": 2.1917460317460317e-05,
+ "loss": 0.2864,
+ "step": 5298
+ },
+ {
+ "epoch": 30.28,
+ "grad_norm": 36.70797348022461,
+ "learning_rate": 2.1911111111111113e-05,
+ "loss": 0.2341,
+ "step": 5299
+ },
+ {
+ "epoch": 30.285714285714285,
+ "grad_norm": 36.677345275878906,
+ "learning_rate": 2.1904761904761906e-05,
+ "loss": 0.2009,
+ "step": 5300
+ },
+ {
+ "epoch": 30.291428571428572,
+ "grad_norm": 33.610931396484375,
+ "learning_rate": 2.18984126984127e-05,
+ "loss": 0.3764,
+ "step": 5301
+ },
+ {
+ "epoch": 30.29714285714286,
+ "grad_norm": 102.84770202636719,
+ "learning_rate": 2.1892063492063495e-05,
+ "loss": 0.2748,
+ "step": 5302
+ },
+ {
+ "epoch": 30.302857142857142,
+ "grad_norm": 45.53736877441406,
+ "learning_rate": 2.1885714285714287e-05,
+ "loss": 0.2016,
+ "step": 5303
+ },
+ {
+ "epoch": 30.30857142857143,
+ "grad_norm": 43.9678840637207,
+ "learning_rate": 2.187936507936508e-05,
+ "loss": 0.1706,
+ "step": 5304
+ },
+ {
+ "epoch": 30.314285714285713,
+ "grad_norm": 230.81417846679688,
+ "learning_rate": 2.1873015873015872e-05,
+ "loss": 0.2759,
+ "step": 5305
+ },
+ {
+ "epoch": 30.32,
+ "grad_norm": 99.33362579345703,
+ "learning_rate": 2.186666666666667e-05,
+ "loss": 0.3033,
+ "step": 5306
+ },
+ {
+ "epoch": 30.325714285714287,
+ "grad_norm": 55.34583282470703,
+ "learning_rate": 2.186031746031746e-05,
+ "loss": 0.2376,
+ "step": 5307
+ },
+ {
+ "epoch": 30.33142857142857,
+ "grad_norm": 64.33992767333984,
+ "learning_rate": 2.1853968253968254e-05,
+ "loss": 0.3425,
+ "step": 5308
+ },
+ {
+ "epoch": 30.337142857142858,
+ "grad_norm": 33.70119857788086,
+ "learning_rate": 2.1847619047619046e-05,
+ "loss": 0.2414,
+ "step": 5309
+ },
+ {
+ "epoch": 30.34285714285714,
+ "grad_norm": 171.85140991210938,
+ "learning_rate": 2.1841269841269842e-05,
+ "loss": 0.2787,
+ "step": 5310
+ },
+ {
+ "epoch": 30.34857142857143,
+ "grad_norm": 20.956310272216797,
+ "learning_rate": 2.1834920634920635e-05,
+ "loss": 0.2725,
+ "step": 5311
+ },
+ {
+ "epoch": 30.354285714285716,
+ "grad_norm": 62.92211151123047,
+ "learning_rate": 2.1828571428571428e-05,
+ "loss": 0.1804,
+ "step": 5312
+ },
+ {
+ "epoch": 30.36,
+ "grad_norm": 48.061279296875,
+ "learning_rate": 2.1822222222222224e-05,
+ "loss": 0.2567,
+ "step": 5313
+ },
+ {
+ "epoch": 30.365714285714287,
+ "grad_norm": 70.20124053955078,
+ "learning_rate": 2.1815873015873016e-05,
+ "loss": 0.334,
+ "step": 5314
+ },
+ {
+ "epoch": 30.37142857142857,
+ "grad_norm": 226.6661834716797,
+ "learning_rate": 2.180952380952381e-05,
+ "loss": 0.2575,
+ "step": 5315
+ },
+ {
+ "epoch": 30.377142857142857,
+ "grad_norm": 242.7920684814453,
+ "learning_rate": 2.18031746031746e-05,
+ "loss": 0.2951,
+ "step": 5316
+ },
+ {
+ "epoch": 30.382857142857144,
+ "grad_norm": 64.47325134277344,
+ "learning_rate": 2.1796825396825398e-05,
+ "loss": 0.2664,
+ "step": 5317
+ },
+ {
+ "epoch": 30.388571428571428,
+ "grad_norm": 28.074567794799805,
+ "learning_rate": 2.179047619047619e-05,
+ "loss": 0.2415,
+ "step": 5318
+ },
+ {
+ "epoch": 30.394285714285715,
+ "grad_norm": 78.06990814208984,
+ "learning_rate": 2.1784126984126983e-05,
+ "loss": 0.2959,
+ "step": 5319
+ },
+ {
+ "epoch": 30.4,
+ "grad_norm": 35.36109924316406,
+ "learning_rate": 2.177777777777778e-05,
+ "loss": 0.3656,
+ "step": 5320
+ },
+ {
+ "epoch": 30.405714285714286,
+ "grad_norm": 31.605953216552734,
+ "learning_rate": 2.177142857142857e-05,
+ "loss": 0.2689,
+ "step": 5321
+ },
+ {
+ "epoch": 30.411428571428573,
+ "grad_norm": 109.99222564697266,
+ "learning_rate": 2.1765079365079368e-05,
+ "loss": 0.1857,
+ "step": 5322
+ },
+ {
+ "epoch": 30.417142857142856,
+ "grad_norm": 44.36388397216797,
+ "learning_rate": 2.175873015873016e-05,
+ "loss": 0.1954,
+ "step": 5323
+ },
+ {
+ "epoch": 30.422857142857143,
+ "grad_norm": 27.54041862487793,
+ "learning_rate": 2.1752380952380956e-05,
+ "loss": 0.1823,
+ "step": 5324
+ },
+ {
+ "epoch": 30.428571428571427,
+ "grad_norm": 66.09359741210938,
+ "learning_rate": 2.174603174603175e-05,
+ "loss": 0.1908,
+ "step": 5325
+ },
+ {
+ "epoch": 30.434285714285714,
+ "grad_norm": 27.425779342651367,
+ "learning_rate": 2.173968253968254e-05,
+ "loss": 0.221,
+ "step": 5326
+ },
+ {
+ "epoch": 30.44,
+ "grad_norm": 52.88359069824219,
+ "learning_rate": 2.1733333333333334e-05,
+ "loss": 0.2874,
+ "step": 5327
+ },
+ {
+ "epoch": 30.445714285714285,
+ "grad_norm": 41.36067581176758,
+ "learning_rate": 2.172698412698413e-05,
+ "loss": 0.2248,
+ "step": 5328
+ },
+ {
+ "epoch": 30.451428571428572,
+ "grad_norm": 62.25459671020508,
+ "learning_rate": 2.1720634920634923e-05,
+ "loss": 0.2223,
+ "step": 5329
+ },
+ {
+ "epoch": 30.457142857142856,
+ "grad_norm": 39.943809509277344,
+ "learning_rate": 2.1714285714285715e-05,
+ "loss": 0.3783,
+ "step": 5330
+ },
+ {
+ "epoch": 30.462857142857143,
+ "grad_norm": 26.524595260620117,
+ "learning_rate": 2.1707936507936508e-05,
+ "loss": 0.2734,
+ "step": 5331
+ },
+ {
+ "epoch": 30.46857142857143,
+ "grad_norm": 32.22235107421875,
+ "learning_rate": 2.1701587301587304e-05,
+ "loss": 0.243,
+ "step": 5332
+ },
+ {
+ "epoch": 30.474285714285713,
+ "grad_norm": 36.29214096069336,
+ "learning_rate": 2.1695238095238097e-05,
+ "loss": 0.2394,
+ "step": 5333
+ },
+ {
+ "epoch": 30.48,
+ "grad_norm": 186.89295959472656,
+ "learning_rate": 2.168888888888889e-05,
+ "loss": 0.2326,
+ "step": 5334
+ },
+ {
+ "epoch": 30.485714285714284,
+ "grad_norm": 51.586448669433594,
+ "learning_rate": 2.1682539682539685e-05,
+ "loss": 0.2229,
+ "step": 5335
+ },
+ {
+ "epoch": 30.49142857142857,
+ "grad_norm": 496.5753479003906,
+ "learning_rate": 2.1676190476190478e-05,
+ "loss": 0.2687,
+ "step": 5336
+ },
+ {
+ "epoch": 30.497142857142858,
+ "grad_norm": 41.751808166503906,
+ "learning_rate": 2.166984126984127e-05,
+ "loss": 0.3579,
+ "step": 5337
+ },
+ {
+ "epoch": 30.502857142857142,
+ "grad_norm": 45.87074661254883,
+ "learning_rate": 2.1663492063492063e-05,
+ "loss": 0.2715,
+ "step": 5338
+ },
+ {
+ "epoch": 30.50857142857143,
+ "grad_norm": 35.66616439819336,
+ "learning_rate": 2.165714285714286e-05,
+ "loss": 0.3016,
+ "step": 5339
+ },
+ {
+ "epoch": 30.514285714285712,
+ "grad_norm": 76.56392669677734,
+ "learning_rate": 2.1650793650793652e-05,
+ "loss": 0.2987,
+ "step": 5340
+ },
+ {
+ "epoch": 30.52,
+ "grad_norm": 25.273380279541016,
+ "learning_rate": 2.1644444444444445e-05,
+ "loss": 0.2906,
+ "step": 5341
+ },
+ {
+ "epoch": 30.525714285714287,
+ "grad_norm": 82.47328186035156,
+ "learning_rate": 2.1638095238095237e-05,
+ "loss": 0.2485,
+ "step": 5342
+ },
+ {
+ "epoch": 30.53142857142857,
+ "grad_norm": 17.97199821472168,
+ "learning_rate": 2.1631746031746033e-05,
+ "loss": 0.275,
+ "step": 5343
+ },
+ {
+ "epoch": 30.537142857142857,
+ "grad_norm": 78.71432495117188,
+ "learning_rate": 2.1625396825396826e-05,
+ "loss": 0.3009,
+ "step": 5344
+ },
+ {
+ "epoch": 30.542857142857144,
+ "grad_norm": 53.01747131347656,
+ "learning_rate": 2.161904761904762e-05,
+ "loss": 0.2443,
+ "step": 5345
+ },
+ {
+ "epoch": 30.548571428571428,
+ "grad_norm": 66.9712905883789,
+ "learning_rate": 2.1612698412698415e-05,
+ "loss": 0.2314,
+ "step": 5346
+ },
+ {
+ "epoch": 30.554285714285715,
+ "grad_norm": 32.60740280151367,
+ "learning_rate": 2.1606349206349207e-05,
+ "loss": 0.2436,
+ "step": 5347
+ },
+ {
+ "epoch": 30.56,
+ "grad_norm": 76.29956817626953,
+ "learning_rate": 2.16e-05,
+ "loss": 0.404,
+ "step": 5348
+ },
+ {
+ "epoch": 30.565714285714286,
+ "grad_norm": 84.5020523071289,
+ "learning_rate": 2.1593650793650793e-05,
+ "loss": 0.2227,
+ "step": 5349
+ },
+ {
+ "epoch": 30.571428571428573,
+ "grad_norm": 47.953948974609375,
+ "learning_rate": 2.158730158730159e-05,
+ "loss": 0.3307,
+ "step": 5350
+ },
+ {
+ "epoch": 30.577142857142857,
+ "grad_norm": 107.53913116455078,
+ "learning_rate": 2.158095238095238e-05,
+ "loss": 0.2171,
+ "step": 5351
+ },
+ {
+ "epoch": 30.582857142857144,
+ "grad_norm": 572.5935668945312,
+ "learning_rate": 2.1574603174603174e-05,
+ "loss": 0.2976,
+ "step": 5352
+ },
+ {
+ "epoch": 30.588571428571427,
+ "grad_norm": 990.3221435546875,
+ "learning_rate": 2.1568253968253966e-05,
+ "loss": 0.2402,
+ "step": 5353
+ },
+ {
+ "epoch": 30.594285714285714,
+ "grad_norm": 74.72994232177734,
+ "learning_rate": 2.1561904761904762e-05,
+ "loss": 0.2424,
+ "step": 5354
+ },
+ {
+ "epoch": 30.6,
+ "grad_norm": 46.14615249633789,
+ "learning_rate": 2.1555555555555555e-05,
+ "loss": 0.2754,
+ "step": 5355
+ },
+ {
+ "epoch": 30.605714285714285,
+ "grad_norm": 46.194210052490234,
+ "learning_rate": 2.154920634920635e-05,
+ "loss": 0.2838,
+ "step": 5356
+ },
+ {
+ "epoch": 30.611428571428572,
+ "grad_norm": 49.12913131713867,
+ "learning_rate": 2.1542857142857144e-05,
+ "loss": 0.3064,
+ "step": 5357
+ },
+ {
+ "epoch": 30.617142857142856,
+ "grad_norm": 52.44061279296875,
+ "learning_rate": 2.1536507936507936e-05,
+ "loss": 0.2395,
+ "step": 5358
+ },
+ {
+ "epoch": 30.622857142857143,
+ "grad_norm": 69.73971557617188,
+ "learning_rate": 2.1530158730158732e-05,
+ "loss": 0.3199,
+ "step": 5359
+ },
+ {
+ "epoch": 30.62857142857143,
+ "grad_norm": 50.69144058227539,
+ "learning_rate": 2.1523809523809525e-05,
+ "loss": 0.1757,
+ "step": 5360
+ },
+ {
+ "epoch": 30.634285714285713,
+ "grad_norm": 64.29901885986328,
+ "learning_rate": 2.151746031746032e-05,
+ "loss": 0.2406,
+ "step": 5361
+ },
+ {
+ "epoch": 30.64,
+ "grad_norm": 34.74845504760742,
+ "learning_rate": 2.1511111111111114e-05,
+ "loss": 0.2962,
+ "step": 5362
+ },
+ {
+ "epoch": 30.645714285714284,
+ "grad_norm": 62.44707107543945,
+ "learning_rate": 2.1504761904761906e-05,
+ "loss": 0.289,
+ "step": 5363
+ },
+ {
+ "epoch": 30.65142857142857,
+ "grad_norm": 81.79146575927734,
+ "learning_rate": 2.14984126984127e-05,
+ "loss": 0.2472,
+ "step": 5364
+ },
+ {
+ "epoch": 30.65714285714286,
+ "grad_norm": 53.31319808959961,
+ "learning_rate": 2.1492063492063495e-05,
+ "loss": 0.3685,
+ "step": 5365
+ },
+ {
+ "epoch": 30.662857142857142,
+ "grad_norm": 34.42056655883789,
+ "learning_rate": 2.1485714285714288e-05,
+ "loss": 0.1763,
+ "step": 5366
+ },
+ {
+ "epoch": 30.66857142857143,
+ "grad_norm": 34.83342361450195,
+ "learning_rate": 2.147936507936508e-05,
+ "loss": 0.3054,
+ "step": 5367
+ },
+ {
+ "epoch": 30.674285714285713,
+ "grad_norm": 72.38312530517578,
+ "learning_rate": 2.1473015873015876e-05,
+ "loss": 0.3662,
+ "step": 5368
+ },
+ {
+ "epoch": 30.68,
+ "grad_norm": 56.11985778808594,
+ "learning_rate": 2.146666666666667e-05,
+ "loss": 0.288,
+ "step": 5369
+ },
+ {
+ "epoch": 30.685714285714287,
+ "grad_norm": 38.910179138183594,
+ "learning_rate": 2.146031746031746e-05,
+ "loss": 0.2519,
+ "step": 5370
+ },
+ {
+ "epoch": 30.69142857142857,
+ "grad_norm": 19.08800506591797,
+ "learning_rate": 2.1453968253968254e-05,
+ "loss": 0.199,
+ "step": 5371
+ },
+ {
+ "epoch": 30.697142857142858,
+ "grad_norm": 35.26356506347656,
+ "learning_rate": 2.144761904761905e-05,
+ "loss": 0.2699,
+ "step": 5372
+ },
+ {
+ "epoch": 30.70285714285714,
+ "grad_norm": 26.942726135253906,
+ "learning_rate": 2.1441269841269843e-05,
+ "loss": 0.2765,
+ "step": 5373
+ },
+ {
+ "epoch": 30.708571428571428,
+ "grad_norm": 84.79847717285156,
+ "learning_rate": 2.1434920634920636e-05,
+ "loss": 0.3339,
+ "step": 5374
+ },
+ {
+ "epoch": 30.714285714285715,
+ "grad_norm": 582.523681640625,
+ "learning_rate": 2.1428571428571428e-05,
+ "loss": 0.3743,
+ "step": 5375
+ },
+ {
+ "epoch": 30.72,
+ "grad_norm": 23.32284927368164,
+ "learning_rate": 2.1422222222222224e-05,
+ "loss": 0.2225,
+ "step": 5376
+ },
+ {
+ "epoch": 30.725714285714286,
+ "grad_norm": 154.95120239257812,
+ "learning_rate": 2.1415873015873017e-05,
+ "loss": 0.1966,
+ "step": 5377
+ },
+ {
+ "epoch": 30.731428571428573,
+ "grad_norm": 152.61624145507812,
+ "learning_rate": 2.140952380952381e-05,
+ "loss": 0.3489,
+ "step": 5378
+ },
+ {
+ "epoch": 30.737142857142857,
+ "grad_norm": 117.03369140625,
+ "learning_rate": 2.1403174603174605e-05,
+ "loss": 0.3226,
+ "step": 5379
+ },
+ {
+ "epoch": 30.742857142857144,
+ "grad_norm": 68.2706298828125,
+ "learning_rate": 2.1396825396825398e-05,
+ "loss": 0.4486,
+ "step": 5380
+ },
+ {
+ "epoch": 30.748571428571427,
+ "grad_norm": 37.4405517578125,
+ "learning_rate": 2.139047619047619e-05,
+ "loss": 0.299,
+ "step": 5381
+ },
+ {
+ "epoch": 30.754285714285714,
+ "grad_norm": 52.34811019897461,
+ "learning_rate": 2.1384126984126983e-05,
+ "loss": 0.2035,
+ "step": 5382
+ },
+ {
+ "epoch": 30.76,
+ "grad_norm": 52.2584114074707,
+ "learning_rate": 2.137777777777778e-05,
+ "loss": 0.2441,
+ "step": 5383
+ },
+ {
+ "epoch": 30.765714285714285,
+ "grad_norm": 1242.759521484375,
+ "learning_rate": 2.1371428571428572e-05,
+ "loss": 0.3314,
+ "step": 5384
+ },
+ {
+ "epoch": 30.771428571428572,
+ "grad_norm": 22.513540267944336,
+ "learning_rate": 2.1365079365079365e-05,
+ "loss": 0.214,
+ "step": 5385
+ },
+ {
+ "epoch": 30.777142857142856,
+ "grad_norm": 159.3976287841797,
+ "learning_rate": 2.1358730158730157e-05,
+ "loss": 0.4593,
+ "step": 5386
+ },
+ {
+ "epoch": 30.782857142857143,
+ "grad_norm": 22.903493881225586,
+ "learning_rate": 2.1352380952380953e-05,
+ "loss": 0.3298,
+ "step": 5387
+ },
+ {
+ "epoch": 30.78857142857143,
+ "grad_norm": 38.81813049316406,
+ "learning_rate": 2.1346031746031746e-05,
+ "loss": 0.2337,
+ "step": 5388
+ },
+ {
+ "epoch": 30.794285714285714,
+ "grad_norm": 27.080564498901367,
+ "learning_rate": 2.133968253968254e-05,
+ "loss": 0.2422,
+ "step": 5389
+ },
+ {
+ "epoch": 30.8,
+ "grad_norm": 436.7923889160156,
+ "learning_rate": 2.1333333333333335e-05,
+ "loss": 0.2084,
+ "step": 5390
+ },
+ {
+ "epoch": 30.805714285714284,
+ "grad_norm": 27.97318458557129,
+ "learning_rate": 2.1326984126984127e-05,
+ "loss": 0.2332,
+ "step": 5391
+ },
+ {
+ "epoch": 30.81142857142857,
+ "grad_norm": 37.840797424316406,
+ "learning_rate": 2.132063492063492e-05,
+ "loss": 0.2998,
+ "step": 5392
+ },
+ {
+ "epoch": 30.81714285714286,
+ "grad_norm": 31.519920349121094,
+ "learning_rate": 2.1314285714285716e-05,
+ "loss": 0.3113,
+ "step": 5393
+ },
+ {
+ "epoch": 30.822857142857142,
+ "grad_norm": 53.09763717651367,
+ "learning_rate": 2.130793650793651e-05,
+ "loss": 0.1737,
+ "step": 5394
+ },
+ {
+ "epoch": 30.82857142857143,
+ "grad_norm": 53.32387161254883,
+ "learning_rate": 2.1301587301587305e-05,
+ "loss": 0.2794,
+ "step": 5395
+ },
+ {
+ "epoch": 30.834285714285713,
+ "grad_norm": 90.9300765991211,
+ "learning_rate": 2.1295238095238097e-05,
+ "loss": 0.2058,
+ "step": 5396
+ },
+ {
+ "epoch": 30.84,
+ "grad_norm": 23.7263240814209,
+ "learning_rate": 2.128888888888889e-05,
+ "loss": 0.2754,
+ "step": 5397
+ },
+ {
+ "epoch": 30.845714285714287,
+ "grad_norm": 79.28535461425781,
+ "learning_rate": 2.1282539682539686e-05,
+ "loss": 0.3019,
+ "step": 5398
+ },
+ {
+ "epoch": 30.85142857142857,
+ "grad_norm": 65.68453979492188,
+ "learning_rate": 2.127619047619048e-05,
+ "loss": 0.2335,
+ "step": 5399
+ },
+ {
+ "epoch": 30.857142857142858,
+ "grad_norm": 30.954830169677734,
+ "learning_rate": 2.126984126984127e-05,
+ "loss": 0.194,
+ "step": 5400
+ },
+ {
+ "epoch": 30.86285714285714,
+ "grad_norm": 912.3456420898438,
+ "learning_rate": 2.1263492063492064e-05,
+ "loss": 0.2008,
+ "step": 5401
+ },
+ {
+ "epoch": 30.86857142857143,
+ "grad_norm": 71.56365966796875,
+ "learning_rate": 2.125714285714286e-05,
+ "loss": 0.2306,
+ "step": 5402
+ },
+ {
+ "epoch": 30.874285714285715,
+ "grad_norm": 51.95694351196289,
+ "learning_rate": 2.1250793650793652e-05,
+ "loss": 0.256,
+ "step": 5403
+ },
+ {
+ "epoch": 30.88,
+ "grad_norm": 49.92461013793945,
+ "learning_rate": 2.1244444444444445e-05,
+ "loss": 0.2412,
+ "step": 5404
+ },
+ {
+ "epoch": 30.885714285714286,
+ "grad_norm": 40.5355224609375,
+ "learning_rate": 2.123809523809524e-05,
+ "loss": 0.1947,
+ "step": 5405
+ },
+ {
+ "epoch": 30.89142857142857,
+ "grad_norm": 384.1972351074219,
+ "learning_rate": 2.1231746031746034e-05,
+ "loss": 0.2032,
+ "step": 5406
+ },
+ {
+ "epoch": 30.897142857142857,
+ "grad_norm": 32.00604248046875,
+ "learning_rate": 2.1225396825396826e-05,
+ "loss": 0.4292,
+ "step": 5407
+ },
+ {
+ "epoch": 30.902857142857144,
+ "grad_norm": 54.61763000488281,
+ "learning_rate": 2.121904761904762e-05,
+ "loss": 0.2052,
+ "step": 5408
+ },
+ {
+ "epoch": 30.908571428571427,
+ "grad_norm": 17.433794021606445,
+ "learning_rate": 2.1212698412698415e-05,
+ "loss": 0.1749,
+ "step": 5409
+ },
+ {
+ "epoch": 30.914285714285715,
+ "grad_norm": 73.77420043945312,
+ "learning_rate": 2.1206349206349208e-05,
+ "loss": 0.3023,
+ "step": 5410
+ },
+ {
+ "epoch": 30.92,
+ "grad_norm": 39.46044158935547,
+ "learning_rate": 2.12e-05,
+ "loss": 0.2494,
+ "step": 5411
+ },
+ {
+ "epoch": 30.925714285714285,
+ "grad_norm": 28.747493743896484,
+ "learning_rate": 2.1193650793650793e-05,
+ "loss": 0.481,
+ "step": 5412
+ },
+ {
+ "epoch": 30.931428571428572,
+ "grad_norm": 50.620052337646484,
+ "learning_rate": 2.118730158730159e-05,
+ "loss": 0.256,
+ "step": 5413
+ },
+ {
+ "epoch": 30.937142857142856,
+ "grad_norm": 43.31208801269531,
+ "learning_rate": 2.118095238095238e-05,
+ "loss": 0.2663,
+ "step": 5414
+ },
+ {
+ "epoch": 30.942857142857143,
+ "grad_norm": 66.55274963378906,
+ "learning_rate": 2.1174603174603174e-05,
+ "loss": 0.2472,
+ "step": 5415
+ },
+ {
+ "epoch": 30.94857142857143,
+ "grad_norm": 114.87254333496094,
+ "learning_rate": 2.116825396825397e-05,
+ "loss": 0.2805,
+ "step": 5416
+ },
+ {
+ "epoch": 30.954285714285714,
+ "grad_norm": 20.99826431274414,
+ "learning_rate": 2.1161904761904763e-05,
+ "loss": 0.2322,
+ "step": 5417
+ },
+ {
+ "epoch": 30.96,
+ "grad_norm": 53.93523406982422,
+ "learning_rate": 2.1155555555555556e-05,
+ "loss": 0.5788,
+ "step": 5418
+ },
+ {
+ "epoch": 30.965714285714284,
+ "grad_norm": 54.85189437866211,
+ "learning_rate": 2.1149206349206348e-05,
+ "loss": 0.2267,
+ "step": 5419
+ },
+ {
+ "epoch": 30.97142857142857,
+ "grad_norm": 44.18019104003906,
+ "learning_rate": 2.1142857142857144e-05,
+ "loss": 0.1996,
+ "step": 5420
+ },
+ {
+ "epoch": 30.97714285714286,
+ "grad_norm": 37.03151321411133,
+ "learning_rate": 2.1136507936507937e-05,
+ "loss": 0.2557,
+ "step": 5421
+ },
+ {
+ "epoch": 30.982857142857142,
+ "grad_norm": 40.45878219604492,
+ "learning_rate": 2.113015873015873e-05,
+ "loss": 0.1859,
+ "step": 5422
+ },
+ {
+ "epoch": 30.98857142857143,
+ "grad_norm": 32.16279602050781,
+ "learning_rate": 2.1123809523809522e-05,
+ "loss": 0.2043,
+ "step": 5423
+ },
+ {
+ "epoch": 30.994285714285713,
+ "grad_norm": 76.29581451416016,
+ "learning_rate": 2.1117460317460318e-05,
+ "loss": 0.276,
+ "step": 5424
+ },
+ {
+ "epoch": 31.0,
+ "grad_norm": 701.8846435546875,
+ "learning_rate": 2.111111111111111e-05,
+ "loss": 0.2463,
+ "step": 5425
+ },
+ {
+ "epoch": 31.0,
+ "eval_classes": 0,
+ "eval_loss": 0.6001912951469421,
+ "eval_map": 0.9271,
+ "eval_map_50": 0.9659,
+ "eval_map_75": 0.9567,
+ "eval_map_large": 0.9272,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9271,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.787,
+ "eval_mar_10": 0.9737,
+ "eval_mar_100": 0.9746,
+ "eval_mar_100_per_class": 0.9746,
+ "eval_mar_large": 0.9746,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.2904,
+ "eval_samples_per_second": 22.121,
+ "eval_steps_per_second": 2.784,
+ "step": 5425
+ },
+ {
+ "epoch": 31.005714285714287,
+ "grad_norm": 67.76434326171875,
+ "learning_rate": 2.1104761904761903e-05,
+ "loss": 0.1804,
+ "step": 5426
+ },
+ {
+ "epoch": 31.01142857142857,
+ "grad_norm": 57.018394470214844,
+ "learning_rate": 2.10984126984127e-05,
+ "loss": 0.2118,
+ "step": 5427
+ },
+ {
+ "epoch": 31.017142857142858,
+ "grad_norm": 42.061222076416016,
+ "learning_rate": 2.1092063492063492e-05,
+ "loss": 0.3424,
+ "step": 5428
+ },
+ {
+ "epoch": 31.02285714285714,
+ "grad_norm": 89.73456573486328,
+ "learning_rate": 2.1085714285714288e-05,
+ "loss": 0.2357,
+ "step": 5429
+ },
+ {
+ "epoch": 31.02857142857143,
+ "grad_norm": 139.79827880859375,
+ "learning_rate": 2.107936507936508e-05,
+ "loss": 0.2857,
+ "step": 5430
+ },
+ {
+ "epoch": 31.034285714285716,
+ "grad_norm": 28.9287166595459,
+ "learning_rate": 2.1073015873015873e-05,
+ "loss": 0.2584,
+ "step": 5431
+ },
+ {
+ "epoch": 31.04,
+ "grad_norm": 30.034549713134766,
+ "learning_rate": 2.106666666666667e-05,
+ "loss": 0.2977,
+ "step": 5432
+ },
+ {
+ "epoch": 31.045714285714286,
+ "grad_norm": 70.49272155761719,
+ "learning_rate": 2.1060317460317462e-05,
+ "loss": 0.2071,
+ "step": 5433
+ },
+ {
+ "epoch": 31.05142857142857,
+ "grad_norm": 47.150718688964844,
+ "learning_rate": 2.1053968253968255e-05,
+ "loss": 0.189,
+ "step": 5434
+ },
+ {
+ "epoch": 31.057142857142857,
+ "grad_norm": 139.5208740234375,
+ "learning_rate": 2.104761904761905e-05,
+ "loss": 0.194,
+ "step": 5435
+ },
+ {
+ "epoch": 31.062857142857144,
+ "grad_norm": 93.4288330078125,
+ "learning_rate": 2.1041269841269843e-05,
+ "loss": 0.2558,
+ "step": 5436
+ },
+ {
+ "epoch": 31.068571428571428,
+ "grad_norm": 27.41012954711914,
+ "learning_rate": 2.1034920634920636e-05,
+ "loss": 0.2515,
+ "step": 5437
+ },
+ {
+ "epoch": 31.074285714285715,
+ "grad_norm": 134.5114288330078,
+ "learning_rate": 2.1028571428571432e-05,
+ "loss": 0.2749,
+ "step": 5438
+ },
+ {
+ "epoch": 31.08,
+ "grad_norm": 25.675273895263672,
+ "learning_rate": 2.1022222222222225e-05,
+ "loss": 0.2624,
+ "step": 5439
+ },
+ {
+ "epoch": 31.085714285714285,
+ "grad_norm": 31.290029525756836,
+ "learning_rate": 2.1015873015873017e-05,
+ "loss": 0.1919,
+ "step": 5440
+ },
+ {
+ "epoch": 31.091428571428573,
+ "grad_norm": 170.2156982421875,
+ "learning_rate": 2.100952380952381e-05,
+ "loss": 0.235,
+ "step": 5441
+ },
+ {
+ "epoch": 31.097142857142856,
+ "grad_norm": 62.920921325683594,
+ "learning_rate": 2.1003174603174606e-05,
+ "loss": 0.1886,
+ "step": 5442
+ },
+ {
+ "epoch": 31.102857142857143,
+ "grad_norm": 906.18896484375,
+ "learning_rate": 2.09968253968254e-05,
+ "loss": 0.305,
+ "step": 5443
+ },
+ {
+ "epoch": 31.10857142857143,
+ "grad_norm": 41.33077621459961,
+ "learning_rate": 2.099047619047619e-05,
+ "loss": 0.2124,
+ "step": 5444
+ },
+ {
+ "epoch": 31.114285714285714,
+ "grad_norm": 26.16013526916504,
+ "learning_rate": 2.0984126984126984e-05,
+ "loss": 0.2462,
+ "step": 5445
+ },
+ {
+ "epoch": 31.12,
+ "grad_norm": 36.835792541503906,
+ "learning_rate": 2.097777777777778e-05,
+ "loss": 0.2799,
+ "step": 5446
+ },
+ {
+ "epoch": 31.125714285714285,
+ "grad_norm": 68.2093734741211,
+ "learning_rate": 2.0971428571428572e-05,
+ "loss": 0.2181,
+ "step": 5447
+ },
+ {
+ "epoch": 31.13142857142857,
+ "grad_norm": 52.82464599609375,
+ "learning_rate": 2.0965079365079365e-05,
+ "loss": 0.2331,
+ "step": 5448
+ },
+ {
+ "epoch": 31.13714285714286,
+ "grad_norm": 45.499717712402344,
+ "learning_rate": 2.095873015873016e-05,
+ "loss": 0.2341,
+ "step": 5449
+ },
+ {
+ "epoch": 31.142857142857142,
+ "grad_norm": 76.03508758544922,
+ "learning_rate": 2.0952380952380954e-05,
+ "loss": 0.1941,
+ "step": 5450
+ },
+ {
+ "epoch": 31.14857142857143,
+ "grad_norm": 22.56733512878418,
+ "learning_rate": 2.0946031746031746e-05,
+ "loss": 0.2002,
+ "step": 5451
+ },
+ {
+ "epoch": 31.154285714285713,
+ "grad_norm": 46.011348724365234,
+ "learning_rate": 2.093968253968254e-05,
+ "loss": 0.3206,
+ "step": 5452
+ },
+ {
+ "epoch": 31.16,
+ "grad_norm": 35.01982879638672,
+ "learning_rate": 2.0933333333333335e-05,
+ "loss": 0.2467,
+ "step": 5453
+ },
+ {
+ "epoch": 31.165714285714287,
+ "grad_norm": 64.70989227294922,
+ "learning_rate": 2.0926984126984128e-05,
+ "loss": 0.2938,
+ "step": 5454
+ },
+ {
+ "epoch": 31.17142857142857,
+ "grad_norm": 21.85089874267578,
+ "learning_rate": 2.092063492063492e-05,
+ "loss": 0.3465,
+ "step": 5455
+ },
+ {
+ "epoch": 31.177142857142858,
+ "grad_norm": 48.40916442871094,
+ "learning_rate": 2.0914285714285713e-05,
+ "loss": 0.1811,
+ "step": 5456
+ },
+ {
+ "epoch": 31.18285714285714,
+ "grad_norm": 76.00787353515625,
+ "learning_rate": 2.090793650793651e-05,
+ "loss": 0.4322,
+ "step": 5457
+ },
+ {
+ "epoch": 31.18857142857143,
+ "grad_norm": 20.779376983642578,
+ "learning_rate": 2.09015873015873e-05,
+ "loss": 0.2481,
+ "step": 5458
+ },
+ {
+ "epoch": 31.194285714285716,
+ "grad_norm": 47.01481246948242,
+ "learning_rate": 2.0895238095238094e-05,
+ "loss": 0.2247,
+ "step": 5459
+ },
+ {
+ "epoch": 31.2,
+ "grad_norm": 63.114315032958984,
+ "learning_rate": 2.088888888888889e-05,
+ "loss": 0.2689,
+ "step": 5460
+ },
+ {
+ "epoch": 31.205714285714286,
+ "grad_norm": 42.617279052734375,
+ "learning_rate": 2.0882539682539683e-05,
+ "loss": 0.2182,
+ "step": 5461
+ },
+ {
+ "epoch": 31.21142857142857,
+ "grad_norm": 24.569684982299805,
+ "learning_rate": 2.0876190476190476e-05,
+ "loss": 0.2207,
+ "step": 5462
+ },
+ {
+ "epoch": 31.217142857142857,
+ "grad_norm": 37.63087844848633,
+ "learning_rate": 2.0869841269841268e-05,
+ "loss": 0.2266,
+ "step": 5463
+ },
+ {
+ "epoch": 31.222857142857144,
+ "grad_norm": 19.446998596191406,
+ "learning_rate": 2.0863492063492064e-05,
+ "loss": 0.236,
+ "step": 5464
+ },
+ {
+ "epoch": 31.228571428571428,
+ "grad_norm": 50.6137580871582,
+ "learning_rate": 2.0857142857142857e-05,
+ "loss": 0.1773,
+ "step": 5465
+ },
+ {
+ "epoch": 31.234285714285715,
+ "grad_norm": 67.84950256347656,
+ "learning_rate": 2.0850793650793653e-05,
+ "loss": 0.1816,
+ "step": 5466
+ },
+ {
+ "epoch": 31.24,
+ "grad_norm": 76.56767272949219,
+ "learning_rate": 2.0844444444444446e-05,
+ "loss": 0.2586,
+ "step": 5467
+ },
+ {
+ "epoch": 31.245714285714286,
+ "grad_norm": 730.4011840820312,
+ "learning_rate": 2.083809523809524e-05,
+ "loss": 0.3457,
+ "step": 5468
+ },
+ {
+ "epoch": 31.251428571428573,
+ "grad_norm": 74.66912078857422,
+ "learning_rate": 2.0831746031746034e-05,
+ "loss": 0.1926,
+ "step": 5469
+ },
+ {
+ "epoch": 31.257142857142856,
+ "grad_norm": 63.90117263793945,
+ "learning_rate": 2.0825396825396827e-05,
+ "loss": 0.2367,
+ "step": 5470
+ },
+ {
+ "epoch": 31.262857142857143,
+ "grad_norm": 73.22410583496094,
+ "learning_rate": 2.0819047619047623e-05,
+ "loss": 0.2367,
+ "step": 5471
+ },
+ {
+ "epoch": 31.268571428571427,
+ "grad_norm": 36.858543395996094,
+ "learning_rate": 2.0812698412698415e-05,
+ "loss": 0.3009,
+ "step": 5472
+ },
+ {
+ "epoch": 31.274285714285714,
+ "grad_norm": 21.480995178222656,
+ "learning_rate": 2.0806349206349208e-05,
+ "loss": 0.1774,
+ "step": 5473
+ },
+ {
+ "epoch": 31.28,
+ "grad_norm": 35.80170822143555,
+ "learning_rate": 2.08e-05,
+ "loss": 0.234,
+ "step": 5474
+ },
+ {
+ "epoch": 31.285714285714285,
+ "grad_norm": 49.55852508544922,
+ "learning_rate": 2.0793650793650797e-05,
+ "loss": 0.1872,
+ "step": 5475
+ },
+ {
+ "epoch": 31.291428571428572,
+ "grad_norm": 32.34794616699219,
+ "learning_rate": 2.078730158730159e-05,
+ "loss": 0.2328,
+ "step": 5476
+ },
+ {
+ "epoch": 31.29714285714286,
+ "grad_norm": 53.036277770996094,
+ "learning_rate": 2.0780952380952382e-05,
+ "loss": 0.3155,
+ "step": 5477
+ },
+ {
+ "epoch": 31.302857142857142,
+ "grad_norm": 34.03774642944336,
+ "learning_rate": 2.0774603174603175e-05,
+ "loss": 0.2306,
+ "step": 5478
+ },
+ {
+ "epoch": 31.30857142857143,
+ "grad_norm": 124.47933197021484,
+ "learning_rate": 2.076825396825397e-05,
+ "loss": 0.2288,
+ "step": 5479
+ },
+ {
+ "epoch": 31.314285714285713,
+ "grad_norm": 31.235647201538086,
+ "learning_rate": 2.0761904761904763e-05,
+ "loss": 0.2811,
+ "step": 5480
+ },
+ {
+ "epoch": 31.32,
+ "grad_norm": 112.3315200805664,
+ "learning_rate": 2.0755555555555556e-05,
+ "loss": 0.302,
+ "step": 5481
+ },
+ {
+ "epoch": 31.325714285714287,
+ "grad_norm": 32.7776985168457,
+ "learning_rate": 2.0749206349206352e-05,
+ "loss": 0.2671,
+ "step": 5482
+ },
+ {
+ "epoch": 31.33142857142857,
+ "grad_norm": 60.15741729736328,
+ "learning_rate": 2.0742857142857145e-05,
+ "loss": 0.3821,
+ "step": 5483
+ },
+ {
+ "epoch": 31.337142857142858,
+ "grad_norm": 28.403162002563477,
+ "learning_rate": 2.0736507936507937e-05,
+ "loss": 0.1984,
+ "step": 5484
+ },
+ {
+ "epoch": 31.34285714285714,
+ "grad_norm": 51.33551788330078,
+ "learning_rate": 2.073015873015873e-05,
+ "loss": 0.2028,
+ "step": 5485
+ },
+ {
+ "epoch": 31.34857142857143,
+ "grad_norm": 31.286685943603516,
+ "learning_rate": 2.0723809523809526e-05,
+ "loss": 0.306,
+ "step": 5486
+ },
+ {
+ "epoch": 31.354285714285716,
+ "grad_norm": 43.21095275878906,
+ "learning_rate": 2.071746031746032e-05,
+ "loss": 0.3492,
+ "step": 5487
+ },
+ {
+ "epoch": 31.36,
+ "grad_norm": 21.936094284057617,
+ "learning_rate": 2.071111111111111e-05,
+ "loss": 0.1757,
+ "step": 5488
+ },
+ {
+ "epoch": 31.365714285714287,
+ "grad_norm": 115.03231811523438,
+ "learning_rate": 2.0704761904761904e-05,
+ "loss": 0.2715,
+ "step": 5489
+ },
+ {
+ "epoch": 31.37142857142857,
+ "grad_norm": 78.47013854980469,
+ "learning_rate": 2.06984126984127e-05,
+ "loss": 0.2645,
+ "step": 5490
+ },
+ {
+ "epoch": 31.377142857142857,
+ "grad_norm": 45.49565505981445,
+ "learning_rate": 2.0692063492063493e-05,
+ "loss": 0.2694,
+ "step": 5491
+ },
+ {
+ "epoch": 31.382857142857144,
+ "grad_norm": 24.608911514282227,
+ "learning_rate": 2.0685714285714285e-05,
+ "loss": 0.176,
+ "step": 5492
+ },
+ {
+ "epoch": 31.388571428571428,
+ "grad_norm": 88.68647766113281,
+ "learning_rate": 2.067936507936508e-05,
+ "loss": 0.2936,
+ "step": 5493
+ },
+ {
+ "epoch": 31.394285714285715,
+ "grad_norm": 54.628055572509766,
+ "learning_rate": 2.0673015873015874e-05,
+ "loss": 0.2538,
+ "step": 5494
+ },
+ {
+ "epoch": 31.4,
+ "grad_norm": 79.88322448730469,
+ "learning_rate": 2.0666666666666666e-05,
+ "loss": 0.2181,
+ "step": 5495
+ },
+ {
+ "epoch": 31.405714285714286,
+ "grad_norm": 459.6210632324219,
+ "learning_rate": 2.066031746031746e-05,
+ "loss": 0.2895,
+ "step": 5496
+ },
+ {
+ "epoch": 31.411428571428573,
+ "grad_norm": 72.49649810791016,
+ "learning_rate": 2.0653968253968255e-05,
+ "loss": 0.2486,
+ "step": 5497
+ },
+ {
+ "epoch": 31.417142857142856,
+ "grad_norm": 39.870601654052734,
+ "learning_rate": 2.0647619047619048e-05,
+ "loss": 0.2942,
+ "step": 5498
+ },
+ {
+ "epoch": 31.422857142857143,
+ "grad_norm": 67.51768493652344,
+ "learning_rate": 2.064126984126984e-05,
+ "loss": 0.3125,
+ "step": 5499
+ },
+ {
+ "epoch": 31.428571428571427,
+ "grad_norm": 1334.420654296875,
+ "learning_rate": 2.0634920634920636e-05,
+ "loss": 0.2806,
+ "step": 5500
+ },
+ {
+ "epoch": 31.434285714285714,
+ "grad_norm": 164.1793212890625,
+ "learning_rate": 2.062857142857143e-05,
+ "loss": 0.2918,
+ "step": 5501
+ },
+ {
+ "epoch": 31.44,
+ "grad_norm": 63.5439453125,
+ "learning_rate": 2.062222222222222e-05,
+ "loss": 0.2616,
+ "step": 5502
+ },
+ {
+ "epoch": 31.445714285714285,
+ "grad_norm": 69.50310516357422,
+ "learning_rate": 2.0615873015873018e-05,
+ "loss": 0.2505,
+ "step": 5503
+ },
+ {
+ "epoch": 31.451428571428572,
+ "grad_norm": 38.087135314941406,
+ "learning_rate": 2.060952380952381e-05,
+ "loss": 0.2052,
+ "step": 5504
+ },
+ {
+ "epoch": 31.457142857142856,
+ "grad_norm": 44.78586196899414,
+ "learning_rate": 2.0603174603174606e-05,
+ "loss": 0.2981,
+ "step": 5505
+ },
+ {
+ "epoch": 31.462857142857143,
+ "grad_norm": 60.432674407958984,
+ "learning_rate": 2.05968253968254e-05,
+ "loss": 0.2139,
+ "step": 5506
+ },
+ {
+ "epoch": 31.46857142857143,
+ "grad_norm": 43.80990982055664,
+ "learning_rate": 2.059047619047619e-05,
+ "loss": 0.3612,
+ "step": 5507
+ },
+ {
+ "epoch": 31.474285714285713,
+ "grad_norm": 157.6404266357422,
+ "learning_rate": 2.0584126984126988e-05,
+ "loss": 0.319,
+ "step": 5508
+ },
+ {
+ "epoch": 31.48,
+ "grad_norm": 76.8299789428711,
+ "learning_rate": 2.057777777777778e-05,
+ "loss": 0.2222,
+ "step": 5509
+ },
+ {
+ "epoch": 31.485714285714284,
+ "grad_norm": 67.4052505493164,
+ "learning_rate": 2.0571428571428573e-05,
+ "loss": 0.256,
+ "step": 5510
+ },
+ {
+ "epoch": 31.49142857142857,
+ "grad_norm": 35.062255859375,
+ "learning_rate": 2.0565079365079366e-05,
+ "loss": 0.2586,
+ "step": 5511
+ },
+ {
+ "epoch": 31.497142857142858,
+ "grad_norm": 46.1373405456543,
+ "learning_rate": 2.055873015873016e-05,
+ "loss": 0.3328,
+ "step": 5512
+ },
+ {
+ "epoch": 31.502857142857142,
+ "grad_norm": 305.5950927734375,
+ "learning_rate": 2.0552380952380954e-05,
+ "loss": 0.1931,
+ "step": 5513
+ },
+ {
+ "epoch": 31.50857142857143,
+ "grad_norm": 36.37525939941406,
+ "learning_rate": 2.0546031746031747e-05,
+ "loss": 0.2586,
+ "step": 5514
+ },
+ {
+ "epoch": 31.514285714285712,
+ "grad_norm": 557.3544311523438,
+ "learning_rate": 2.053968253968254e-05,
+ "loss": 0.2959,
+ "step": 5515
+ },
+ {
+ "epoch": 31.52,
+ "grad_norm": 63.56522750854492,
+ "learning_rate": 2.0533333333333336e-05,
+ "loss": 0.2672,
+ "step": 5516
+ },
+ {
+ "epoch": 31.525714285714287,
+ "grad_norm": 258.99517822265625,
+ "learning_rate": 2.0526984126984128e-05,
+ "loss": 0.3091,
+ "step": 5517
+ },
+ {
+ "epoch": 31.53142857142857,
+ "grad_norm": 24.586519241333008,
+ "learning_rate": 2.052063492063492e-05,
+ "loss": 0.3207,
+ "step": 5518
+ },
+ {
+ "epoch": 31.537142857142857,
+ "grad_norm": 20.953536987304688,
+ "learning_rate": 2.0514285714285717e-05,
+ "loss": 0.2527,
+ "step": 5519
+ },
+ {
+ "epoch": 31.542857142857144,
+ "grad_norm": 117.11972045898438,
+ "learning_rate": 2.050793650793651e-05,
+ "loss": 0.2423,
+ "step": 5520
+ },
+ {
+ "epoch": 31.548571428571428,
+ "grad_norm": 115.53871154785156,
+ "learning_rate": 2.0501587301587302e-05,
+ "loss": 0.2458,
+ "step": 5521
+ },
+ {
+ "epoch": 31.554285714285715,
+ "grad_norm": 56.487728118896484,
+ "learning_rate": 2.0495238095238095e-05,
+ "loss": 0.287,
+ "step": 5522
+ },
+ {
+ "epoch": 31.56,
+ "grad_norm": 77.6148681640625,
+ "learning_rate": 2.048888888888889e-05,
+ "loss": 0.26,
+ "step": 5523
+ },
+ {
+ "epoch": 31.565714285714286,
+ "grad_norm": 51.763145446777344,
+ "learning_rate": 2.0482539682539683e-05,
+ "loss": 0.2177,
+ "step": 5524
+ },
+ {
+ "epoch": 31.571428571428573,
+ "grad_norm": 45.99419021606445,
+ "learning_rate": 2.0476190476190476e-05,
+ "loss": 0.1893,
+ "step": 5525
+ },
+ {
+ "epoch": 31.577142857142857,
+ "grad_norm": 46.7635498046875,
+ "learning_rate": 2.046984126984127e-05,
+ "loss": 0.243,
+ "step": 5526
+ },
+ {
+ "epoch": 31.582857142857144,
+ "grad_norm": 48.2321662902832,
+ "learning_rate": 2.0463492063492065e-05,
+ "loss": 0.2543,
+ "step": 5527
+ },
+ {
+ "epoch": 31.588571428571427,
+ "grad_norm": 69.36721801757812,
+ "learning_rate": 2.0457142857142857e-05,
+ "loss": 0.3989,
+ "step": 5528
+ },
+ {
+ "epoch": 31.594285714285714,
+ "grad_norm": 119.64047241210938,
+ "learning_rate": 2.045079365079365e-05,
+ "loss": 0.3188,
+ "step": 5529
+ },
+ {
+ "epoch": 31.6,
+ "grad_norm": 46.181678771972656,
+ "learning_rate": 2.0444444444444446e-05,
+ "loss": 0.2096,
+ "step": 5530
+ },
+ {
+ "epoch": 31.605714285714285,
+ "grad_norm": 57.24357223510742,
+ "learning_rate": 2.043809523809524e-05,
+ "loss": 0.3174,
+ "step": 5531
+ },
+ {
+ "epoch": 31.611428571428572,
+ "grad_norm": 67.29043579101562,
+ "learning_rate": 2.043174603174603e-05,
+ "loss": 0.2383,
+ "step": 5532
+ },
+ {
+ "epoch": 31.617142857142856,
+ "grad_norm": 28.11064910888672,
+ "learning_rate": 2.0425396825396824e-05,
+ "loss": 0.2024,
+ "step": 5533
+ },
+ {
+ "epoch": 31.622857142857143,
+ "grad_norm": 74.12226104736328,
+ "learning_rate": 2.041904761904762e-05,
+ "loss": 0.2789,
+ "step": 5534
+ },
+ {
+ "epoch": 31.62857142857143,
+ "grad_norm": 32.433372497558594,
+ "learning_rate": 2.0412698412698413e-05,
+ "loss": 0.2769,
+ "step": 5535
+ },
+ {
+ "epoch": 31.634285714285713,
+ "grad_norm": 30.32268524169922,
+ "learning_rate": 2.0406349206349205e-05,
+ "loss": 0.2244,
+ "step": 5536
+ },
+ {
+ "epoch": 31.64,
+ "grad_norm": 48.050132751464844,
+ "learning_rate": 2.04e-05,
+ "loss": 0.5059,
+ "step": 5537
+ },
+ {
+ "epoch": 31.645714285714284,
+ "grad_norm": 57.8617057800293,
+ "learning_rate": 2.0393650793650794e-05,
+ "loss": 0.219,
+ "step": 5538
+ },
+ {
+ "epoch": 31.65142857142857,
+ "grad_norm": 38.06205749511719,
+ "learning_rate": 2.038730158730159e-05,
+ "loss": 0.3258,
+ "step": 5539
+ },
+ {
+ "epoch": 31.65714285714286,
+ "grad_norm": 1111.97412109375,
+ "learning_rate": 2.0380952380952382e-05,
+ "loss": 0.2625,
+ "step": 5540
+ },
+ {
+ "epoch": 31.662857142857142,
+ "grad_norm": 51.53617858886719,
+ "learning_rate": 2.0374603174603175e-05,
+ "loss": 0.1704,
+ "step": 5541
+ },
+ {
+ "epoch": 31.66857142857143,
+ "grad_norm": 39.850181579589844,
+ "learning_rate": 2.036825396825397e-05,
+ "loss": 0.2694,
+ "step": 5542
+ },
+ {
+ "epoch": 31.674285714285713,
+ "grad_norm": 65.64685821533203,
+ "learning_rate": 2.0361904761904764e-05,
+ "loss": 0.418,
+ "step": 5543
+ },
+ {
+ "epoch": 31.68,
+ "grad_norm": 61.119632720947266,
+ "learning_rate": 2.0355555555555556e-05,
+ "loss": 0.2006,
+ "step": 5544
+ },
+ {
+ "epoch": 31.685714285714287,
+ "grad_norm": 55.178192138671875,
+ "learning_rate": 2.0349206349206352e-05,
+ "loss": 0.2113,
+ "step": 5545
+ },
+ {
+ "epoch": 31.69142857142857,
+ "grad_norm": 25.63457489013672,
+ "learning_rate": 2.0342857142857145e-05,
+ "loss": 0.258,
+ "step": 5546
+ },
+ {
+ "epoch": 31.697142857142858,
+ "grad_norm": 41.02190399169922,
+ "learning_rate": 2.0336507936507938e-05,
+ "loss": 0.2765,
+ "step": 5547
+ },
+ {
+ "epoch": 31.70285714285714,
+ "grad_norm": 56.24254608154297,
+ "learning_rate": 2.033015873015873e-05,
+ "loss": 0.2344,
+ "step": 5548
+ },
+ {
+ "epoch": 31.708571428571428,
+ "grad_norm": 395.86474609375,
+ "learning_rate": 2.0323809523809526e-05,
+ "loss": 0.2746,
+ "step": 5549
+ },
+ {
+ "epoch": 31.714285714285715,
+ "grad_norm": 48.37761688232422,
+ "learning_rate": 2.031746031746032e-05,
+ "loss": 0.1888,
+ "step": 5550
+ },
+ {
+ "epoch": 31.72,
+ "grad_norm": 46.34769821166992,
+ "learning_rate": 2.031111111111111e-05,
+ "loss": 0.2266,
+ "step": 5551
+ },
+ {
+ "epoch": 31.725714285714286,
+ "grad_norm": 79.68155670166016,
+ "learning_rate": 2.0304761904761908e-05,
+ "loss": 0.2262,
+ "step": 5552
+ },
+ {
+ "epoch": 31.731428571428573,
+ "grad_norm": 74.87509155273438,
+ "learning_rate": 2.02984126984127e-05,
+ "loss": 0.2685,
+ "step": 5553
+ },
+ {
+ "epoch": 31.737142857142857,
+ "grad_norm": 178.58267211914062,
+ "learning_rate": 2.0292063492063493e-05,
+ "loss": 0.2651,
+ "step": 5554
+ },
+ {
+ "epoch": 31.742857142857144,
+ "grad_norm": 31.781280517578125,
+ "learning_rate": 2.0285714285714286e-05,
+ "loss": 0.2259,
+ "step": 5555
+ },
+ {
+ "epoch": 31.748571428571427,
+ "grad_norm": 187.2349853515625,
+ "learning_rate": 2.027936507936508e-05,
+ "loss": 0.2315,
+ "step": 5556
+ },
+ {
+ "epoch": 31.754285714285714,
+ "grad_norm": 68.71722412109375,
+ "learning_rate": 2.0273015873015874e-05,
+ "loss": 0.2797,
+ "step": 5557
+ },
+ {
+ "epoch": 31.76,
+ "grad_norm": 41.61360168457031,
+ "learning_rate": 2.0266666666666667e-05,
+ "loss": 0.2068,
+ "step": 5558
+ },
+ {
+ "epoch": 31.765714285714285,
+ "grad_norm": 30.7895450592041,
+ "learning_rate": 2.026031746031746e-05,
+ "loss": 0.2622,
+ "step": 5559
+ },
+ {
+ "epoch": 31.771428571428572,
+ "grad_norm": 66.16661071777344,
+ "learning_rate": 2.0253968253968256e-05,
+ "loss": 0.246,
+ "step": 5560
+ },
+ {
+ "epoch": 31.777142857142856,
+ "grad_norm": 277.6895751953125,
+ "learning_rate": 2.0247619047619048e-05,
+ "loss": 0.3371,
+ "step": 5561
+ },
+ {
+ "epoch": 31.782857142857143,
+ "grad_norm": 241.08880615234375,
+ "learning_rate": 2.024126984126984e-05,
+ "loss": 0.2674,
+ "step": 5562
+ },
+ {
+ "epoch": 31.78857142857143,
+ "grad_norm": 38.30510711669922,
+ "learning_rate": 2.0234920634920637e-05,
+ "loss": 0.2794,
+ "step": 5563
+ },
+ {
+ "epoch": 31.794285714285714,
+ "grad_norm": 50.54317092895508,
+ "learning_rate": 2.022857142857143e-05,
+ "loss": 0.3048,
+ "step": 5564
+ },
+ {
+ "epoch": 31.8,
+ "grad_norm": 316.1440734863281,
+ "learning_rate": 2.0222222222222222e-05,
+ "loss": 0.2916,
+ "step": 5565
+ },
+ {
+ "epoch": 31.805714285714284,
+ "grad_norm": 44.01576614379883,
+ "learning_rate": 2.0215873015873015e-05,
+ "loss": 0.265,
+ "step": 5566
+ },
+ {
+ "epoch": 31.81142857142857,
+ "grad_norm": 61.58749008178711,
+ "learning_rate": 2.020952380952381e-05,
+ "loss": 0.3227,
+ "step": 5567
+ },
+ {
+ "epoch": 31.81714285714286,
+ "grad_norm": 58.81139373779297,
+ "learning_rate": 2.0203174603174603e-05,
+ "loss": 0.2526,
+ "step": 5568
+ },
+ {
+ "epoch": 31.822857142857142,
+ "grad_norm": 62.55400085449219,
+ "learning_rate": 2.0196825396825396e-05,
+ "loss": 0.1877,
+ "step": 5569
+ },
+ {
+ "epoch": 31.82857142857143,
+ "grad_norm": 60.63581466674805,
+ "learning_rate": 2.019047619047619e-05,
+ "loss": 0.2509,
+ "step": 5570
+ },
+ {
+ "epoch": 31.834285714285713,
+ "grad_norm": 31.16853141784668,
+ "learning_rate": 2.0184126984126985e-05,
+ "loss": 0.2073,
+ "step": 5571
+ },
+ {
+ "epoch": 31.84,
+ "grad_norm": 86.914794921875,
+ "learning_rate": 2.0177777777777777e-05,
+ "loss": 0.2536,
+ "step": 5572
+ },
+ {
+ "epoch": 31.845714285714287,
+ "grad_norm": 77.10182189941406,
+ "learning_rate": 2.0171428571428573e-05,
+ "loss": 0.2441,
+ "step": 5573
+ },
+ {
+ "epoch": 31.85142857142857,
+ "grad_norm": 438.7278137207031,
+ "learning_rate": 2.0165079365079366e-05,
+ "loss": 0.2637,
+ "step": 5574
+ },
+ {
+ "epoch": 31.857142857142858,
+ "grad_norm": 634.8936157226562,
+ "learning_rate": 2.015873015873016e-05,
+ "loss": 0.2978,
+ "step": 5575
+ },
+ {
+ "epoch": 31.86285714285714,
+ "grad_norm": 65.57212829589844,
+ "learning_rate": 2.0152380952380955e-05,
+ "loss": 0.3746,
+ "step": 5576
+ },
+ {
+ "epoch": 31.86857142857143,
+ "grad_norm": 52.18415451049805,
+ "learning_rate": 2.0146031746031747e-05,
+ "loss": 0.355,
+ "step": 5577
+ },
+ {
+ "epoch": 31.874285714285715,
+ "grad_norm": 277.6800231933594,
+ "learning_rate": 2.0139682539682543e-05,
+ "loss": 0.3008,
+ "step": 5578
+ },
+ {
+ "epoch": 31.88,
+ "grad_norm": 50.11739730834961,
+ "learning_rate": 2.0133333333333336e-05,
+ "loss": 0.2894,
+ "step": 5579
+ },
+ {
+ "epoch": 31.885714285714286,
+ "grad_norm": 56.192787170410156,
+ "learning_rate": 2.012698412698413e-05,
+ "loss": 0.2317,
+ "step": 5580
+ },
+ {
+ "epoch": 31.89142857142857,
+ "grad_norm": 827.2831420898438,
+ "learning_rate": 2.012063492063492e-05,
+ "loss": 0.2363,
+ "step": 5581
+ },
+ {
+ "epoch": 31.897142857142857,
+ "grad_norm": 69.73470306396484,
+ "learning_rate": 2.0114285714285717e-05,
+ "loss": 0.203,
+ "step": 5582
+ },
+ {
+ "epoch": 31.902857142857144,
+ "grad_norm": 30.762157440185547,
+ "learning_rate": 2.010793650793651e-05,
+ "loss": 0.2364,
+ "step": 5583
+ },
+ {
+ "epoch": 31.908571428571427,
+ "grad_norm": 27.67940330505371,
+ "learning_rate": 2.0101587301587303e-05,
+ "loss": 0.2964,
+ "step": 5584
+ },
+ {
+ "epoch": 31.914285714285715,
+ "grad_norm": 53.260562896728516,
+ "learning_rate": 2.00952380952381e-05,
+ "loss": 0.2025,
+ "step": 5585
+ },
+ {
+ "epoch": 31.92,
+ "grad_norm": 39.59288024902344,
+ "learning_rate": 2.008888888888889e-05,
+ "loss": 0.2818,
+ "step": 5586
+ },
+ {
+ "epoch": 31.925714285714285,
+ "grad_norm": 67.39969635009766,
+ "learning_rate": 2.0082539682539684e-05,
+ "loss": 0.2169,
+ "step": 5587
+ },
+ {
+ "epoch": 31.931428571428572,
+ "grad_norm": 43.52405548095703,
+ "learning_rate": 2.0076190476190476e-05,
+ "loss": 0.2053,
+ "step": 5588
+ },
+ {
+ "epoch": 31.937142857142856,
+ "grad_norm": 58.90415573120117,
+ "learning_rate": 2.0069841269841272e-05,
+ "loss": 0.2644,
+ "step": 5589
+ },
+ {
+ "epoch": 31.942857142857143,
+ "grad_norm": 29.253192901611328,
+ "learning_rate": 2.0063492063492065e-05,
+ "loss": 0.2197,
+ "step": 5590
+ },
+ {
+ "epoch": 31.94857142857143,
+ "grad_norm": 33.914119720458984,
+ "learning_rate": 2.0057142857142858e-05,
+ "loss": 0.1722,
+ "step": 5591
+ },
+ {
+ "epoch": 31.954285714285714,
+ "grad_norm": 680.5887451171875,
+ "learning_rate": 2.005079365079365e-05,
+ "loss": 0.2555,
+ "step": 5592
+ },
+ {
+ "epoch": 31.96,
+ "grad_norm": 91.59898376464844,
+ "learning_rate": 2.0044444444444446e-05,
+ "loss": 0.323,
+ "step": 5593
+ },
+ {
+ "epoch": 31.965714285714284,
+ "grad_norm": 49.2437744140625,
+ "learning_rate": 2.003809523809524e-05,
+ "loss": 0.2468,
+ "step": 5594
+ },
+ {
+ "epoch": 31.97142857142857,
+ "grad_norm": 51.85927200317383,
+ "learning_rate": 2.003174603174603e-05,
+ "loss": 0.2949,
+ "step": 5595
+ },
+ {
+ "epoch": 31.97714285714286,
+ "grad_norm": 84.97169494628906,
+ "learning_rate": 2.0025396825396828e-05,
+ "loss": 0.1939,
+ "step": 5596
+ },
+ {
+ "epoch": 31.982857142857142,
+ "grad_norm": 31.042736053466797,
+ "learning_rate": 2.001904761904762e-05,
+ "loss": 0.2351,
+ "step": 5597
+ },
+ {
+ "epoch": 31.98857142857143,
+ "grad_norm": 573.0760498046875,
+ "learning_rate": 2.0012698412698413e-05,
+ "loss": 0.3312,
+ "step": 5598
+ },
+ {
+ "epoch": 31.994285714285713,
+ "grad_norm": 41.7373046875,
+ "learning_rate": 2.0006349206349206e-05,
+ "loss": 0.2306,
+ "step": 5599
+ },
+ {
+ "epoch": 32.0,
+ "grad_norm": 31.659475326538086,
+ "learning_rate": 2e-05,
+ "loss": 0.2117,
+ "step": 5600
+ },
+ {
+ "epoch": 32.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5764447450637817,
+ "eval_map": 0.9374,
+ "eval_map_50": 0.9692,
+ "eval_map_75": 0.963,
+ "eval_map_large": 0.9374,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9374,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7838,
+ "eval_mar_10": 0.9768,
+ "eval_mar_100": 0.9781,
+ "eval_mar_100_per_class": 0.9781,
+ "eval_mar_large": 0.9781,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.1244,
+ "eval_samples_per_second": 22.401,
+ "eval_steps_per_second": 2.819,
+ "step": 5600
+ },
+ {
+ "epoch": 32.005714285714284,
+ "grad_norm": 68.49040985107422,
+ "learning_rate": 1.9993650793650794e-05,
+ "loss": 0.2377,
+ "step": 5601
+ },
+ {
+ "epoch": 32.011428571428574,
+ "grad_norm": 75.28813171386719,
+ "learning_rate": 1.9987301587301587e-05,
+ "loss": 0.2451,
+ "step": 5602
+ },
+ {
+ "epoch": 32.01714285714286,
+ "grad_norm": 29.55270767211914,
+ "learning_rate": 1.998095238095238e-05,
+ "loss": 0.1495,
+ "step": 5603
+ },
+ {
+ "epoch": 32.02285714285714,
+ "grad_norm": 1036.5198974609375,
+ "learning_rate": 1.9974603174603176e-05,
+ "loss": 0.2689,
+ "step": 5604
+ },
+ {
+ "epoch": 32.02857142857143,
+ "grad_norm": 25.972496032714844,
+ "learning_rate": 1.9968253968253968e-05,
+ "loss": 0.1889,
+ "step": 5605
+ },
+ {
+ "epoch": 32.034285714285716,
+ "grad_norm": 56.76987075805664,
+ "learning_rate": 1.996190476190476e-05,
+ "loss": 0.3136,
+ "step": 5606
+ },
+ {
+ "epoch": 32.04,
+ "grad_norm": 48.14201354980469,
+ "learning_rate": 1.9955555555555557e-05,
+ "loss": 0.1673,
+ "step": 5607
+ },
+ {
+ "epoch": 32.04571428571428,
+ "grad_norm": 55.37479782104492,
+ "learning_rate": 1.994920634920635e-05,
+ "loss": 0.3009,
+ "step": 5608
+ },
+ {
+ "epoch": 32.05142857142857,
+ "grad_norm": 27.12615394592285,
+ "learning_rate": 1.9942857142857142e-05,
+ "loss": 0.2047,
+ "step": 5609
+ },
+ {
+ "epoch": 32.05714285714286,
+ "grad_norm": 57.392921447753906,
+ "learning_rate": 1.9936507936507938e-05,
+ "loss": 0.2449,
+ "step": 5610
+ },
+ {
+ "epoch": 32.06285714285714,
+ "grad_norm": 44.158294677734375,
+ "learning_rate": 1.993015873015873e-05,
+ "loss": 0.2323,
+ "step": 5611
+ },
+ {
+ "epoch": 32.06857142857143,
+ "grad_norm": 53.65294647216797,
+ "learning_rate": 1.9923809523809527e-05,
+ "loss": 0.1964,
+ "step": 5612
+ },
+ {
+ "epoch": 32.074285714285715,
+ "grad_norm": 98.22899627685547,
+ "learning_rate": 1.991746031746032e-05,
+ "loss": 0.2387,
+ "step": 5613
+ },
+ {
+ "epoch": 32.08,
+ "grad_norm": 46.33811950683594,
+ "learning_rate": 1.9911111111111112e-05,
+ "loss": 0.2555,
+ "step": 5614
+ },
+ {
+ "epoch": 32.08571428571429,
+ "grad_norm": 21.99703025817871,
+ "learning_rate": 1.9904761904761908e-05,
+ "loss": 0.1987,
+ "step": 5615
+ },
+ {
+ "epoch": 32.09142857142857,
+ "grad_norm": 34.59161376953125,
+ "learning_rate": 1.98984126984127e-05,
+ "loss": 0.2103,
+ "step": 5616
+ },
+ {
+ "epoch": 32.097142857142856,
+ "grad_norm": 28.423341751098633,
+ "learning_rate": 1.9892063492063493e-05,
+ "loss": 0.2242,
+ "step": 5617
+ },
+ {
+ "epoch": 32.10285714285714,
+ "grad_norm": 48.24335479736328,
+ "learning_rate": 1.9885714285714286e-05,
+ "loss": 0.3536,
+ "step": 5618
+ },
+ {
+ "epoch": 32.10857142857143,
+ "grad_norm": 75.23125457763672,
+ "learning_rate": 1.9879365079365082e-05,
+ "loss": 0.2559,
+ "step": 5619
+ },
+ {
+ "epoch": 32.114285714285714,
+ "grad_norm": 28.813011169433594,
+ "learning_rate": 1.9873015873015875e-05,
+ "loss": 0.2605,
+ "step": 5620
+ },
+ {
+ "epoch": 32.12,
+ "grad_norm": 101.69721221923828,
+ "learning_rate": 1.9866666666666667e-05,
+ "loss": 0.2976,
+ "step": 5621
+ },
+ {
+ "epoch": 32.12571428571429,
+ "grad_norm": 46.14924240112305,
+ "learning_rate": 1.9860317460317463e-05,
+ "loss": 0.1942,
+ "step": 5622
+ },
+ {
+ "epoch": 32.13142857142857,
+ "grad_norm": 49.38679122924805,
+ "learning_rate": 1.9853968253968256e-05,
+ "loss": 0.1939,
+ "step": 5623
+ },
+ {
+ "epoch": 32.137142857142855,
+ "grad_norm": 30.698293685913086,
+ "learning_rate": 1.984761904761905e-05,
+ "loss": 0.2122,
+ "step": 5624
+ },
+ {
+ "epoch": 32.142857142857146,
+ "grad_norm": 68.74352264404297,
+ "learning_rate": 1.984126984126984e-05,
+ "loss": 0.273,
+ "step": 5625
+ },
+ {
+ "epoch": 32.14857142857143,
+ "grad_norm": 53.25042724609375,
+ "learning_rate": 1.9834920634920637e-05,
+ "loss": 0.2859,
+ "step": 5626
+ },
+ {
+ "epoch": 32.15428571428571,
+ "grad_norm": 83.85069274902344,
+ "learning_rate": 1.982857142857143e-05,
+ "loss": 0.1411,
+ "step": 5627
+ },
+ {
+ "epoch": 32.16,
+ "grad_norm": 327.1810607910156,
+ "learning_rate": 1.9822222222222223e-05,
+ "loss": 0.3035,
+ "step": 5628
+ },
+ {
+ "epoch": 32.16571428571429,
+ "grad_norm": 26.484102249145508,
+ "learning_rate": 1.9815873015873015e-05,
+ "loss": 0.2007,
+ "step": 5629
+ },
+ {
+ "epoch": 32.17142857142857,
+ "grad_norm": 44.97897720336914,
+ "learning_rate": 1.980952380952381e-05,
+ "loss": 0.1995,
+ "step": 5630
+ },
+ {
+ "epoch": 32.177142857142854,
+ "grad_norm": 52.42127990722656,
+ "learning_rate": 1.9803174603174604e-05,
+ "loss": 0.1818,
+ "step": 5631
+ },
+ {
+ "epoch": 32.182857142857145,
+ "grad_norm": 25.286893844604492,
+ "learning_rate": 1.9796825396825396e-05,
+ "loss": 0.2031,
+ "step": 5632
+ },
+ {
+ "epoch": 32.18857142857143,
+ "grad_norm": 83.523193359375,
+ "learning_rate": 1.9790476190476193e-05,
+ "loss": 0.4617,
+ "step": 5633
+ },
+ {
+ "epoch": 32.19428571428571,
+ "grad_norm": 32.6356315612793,
+ "learning_rate": 1.9784126984126985e-05,
+ "loss": 0.223,
+ "step": 5634
+ },
+ {
+ "epoch": 32.2,
+ "grad_norm": 362.17803955078125,
+ "learning_rate": 1.9777777777777778e-05,
+ "loss": 0.2614,
+ "step": 5635
+ },
+ {
+ "epoch": 32.205714285714286,
+ "grad_norm": 43.75965881347656,
+ "learning_rate": 1.977142857142857e-05,
+ "loss": 0.2011,
+ "step": 5636
+ },
+ {
+ "epoch": 32.21142857142857,
+ "grad_norm": 25.195335388183594,
+ "learning_rate": 1.9765079365079366e-05,
+ "loss": 0.1964,
+ "step": 5637
+ },
+ {
+ "epoch": 32.21714285714286,
+ "grad_norm": 49.41599655151367,
+ "learning_rate": 1.975873015873016e-05,
+ "loss": 0.2598,
+ "step": 5638
+ },
+ {
+ "epoch": 32.222857142857144,
+ "grad_norm": 28.786399841308594,
+ "learning_rate": 1.9752380952380952e-05,
+ "loss": 0.1453,
+ "step": 5639
+ },
+ {
+ "epoch": 32.22857142857143,
+ "grad_norm": 37.16865158081055,
+ "learning_rate": 1.9746031746031744e-05,
+ "loss": 0.3094,
+ "step": 5640
+ },
+ {
+ "epoch": 32.23428571428571,
+ "grad_norm": 40.28287887573242,
+ "learning_rate": 1.973968253968254e-05,
+ "loss": 0.3459,
+ "step": 5641
+ },
+ {
+ "epoch": 32.24,
+ "grad_norm": 36.927947998046875,
+ "learning_rate": 1.9733333333333333e-05,
+ "loss": 0.4682,
+ "step": 5642
+ },
+ {
+ "epoch": 32.245714285714286,
+ "grad_norm": 35.456565856933594,
+ "learning_rate": 1.9726984126984126e-05,
+ "loss": 0.2156,
+ "step": 5643
+ },
+ {
+ "epoch": 32.25142857142857,
+ "grad_norm": 12.010313034057617,
+ "learning_rate": 1.972063492063492e-05,
+ "loss": 0.1473,
+ "step": 5644
+ },
+ {
+ "epoch": 32.25714285714286,
+ "grad_norm": 32.582576751708984,
+ "learning_rate": 1.9714285714285714e-05,
+ "loss": 0.381,
+ "step": 5645
+ },
+ {
+ "epoch": 32.26285714285714,
+ "grad_norm": 53.8582878112793,
+ "learning_rate": 1.9707936507936507e-05,
+ "loss": 0.2633,
+ "step": 5646
+ },
+ {
+ "epoch": 32.26857142857143,
+ "grad_norm": 720.4382934570312,
+ "learning_rate": 1.9701587301587303e-05,
+ "loss": 0.2267,
+ "step": 5647
+ },
+ {
+ "epoch": 32.27428571428572,
+ "grad_norm": 41.28740310668945,
+ "learning_rate": 1.9695238095238096e-05,
+ "loss": 0.2127,
+ "step": 5648
+ },
+ {
+ "epoch": 32.28,
+ "grad_norm": 50.774024963378906,
+ "learning_rate": 1.968888888888889e-05,
+ "loss": 0.2454,
+ "step": 5649
+ },
+ {
+ "epoch": 32.285714285714285,
+ "grad_norm": 47.7146110534668,
+ "learning_rate": 1.9682539682539684e-05,
+ "loss": 0.3452,
+ "step": 5650
+ },
+ {
+ "epoch": 32.29142857142857,
+ "grad_norm": 29.196165084838867,
+ "learning_rate": 1.9676190476190477e-05,
+ "loss": 0.2583,
+ "step": 5651
+ },
+ {
+ "epoch": 32.29714285714286,
+ "grad_norm": 36.90263748168945,
+ "learning_rate": 1.9669841269841273e-05,
+ "loss": 0.2178,
+ "step": 5652
+ },
+ {
+ "epoch": 32.30285714285714,
+ "grad_norm": 34.539119720458984,
+ "learning_rate": 1.9663492063492066e-05,
+ "loss": 0.2255,
+ "step": 5653
+ },
+ {
+ "epoch": 32.308571428571426,
+ "grad_norm": 21.559595108032227,
+ "learning_rate": 1.9657142857142858e-05,
+ "loss": 0.2509,
+ "step": 5654
+ },
+ {
+ "epoch": 32.31428571428572,
+ "grad_norm": 32.04145812988281,
+ "learning_rate": 1.9650793650793654e-05,
+ "loss": 0.1896,
+ "step": 5655
+ },
+ {
+ "epoch": 32.32,
+ "grad_norm": 20.757484436035156,
+ "learning_rate": 1.9644444444444447e-05,
+ "loss": 0.1825,
+ "step": 5656
+ },
+ {
+ "epoch": 32.325714285714284,
+ "grad_norm": 69.93598175048828,
+ "learning_rate": 1.963809523809524e-05,
+ "loss": 0.3693,
+ "step": 5657
+ },
+ {
+ "epoch": 32.331428571428575,
+ "grad_norm": 87.04507446289062,
+ "learning_rate": 1.9631746031746032e-05,
+ "loss": 0.2081,
+ "step": 5658
+ },
+ {
+ "epoch": 32.33714285714286,
+ "grad_norm": 68.42386627197266,
+ "learning_rate": 1.9625396825396828e-05,
+ "loss": 0.2115,
+ "step": 5659
+ },
+ {
+ "epoch": 32.34285714285714,
+ "grad_norm": 55.57241439819336,
+ "learning_rate": 1.961904761904762e-05,
+ "loss": 0.2773,
+ "step": 5660
+ },
+ {
+ "epoch": 32.348571428571425,
+ "grad_norm": 1198.2554931640625,
+ "learning_rate": 1.9612698412698413e-05,
+ "loss": 0.3929,
+ "step": 5661
+ },
+ {
+ "epoch": 32.354285714285716,
+ "grad_norm": 70.51028442382812,
+ "learning_rate": 1.9606349206349206e-05,
+ "loss": 0.2643,
+ "step": 5662
+ },
+ {
+ "epoch": 32.36,
+ "grad_norm": 99.01145935058594,
+ "learning_rate": 1.9600000000000002e-05,
+ "loss": 0.2505,
+ "step": 5663
+ },
+ {
+ "epoch": 32.36571428571428,
+ "grad_norm": 37.562618255615234,
+ "learning_rate": 1.9593650793650795e-05,
+ "loss": 0.303,
+ "step": 5664
+ },
+ {
+ "epoch": 32.371428571428574,
+ "grad_norm": 42.09748077392578,
+ "learning_rate": 1.9587301587301587e-05,
+ "loss": 0.3365,
+ "step": 5665
+ },
+ {
+ "epoch": 32.37714285714286,
+ "grad_norm": 97.21544647216797,
+ "learning_rate": 1.9580952380952383e-05,
+ "loss": 0.2125,
+ "step": 5666
+ },
+ {
+ "epoch": 32.38285714285714,
+ "grad_norm": 37.335872650146484,
+ "learning_rate": 1.9574603174603176e-05,
+ "loss": 0.3086,
+ "step": 5667
+ },
+ {
+ "epoch": 32.38857142857143,
+ "grad_norm": 46.7644157409668,
+ "learning_rate": 1.956825396825397e-05,
+ "loss": 0.249,
+ "step": 5668
+ },
+ {
+ "epoch": 32.394285714285715,
+ "grad_norm": 67.24867248535156,
+ "learning_rate": 1.956190476190476e-05,
+ "loss": 0.2295,
+ "step": 5669
+ },
+ {
+ "epoch": 32.4,
+ "grad_norm": 19.075559616088867,
+ "learning_rate": 1.9555555555555557e-05,
+ "loss": 0.2432,
+ "step": 5670
+ },
+ {
+ "epoch": 32.40571428571428,
+ "grad_norm": 1084.0755615234375,
+ "learning_rate": 1.954920634920635e-05,
+ "loss": 0.2487,
+ "step": 5671
+ },
+ {
+ "epoch": 32.41142857142857,
+ "grad_norm": 415.1152648925781,
+ "learning_rate": 1.9542857142857143e-05,
+ "loss": 0.2982,
+ "step": 5672
+ },
+ {
+ "epoch": 32.417142857142856,
+ "grad_norm": 50.63746643066406,
+ "learning_rate": 1.9536507936507935e-05,
+ "loss": 0.2723,
+ "step": 5673
+ },
+ {
+ "epoch": 32.42285714285714,
+ "grad_norm": 498.7071228027344,
+ "learning_rate": 1.953015873015873e-05,
+ "loss": 0.1889,
+ "step": 5674
+ },
+ {
+ "epoch": 32.42857142857143,
+ "grad_norm": 1690.307861328125,
+ "learning_rate": 1.9523809523809524e-05,
+ "loss": 0.2719,
+ "step": 5675
+ },
+ {
+ "epoch": 32.434285714285714,
+ "grad_norm": 77.0636215209961,
+ "learning_rate": 1.9517460317460317e-05,
+ "loss": 0.3183,
+ "step": 5676
+ },
+ {
+ "epoch": 32.44,
+ "grad_norm": 82.31941986083984,
+ "learning_rate": 1.9511111111111113e-05,
+ "loss": 0.2919,
+ "step": 5677
+ },
+ {
+ "epoch": 32.44571428571429,
+ "grad_norm": 40.9356575012207,
+ "learning_rate": 1.9504761904761905e-05,
+ "loss": 0.2544,
+ "step": 5678
+ },
+ {
+ "epoch": 32.45142857142857,
+ "grad_norm": 94.53091430664062,
+ "learning_rate": 1.9498412698412698e-05,
+ "loss": 0.3237,
+ "step": 5679
+ },
+ {
+ "epoch": 32.457142857142856,
+ "grad_norm": 1199.8826904296875,
+ "learning_rate": 1.949206349206349e-05,
+ "loss": 0.3413,
+ "step": 5680
+ },
+ {
+ "epoch": 32.462857142857146,
+ "grad_norm": 36.30007553100586,
+ "learning_rate": 1.9485714285714286e-05,
+ "loss": 0.27,
+ "step": 5681
+ },
+ {
+ "epoch": 32.46857142857143,
+ "grad_norm": 93.01171112060547,
+ "learning_rate": 1.947936507936508e-05,
+ "loss": 0.2413,
+ "step": 5682
+ },
+ {
+ "epoch": 32.47428571428571,
+ "grad_norm": 22.21965217590332,
+ "learning_rate": 1.9473015873015875e-05,
+ "loss": 0.3064,
+ "step": 5683
+ },
+ {
+ "epoch": 32.48,
+ "grad_norm": 44.686790466308594,
+ "learning_rate": 1.9466666666666668e-05,
+ "loss": 0.2629,
+ "step": 5684
+ },
+ {
+ "epoch": 32.48571428571429,
+ "grad_norm": 48.551109313964844,
+ "learning_rate": 1.946031746031746e-05,
+ "loss": 0.2389,
+ "step": 5685
+ },
+ {
+ "epoch": 32.49142857142857,
+ "grad_norm": 80.80036926269531,
+ "learning_rate": 1.9453968253968256e-05,
+ "loss": 0.3031,
+ "step": 5686
+ },
+ {
+ "epoch": 32.497142857142855,
+ "grad_norm": 18.785585403442383,
+ "learning_rate": 1.944761904761905e-05,
+ "loss": 0.2197,
+ "step": 5687
+ },
+ {
+ "epoch": 32.502857142857145,
+ "grad_norm": 28.99616241455078,
+ "learning_rate": 1.9441269841269845e-05,
+ "loss": 0.2276,
+ "step": 5688
+ },
+ {
+ "epoch": 32.50857142857143,
+ "grad_norm": 41.807281494140625,
+ "learning_rate": 1.9434920634920638e-05,
+ "loss": 0.1598,
+ "step": 5689
+ },
+ {
+ "epoch": 32.51428571428571,
+ "grad_norm": 19.651777267456055,
+ "learning_rate": 1.942857142857143e-05,
+ "loss": 0.2166,
+ "step": 5690
+ },
+ {
+ "epoch": 32.52,
+ "grad_norm": 102.34699249267578,
+ "learning_rate": 1.9422222222222223e-05,
+ "loss": 0.183,
+ "step": 5691
+ },
+ {
+ "epoch": 32.52571428571429,
+ "grad_norm": 17.926746368408203,
+ "learning_rate": 1.941587301587302e-05,
+ "loss": 0.1883,
+ "step": 5692
+ },
+ {
+ "epoch": 32.53142857142857,
+ "grad_norm": 45.363197326660156,
+ "learning_rate": 1.940952380952381e-05,
+ "loss": 0.2435,
+ "step": 5693
+ },
+ {
+ "epoch": 32.537142857142854,
+ "grad_norm": 80.7637939453125,
+ "learning_rate": 1.9403174603174604e-05,
+ "loss": 0.217,
+ "step": 5694
+ },
+ {
+ "epoch": 32.542857142857144,
+ "grad_norm": 268.4298095703125,
+ "learning_rate": 1.9396825396825397e-05,
+ "loss": 0.2071,
+ "step": 5695
+ },
+ {
+ "epoch": 32.54857142857143,
+ "grad_norm": 306.17010498046875,
+ "learning_rate": 1.9390476190476193e-05,
+ "loss": 0.3023,
+ "step": 5696
+ },
+ {
+ "epoch": 32.55428571428571,
+ "grad_norm": 35.1732177734375,
+ "learning_rate": 1.9384126984126986e-05,
+ "loss": 0.1646,
+ "step": 5697
+ },
+ {
+ "epoch": 32.56,
+ "grad_norm": 46.12312698364258,
+ "learning_rate": 1.9377777777777778e-05,
+ "loss": 0.2559,
+ "step": 5698
+ },
+ {
+ "epoch": 32.565714285714286,
+ "grad_norm": 213.47662353515625,
+ "learning_rate": 1.9371428571428574e-05,
+ "loss": 0.23,
+ "step": 5699
+ },
+ {
+ "epoch": 32.57142857142857,
+ "grad_norm": 365.5625,
+ "learning_rate": 1.9365079365079367e-05,
+ "loss": 0.2685,
+ "step": 5700
+ },
+ {
+ "epoch": 32.57714285714286,
+ "grad_norm": 49.27860641479492,
+ "learning_rate": 1.935873015873016e-05,
+ "loss": 0.2247,
+ "step": 5701
+ },
+ {
+ "epoch": 32.582857142857144,
+ "grad_norm": 62.4014892578125,
+ "learning_rate": 1.9352380952380952e-05,
+ "loss": 0.2656,
+ "step": 5702
+ },
+ {
+ "epoch": 32.58857142857143,
+ "grad_norm": 56.03572463989258,
+ "learning_rate": 1.9346031746031748e-05,
+ "loss": 0.1949,
+ "step": 5703
+ },
+ {
+ "epoch": 32.59428571428572,
+ "grad_norm": 37.72709274291992,
+ "learning_rate": 1.933968253968254e-05,
+ "loss": 0.2318,
+ "step": 5704
+ },
+ {
+ "epoch": 32.6,
+ "grad_norm": 55.7191162109375,
+ "learning_rate": 1.9333333333333333e-05,
+ "loss": 0.2583,
+ "step": 5705
+ },
+ {
+ "epoch": 32.605714285714285,
+ "grad_norm": 35.487213134765625,
+ "learning_rate": 1.9326984126984126e-05,
+ "loss": 0.2064,
+ "step": 5706
+ },
+ {
+ "epoch": 32.61142857142857,
+ "grad_norm": 49.100337982177734,
+ "learning_rate": 1.9320634920634922e-05,
+ "loss": 0.2311,
+ "step": 5707
+ },
+ {
+ "epoch": 32.61714285714286,
+ "grad_norm": 63.767578125,
+ "learning_rate": 1.9314285714285715e-05,
+ "loss": 0.3147,
+ "step": 5708
+ },
+ {
+ "epoch": 32.62285714285714,
+ "grad_norm": 25.621097564697266,
+ "learning_rate": 1.9307936507936507e-05,
+ "loss": 0.1902,
+ "step": 5709
+ },
+ {
+ "epoch": 32.628571428571426,
+ "grad_norm": 52.18640899658203,
+ "learning_rate": 1.9301587301587303e-05,
+ "loss": 0.2201,
+ "step": 5710
+ },
+ {
+ "epoch": 32.63428571428572,
+ "grad_norm": 74.32487487792969,
+ "learning_rate": 1.9295238095238096e-05,
+ "loss": 0.2006,
+ "step": 5711
+ },
+ {
+ "epoch": 32.64,
+ "grad_norm": 42.27890396118164,
+ "learning_rate": 1.928888888888889e-05,
+ "loss": 0.305,
+ "step": 5712
+ },
+ {
+ "epoch": 32.645714285714284,
+ "grad_norm": 44.3375358581543,
+ "learning_rate": 1.928253968253968e-05,
+ "loss": 0.1867,
+ "step": 5713
+ },
+ {
+ "epoch": 32.651428571428575,
+ "grad_norm": 26.97228240966797,
+ "learning_rate": 1.9276190476190477e-05,
+ "loss": 0.2139,
+ "step": 5714
+ },
+ {
+ "epoch": 32.65714285714286,
+ "grad_norm": 30.574569702148438,
+ "learning_rate": 1.926984126984127e-05,
+ "loss": 0.1768,
+ "step": 5715
+ },
+ {
+ "epoch": 32.66285714285714,
+ "grad_norm": 31.63504409790039,
+ "learning_rate": 1.9263492063492063e-05,
+ "loss": 0.2208,
+ "step": 5716
+ },
+ {
+ "epoch": 32.668571428571425,
+ "grad_norm": 37.90189743041992,
+ "learning_rate": 1.9257142857142855e-05,
+ "loss": 0.2087,
+ "step": 5717
+ },
+ {
+ "epoch": 32.674285714285716,
+ "grad_norm": 78.9525375366211,
+ "learning_rate": 1.925079365079365e-05,
+ "loss": 0.2433,
+ "step": 5718
+ },
+ {
+ "epoch": 32.68,
+ "grad_norm": 564.4703979492188,
+ "learning_rate": 1.9244444444444444e-05,
+ "loss": 0.2879,
+ "step": 5719
+ },
+ {
+ "epoch": 32.68571428571428,
+ "grad_norm": 39.07098388671875,
+ "learning_rate": 1.923809523809524e-05,
+ "loss": 0.3621,
+ "step": 5720
+ },
+ {
+ "epoch": 32.691428571428574,
+ "grad_norm": 30.343294143676758,
+ "learning_rate": 1.9231746031746033e-05,
+ "loss": 0.2439,
+ "step": 5721
+ },
+ {
+ "epoch": 32.69714285714286,
+ "grad_norm": 94.84039306640625,
+ "learning_rate": 1.922539682539683e-05,
+ "loss": 0.2671,
+ "step": 5722
+ },
+ {
+ "epoch": 32.70285714285714,
+ "grad_norm": 945.572998046875,
+ "learning_rate": 1.921904761904762e-05,
+ "loss": 0.2558,
+ "step": 5723
+ },
+ {
+ "epoch": 32.70857142857143,
+ "grad_norm": 60.11537551879883,
+ "learning_rate": 1.9212698412698414e-05,
+ "loss": 0.3213,
+ "step": 5724
+ },
+ {
+ "epoch": 32.714285714285715,
+ "grad_norm": 68.07587432861328,
+ "learning_rate": 1.920634920634921e-05,
+ "loss": 0.1808,
+ "step": 5725
+ },
+ {
+ "epoch": 32.72,
+ "grad_norm": 155.82656860351562,
+ "learning_rate": 1.9200000000000003e-05,
+ "loss": 0.3225,
+ "step": 5726
+ },
+ {
+ "epoch": 32.72571428571428,
+ "grad_norm": 24.076765060424805,
+ "learning_rate": 1.9193650793650795e-05,
+ "loss": 0.2316,
+ "step": 5727
+ },
+ {
+ "epoch": 32.73142857142857,
+ "grad_norm": 36.95673370361328,
+ "learning_rate": 1.9187301587301588e-05,
+ "loss": 0.2036,
+ "step": 5728
+ },
+ {
+ "epoch": 32.73714285714286,
+ "grad_norm": 18.792818069458008,
+ "learning_rate": 1.9180952380952384e-05,
+ "loss": 0.2172,
+ "step": 5729
+ },
+ {
+ "epoch": 32.74285714285714,
+ "grad_norm": 55.471466064453125,
+ "learning_rate": 1.9174603174603176e-05,
+ "loss": 0.2202,
+ "step": 5730
+ },
+ {
+ "epoch": 32.74857142857143,
+ "grad_norm": 81.36102294921875,
+ "learning_rate": 1.916825396825397e-05,
+ "loss": 0.2538,
+ "step": 5731
+ },
+ {
+ "epoch": 32.754285714285714,
+ "grad_norm": 75.76774597167969,
+ "learning_rate": 1.9161904761904762e-05,
+ "loss": 0.2875,
+ "step": 5732
+ },
+ {
+ "epoch": 32.76,
+ "grad_norm": 59.79932403564453,
+ "learning_rate": 1.9155555555555558e-05,
+ "loss": 0.3493,
+ "step": 5733
+ },
+ {
+ "epoch": 32.76571428571429,
+ "grad_norm": 55.21664810180664,
+ "learning_rate": 1.914920634920635e-05,
+ "loss": 0.1894,
+ "step": 5734
+ },
+ {
+ "epoch": 32.77142857142857,
+ "grad_norm": 24.527069091796875,
+ "learning_rate": 1.9142857142857143e-05,
+ "loss": 0.2233,
+ "step": 5735
+ },
+ {
+ "epoch": 32.777142857142856,
+ "grad_norm": 36.4738655090332,
+ "learning_rate": 1.913650793650794e-05,
+ "loss": 0.1894,
+ "step": 5736
+ },
+ {
+ "epoch": 32.78285714285714,
+ "grad_norm": 85.22257232666016,
+ "learning_rate": 1.913015873015873e-05,
+ "loss": 0.2481,
+ "step": 5737
+ },
+ {
+ "epoch": 32.78857142857143,
+ "grad_norm": 61.35136413574219,
+ "learning_rate": 1.9123809523809524e-05,
+ "loss": 0.2234,
+ "step": 5738
+ },
+ {
+ "epoch": 32.794285714285714,
+ "grad_norm": 47.99897384643555,
+ "learning_rate": 1.9117460317460317e-05,
+ "loss": 0.2531,
+ "step": 5739
+ },
+ {
+ "epoch": 32.8,
+ "grad_norm": 30.050312042236328,
+ "learning_rate": 1.9111111111111113e-05,
+ "loss": 0.2414,
+ "step": 5740
+ },
+ {
+ "epoch": 32.80571428571429,
+ "grad_norm": 62.89798355102539,
+ "learning_rate": 1.9104761904761906e-05,
+ "loss": 0.2529,
+ "step": 5741
+ },
+ {
+ "epoch": 32.81142857142857,
+ "grad_norm": 35.83088302612305,
+ "learning_rate": 1.9098412698412698e-05,
+ "loss": 0.2205,
+ "step": 5742
+ },
+ {
+ "epoch": 32.817142857142855,
+ "grad_norm": 49.34012222290039,
+ "learning_rate": 1.909206349206349e-05,
+ "loss": 0.2036,
+ "step": 5743
+ },
+ {
+ "epoch": 32.822857142857146,
+ "grad_norm": 104.82341003417969,
+ "learning_rate": 1.9085714285714287e-05,
+ "loss": 0.2899,
+ "step": 5744
+ },
+ {
+ "epoch": 32.82857142857143,
+ "grad_norm": 26.515548706054688,
+ "learning_rate": 1.907936507936508e-05,
+ "loss": 0.2559,
+ "step": 5745
+ },
+ {
+ "epoch": 32.83428571428571,
+ "grad_norm": 27.458879470825195,
+ "learning_rate": 1.9073015873015872e-05,
+ "loss": 0.207,
+ "step": 5746
+ },
+ {
+ "epoch": 32.84,
+ "grad_norm": 70.85945892333984,
+ "learning_rate": 1.9066666666666668e-05,
+ "loss": 0.2689,
+ "step": 5747
+ },
+ {
+ "epoch": 32.84571428571429,
+ "grad_norm": 93.12171936035156,
+ "learning_rate": 1.906031746031746e-05,
+ "loss": 0.278,
+ "step": 5748
+ },
+ {
+ "epoch": 32.85142857142857,
+ "grad_norm": 65.2746353149414,
+ "learning_rate": 1.9053968253968253e-05,
+ "loss": 0.263,
+ "step": 5749
+ },
+ {
+ "epoch": 32.857142857142854,
+ "grad_norm": 60.12397766113281,
+ "learning_rate": 1.9047619047619046e-05,
+ "loss": 0.3596,
+ "step": 5750
+ },
+ {
+ "epoch": 32.862857142857145,
+ "grad_norm": 74.2511978149414,
+ "learning_rate": 1.9041269841269842e-05,
+ "loss": 0.1573,
+ "step": 5751
+ },
+ {
+ "epoch": 32.86857142857143,
+ "grad_norm": 42.45563888549805,
+ "learning_rate": 1.9034920634920635e-05,
+ "loss": 0.1389,
+ "step": 5752
+ },
+ {
+ "epoch": 32.87428571428571,
+ "grad_norm": 69.6099624633789,
+ "learning_rate": 1.9028571428571427e-05,
+ "loss": 0.2109,
+ "step": 5753
+ },
+ {
+ "epoch": 32.88,
+ "grad_norm": 95.45622253417969,
+ "learning_rate": 1.9022222222222223e-05,
+ "loss": 0.1731,
+ "step": 5754
+ },
+ {
+ "epoch": 32.885714285714286,
+ "grad_norm": 42.497886657714844,
+ "learning_rate": 1.9015873015873016e-05,
+ "loss": 0.2298,
+ "step": 5755
+ },
+ {
+ "epoch": 32.89142857142857,
+ "grad_norm": 56.04786682128906,
+ "learning_rate": 1.9009523809523812e-05,
+ "loss": 0.2151,
+ "step": 5756
+ },
+ {
+ "epoch": 32.89714285714286,
+ "grad_norm": 25.797842025756836,
+ "learning_rate": 1.9003174603174605e-05,
+ "loss": 0.2009,
+ "step": 5757
+ },
+ {
+ "epoch": 32.902857142857144,
+ "grad_norm": 30.58416748046875,
+ "learning_rate": 1.8996825396825397e-05,
+ "loss": 0.2266,
+ "step": 5758
+ },
+ {
+ "epoch": 32.90857142857143,
+ "grad_norm": 62.21387481689453,
+ "learning_rate": 1.8990476190476193e-05,
+ "loss": 0.2812,
+ "step": 5759
+ },
+ {
+ "epoch": 32.91428571428571,
+ "grad_norm": 67.15927124023438,
+ "learning_rate": 1.8984126984126986e-05,
+ "loss": 0.536,
+ "step": 5760
+ },
+ {
+ "epoch": 32.92,
+ "grad_norm": 33.9781379699707,
+ "learning_rate": 1.897777777777778e-05,
+ "loss": 0.2723,
+ "step": 5761
+ },
+ {
+ "epoch": 32.925714285714285,
+ "grad_norm": 29.440732955932617,
+ "learning_rate": 1.8971428571428575e-05,
+ "loss": 0.1815,
+ "step": 5762
+ },
+ {
+ "epoch": 32.93142857142857,
+ "grad_norm": 21.851028442382812,
+ "learning_rate": 1.8965079365079367e-05,
+ "loss": 0.26,
+ "step": 5763
+ },
+ {
+ "epoch": 32.93714285714286,
+ "grad_norm": 36.22255325317383,
+ "learning_rate": 1.895873015873016e-05,
+ "loss": 0.2044,
+ "step": 5764
+ },
+ {
+ "epoch": 32.94285714285714,
+ "grad_norm": 35.30541229248047,
+ "learning_rate": 1.8952380952380953e-05,
+ "loss": 0.2584,
+ "step": 5765
+ },
+ {
+ "epoch": 32.94857142857143,
+ "grad_norm": 164.14581298828125,
+ "learning_rate": 1.894603174603175e-05,
+ "loss": 0.2357,
+ "step": 5766
+ },
+ {
+ "epoch": 32.95428571428572,
+ "grad_norm": 66.16490173339844,
+ "learning_rate": 1.893968253968254e-05,
+ "loss": 0.2905,
+ "step": 5767
+ },
+ {
+ "epoch": 32.96,
+ "grad_norm": 122.70243072509766,
+ "learning_rate": 1.8933333333333334e-05,
+ "loss": 0.2421,
+ "step": 5768
+ },
+ {
+ "epoch": 32.965714285714284,
+ "grad_norm": 33.81968307495117,
+ "learning_rate": 1.892698412698413e-05,
+ "loss": 0.2856,
+ "step": 5769
+ },
+ {
+ "epoch": 32.97142857142857,
+ "grad_norm": 370.8056945800781,
+ "learning_rate": 1.8920634920634923e-05,
+ "loss": 0.2989,
+ "step": 5770
+ },
+ {
+ "epoch": 32.97714285714286,
+ "grad_norm": 56.27879333496094,
+ "learning_rate": 1.8914285714285715e-05,
+ "loss": 0.2699,
+ "step": 5771
+ },
+ {
+ "epoch": 32.98285714285714,
+ "grad_norm": 28.76259422302246,
+ "learning_rate": 1.8907936507936508e-05,
+ "loss": 0.3926,
+ "step": 5772
+ },
+ {
+ "epoch": 32.988571428571426,
+ "grad_norm": 53.48399353027344,
+ "learning_rate": 1.8901587301587304e-05,
+ "loss": 0.2002,
+ "step": 5773
+ },
+ {
+ "epoch": 32.994285714285716,
+ "grad_norm": 38.394779205322266,
+ "learning_rate": 1.8895238095238096e-05,
+ "loss": 0.2,
+ "step": 5774
+ },
+ {
+ "epoch": 33.0,
+ "grad_norm": 173.07803344726562,
+ "learning_rate": 1.888888888888889e-05,
+ "loss": 0.2864,
+ "step": 5775
+ },
+ {
+ "epoch": 33.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5889254212379456,
+ "eval_map": 0.9286,
+ "eval_map_50": 0.9623,
+ "eval_map_75": 0.9582,
+ "eval_map_large": 0.9287,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9286,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7829,
+ "eval_mar_10": 0.9733,
+ "eval_mar_100": 0.9759,
+ "eval_mar_100_per_class": 0.9759,
+ "eval_mar_large": 0.9759,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.1098,
+ "eval_samples_per_second": 22.426,
+ "eval_steps_per_second": 2.822,
+ "step": 5775
+ },
+ {
+ "epoch": 33.005714285714284,
+ "grad_norm": 61.6531867980957,
+ "learning_rate": 1.8882539682539682e-05,
+ "loss": 0.1863,
+ "step": 5776
+ },
+ {
+ "epoch": 33.011428571428574,
+ "grad_norm": 85.87842559814453,
+ "learning_rate": 1.8876190476190478e-05,
+ "loss": 0.1609,
+ "step": 5777
+ },
+ {
+ "epoch": 33.01714285714286,
+ "grad_norm": 66.04412078857422,
+ "learning_rate": 1.886984126984127e-05,
+ "loss": 0.4112,
+ "step": 5778
+ },
+ {
+ "epoch": 33.02285714285714,
+ "grad_norm": 59.694461822509766,
+ "learning_rate": 1.8863492063492063e-05,
+ "loss": 0.1821,
+ "step": 5779
+ },
+ {
+ "epoch": 33.02857142857143,
+ "grad_norm": 48.2636833190918,
+ "learning_rate": 1.885714285714286e-05,
+ "loss": 0.3267,
+ "step": 5780
+ },
+ {
+ "epoch": 33.034285714285716,
+ "grad_norm": 328.946044921875,
+ "learning_rate": 1.8850793650793652e-05,
+ "loss": 0.2647,
+ "step": 5781
+ },
+ {
+ "epoch": 33.04,
+ "grad_norm": 28.746435165405273,
+ "learning_rate": 1.8844444444444444e-05,
+ "loss": 0.2212,
+ "step": 5782
+ },
+ {
+ "epoch": 33.04571428571428,
+ "grad_norm": 28.282203674316406,
+ "learning_rate": 1.8838095238095237e-05,
+ "loss": 0.1856,
+ "step": 5783
+ },
+ {
+ "epoch": 33.05142857142857,
+ "grad_norm": 31.544275283813477,
+ "learning_rate": 1.8831746031746033e-05,
+ "loss": 0.2117,
+ "step": 5784
+ },
+ {
+ "epoch": 33.05714285714286,
+ "grad_norm": 40.49601364135742,
+ "learning_rate": 1.8825396825396826e-05,
+ "loss": 0.1419,
+ "step": 5785
+ },
+ {
+ "epoch": 33.06285714285714,
+ "grad_norm": 46.23110580444336,
+ "learning_rate": 1.8819047619047618e-05,
+ "loss": 0.2121,
+ "step": 5786
+ },
+ {
+ "epoch": 33.06857142857143,
+ "grad_norm": 44.122901916503906,
+ "learning_rate": 1.881269841269841e-05,
+ "loss": 0.2214,
+ "step": 5787
+ },
+ {
+ "epoch": 33.074285714285715,
+ "grad_norm": 23.44615936279297,
+ "learning_rate": 1.8806349206349207e-05,
+ "loss": 0.2412,
+ "step": 5788
+ },
+ {
+ "epoch": 33.08,
+ "grad_norm": 50.15841293334961,
+ "learning_rate": 1.88e-05,
+ "loss": 0.2051,
+ "step": 5789
+ },
+ {
+ "epoch": 33.08571428571429,
+ "grad_norm": 322.5673522949219,
+ "learning_rate": 1.8793650793650792e-05,
+ "loss": 0.2591,
+ "step": 5790
+ },
+ {
+ "epoch": 33.09142857142857,
+ "grad_norm": 218.38636779785156,
+ "learning_rate": 1.8787301587301588e-05,
+ "loss": 0.2429,
+ "step": 5791
+ },
+ {
+ "epoch": 33.097142857142856,
+ "grad_norm": 41.37594223022461,
+ "learning_rate": 1.878095238095238e-05,
+ "loss": 0.2061,
+ "step": 5792
+ },
+ {
+ "epoch": 33.10285714285714,
+ "grad_norm": 39.84482192993164,
+ "learning_rate": 1.8774603174603177e-05,
+ "loss": 0.1754,
+ "step": 5793
+ },
+ {
+ "epoch": 33.10857142857143,
+ "grad_norm": 48.588802337646484,
+ "learning_rate": 1.876825396825397e-05,
+ "loss": 0.2415,
+ "step": 5794
+ },
+ {
+ "epoch": 33.114285714285714,
+ "grad_norm": 45.063846588134766,
+ "learning_rate": 1.8761904761904766e-05,
+ "loss": 0.275,
+ "step": 5795
+ },
+ {
+ "epoch": 33.12,
+ "grad_norm": 55.40730667114258,
+ "learning_rate": 1.8755555555555558e-05,
+ "loss": 0.206,
+ "step": 5796
+ },
+ {
+ "epoch": 33.12571428571429,
+ "grad_norm": 32.80923080444336,
+ "learning_rate": 1.874920634920635e-05,
+ "loss": 0.1894,
+ "step": 5797
+ },
+ {
+ "epoch": 33.13142857142857,
+ "grad_norm": 28.337350845336914,
+ "learning_rate": 1.8742857142857143e-05,
+ "loss": 0.2787,
+ "step": 5798
+ },
+ {
+ "epoch": 33.137142857142855,
+ "grad_norm": 34.23060607910156,
+ "learning_rate": 1.873650793650794e-05,
+ "loss": 0.2507,
+ "step": 5799
+ },
+ {
+ "epoch": 33.142857142857146,
+ "grad_norm": 824.5645141601562,
+ "learning_rate": 1.8730158730158732e-05,
+ "loss": 0.3496,
+ "step": 5800
+ },
+ {
+ "epoch": 33.14857142857143,
+ "grad_norm": 46.26865005493164,
+ "learning_rate": 1.8723809523809525e-05,
+ "loss": 0.2216,
+ "step": 5801
+ },
+ {
+ "epoch": 33.15428571428571,
+ "grad_norm": 90.3018569946289,
+ "learning_rate": 1.871746031746032e-05,
+ "loss": 0.3366,
+ "step": 5802
+ },
+ {
+ "epoch": 33.16,
+ "grad_norm": 41.25313186645508,
+ "learning_rate": 1.8711111111111113e-05,
+ "loss": 0.1777,
+ "step": 5803
+ },
+ {
+ "epoch": 33.16571428571429,
+ "grad_norm": 37.36777877807617,
+ "learning_rate": 1.8704761904761906e-05,
+ "loss": 0.3449,
+ "step": 5804
+ },
+ {
+ "epoch": 33.17142857142857,
+ "grad_norm": 30.33721351623535,
+ "learning_rate": 1.86984126984127e-05,
+ "loss": 0.2448,
+ "step": 5805
+ },
+ {
+ "epoch": 33.177142857142854,
+ "grad_norm": 41.4080696105957,
+ "learning_rate": 1.8692063492063495e-05,
+ "loss": 0.1894,
+ "step": 5806
+ },
+ {
+ "epoch": 33.182857142857145,
+ "grad_norm": 23.389171600341797,
+ "learning_rate": 1.8685714285714287e-05,
+ "loss": 0.1878,
+ "step": 5807
+ },
+ {
+ "epoch": 33.18857142857143,
+ "grad_norm": 33.94114685058594,
+ "learning_rate": 1.867936507936508e-05,
+ "loss": 0.2903,
+ "step": 5808
+ },
+ {
+ "epoch": 33.19428571428571,
+ "grad_norm": 38.92851638793945,
+ "learning_rate": 1.8673015873015873e-05,
+ "loss": 0.2322,
+ "step": 5809
+ },
+ {
+ "epoch": 33.2,
+ "grad_norm": 26.486780166625977,
+ "learning_rate": 1.866666666666667e-05,
+ "loss": 0.2645,
+ "step": 5810
+ },
+ {
+ "epoch": 33.205714285714286,
+ "grad_norm": 79.34441375732422,
+ "learning_rate": 1.866031746031746e-05,
+ "loss": 0.2896,
+ "step": 5811
+ },
+ {
+ "epoch": 33.21142857142857,
+ "grad_norm": 43.19795227050781,
+ "learning_rate": 1.8653968253968254e-05,
+ "loss": 0.3704,
+ "step": 5812
+ },
+ {
+ "epoch": 33.21714285714286,
+ "grad_norm": 56.158416748046875,
+ "learning_rate": 1.864761904761905e-05,
+ "loss": 0.2275,
+ "step": 5813
+ },
+ {
+ "epoch": 33.222857142857144,
+ "grad_norm": 44.3139762878418,
+ "learning_rate": 1.8641269841269843e-05,
+ "loss": 0.3289,
+ "step": 5814
+ },
+ {
+ "epoch": 33.22857142857143,
+ "grad_norm": 30.506383895874023,
+ "learning_rate": 1.8634920634920635e-05,
+ "loss": 0.2183,
+ "step": 5815
+ },
+ {
+ "epoch": 33.23428571428571,
+ "grad_norm": 61.62275695800781,
+ "learning_rate": 1.8628571428571428e-05,
+ "loss": 0.3011,
+ "step": 5816
+ },
+ {
+ "epoch": 33.24,
+ "grad_norm": 31.950824737548828,
+ "learning_rate": 1.8622222222222224e-05,
+ "loss": 0.2199,
+ "step": 5817
+ },
+ {
+ "epoch": 33.245714285714286,
+ "grad_norm": 39.46795654296875,
+ "learning_rate": 1.8615873015873017e-05,
+ "loss": 0.1838,
+ "step": 5818
+ },
+ {
+ "epoch": 33.25142857142857,
+ "grad_norm": 201.7964324951172,
+ "learning_rate": 1.860952380952381e-05,
+ "loss": 0.2733,
+ "step": 5819
+ },
+ {
+ "epoch": 33.25714285714286,
+ "grad_norm": 35.36155700683594,
+ "learning_rate": 1.8603174603174602e-05,
+ "loss": 0.1887,
+ "step": 5820
+ },
+ {
+ "epoch": 33.26285714285714,
+ "grad_norm": 45.871803283691406,
+ "learning_rate": 1.8596825396825398e-05,
+ "loss": 0.1908,
+ "step": 5821
+ },
+ {
+ "epoch": 33.26857142857143,
+ "grad_norm": 32.87693786621094,
+ "learning_rate": 1.859047619047619e-05,
+ "loss": 0.2116,
+ "step": 5822
+ },
+ {
+ "epoch": 33.27428571428572,
+ "grad_norm": 37.83621597290039,
+ "learning_rate": 1.8584126984126983e-05,
+ "loss": 0.2422,
+ "step": 5823
+ },
+ {
+ "epoch": 33.28,
+ "grad_norm": 173.2474822998047,
+ "learning_rate": 1.8577777777777776e-05,
+ "loss": 0.2098,
+ "step": 5824
+ },
+ {
+ "epoch": 33.285714285714285,
+ "grad_norm": 69.15506744384766,
+ "learning_rate": 1.8571428571428572e-05,
+ "loss": 0.278,
+ "step": 5825
+ },
+ {
+ "epoch": 33.29142857142857,
+ "grad_norm": 36.939918518066406,
+ "learning_rate": 1.8565079365079364e-05,
+ "loss": 0.214,
+ "step": 5826
+ },
+ {
+ "epoch": 33.29714285714286,
+ "grad_norm": 54.35163116455078,
+ "learning_rate": 1.855873015873016e-05,
+ "loss": 0.3314,
+ "step": 5827
+ },
+ {
+ "epoch": 33.30285714285714,
+ "grad_norm": 55.574005126953125,
+ "learning_rate": 1.8552380952380953e-05,
+ "loss": 0.2441,
+ "step": 5828
+ },
+ {
+ "epoch": 33.308571428571426,
+ "grad_norm": 27.20143699645996,
+ "learning_rate": 1.8546031746031746e-05,
+ "loss": 0.3751,
+ "step": 5829
+ },
+ {
+ "epoch": 33.31428571428572,
+ "grad_norm": 126.8397445678711,
+ "learning_rate": 1.853968253968254e-05,
+ "loss": 0.3334,
+ "step": 5830
+ },
+ {
+ "epoch": 33.32,
+ "grad_norm": 38.47798156738281,
+ "learning_rate": 1.8533333333333334e-05,
+ "loss": 0.1912,
+ "step": 5831
+ },
+ {
+ "epoch": 33.325714285714284,
+ "grad_norm": 69.825927734375,
+ "learning_rate": 1.852698412698413e-05,
+ "loss": 0.3188,
+ "step": 5832
+ },
+ {
+ "epoch": 33.331428571428575,
+ "grad_norm": 42.98884582519531,
+ "learning_rate": 1.8520634920634923e-05,
+ "loss": 0.3282,
+ "step": 5833
+ },
+ {
+ "epoch": 33.33714285714286,
+ "grad_norm": 59.24766159057617,
+ "learning_rate": 1.8514285714285716e-05,
+ "loss": 0.2309,
+ "step": 5834
+ },
+ {
+ "epoch": 33.34285714285714,
+ "grad_norm": 69.93727111816406,
+ "learning_rate": 1.8507936507936508e-05,
+ "loss": 0.216,
+ "step": 5835
+ },
+ {
+ "epoch": 33.348571428571425,
+ "grad_norm": 316.0108337402344,
+ "learning_rate": 1.8501587301587304e-05,
+ "loss": 0.331,
+ "step": 5836
+ },
+ {
+ "epoch": 33.354285714285716,
+ "grad_norm": 78.41392517089844,
+ "learning_rate": 1.8495238095238097e-05,
+ "loss": 0.3179,
+ "step": 5837
+ },
+ {
+ "epoch": 33.36,
+ "grad_norm": 33.232337951660156,
+ "learning_rate": 1.848888888888889e-05,
+ "loss": 0.2548,
+ "step": 5838
+ },
+ {
+ "epoch": 33.36571428571428,
+ "grad_norm": 41.12696838378906,
+ "learning_rate": 1.8482539682539686e-05,
+ "loss": 0.2283,
+ "step": 5839
+ },
+ {
+ "epoch": 33.371428571428574,
+ "grad_norm": 30.386310577392578,
+ "learning_rate": 1.8476190476190478e-05,
+ "loss": 0.2291,
+ "step": 5840
+ },
+ {
+ "epoch": 33.37714285714286,
+ "grad_norm": 19.18267250061035,
+ "learning_rate": 1.846984126984127e-05,
+ "loss": 0.2395,
+ "step": 5841
+ },
+ {
+ "epoch": 33.38285714285714,
+ "grad_norm": 43.62106704711914,
+ "learning_rate": 1.8463492063492063e-05,
+ "loss": 0.1517,
+ "step": 5842
+ },
+ {
+ "epoch": 33.38857142857143,
+ "grad_norm": 37.835575103759766,
+ "learning_rate": 1.845714285714286e-05,
+ "loss": 0.1752,
+ "step": 5843
+ },
+ {
+ "epoch": 33.394285714285715,
+ "grad_norm": 36.11626434326172,
+ "learning_rate": 1.8450793650793652e-05,
+ "loss": 0.1825,
+ "step": 5844
+ },
+ {
+ "epoch": 33.4,
+ "grad_norm": 65.71627044677734,
+ "learning_rate": 1.8444444444444445e-05,
+ "loss": 0.2111,
+ "step": 5845
+ },
+ {
+ "epoch": 33.40571428571428,
+ "grad_norm": 52.04588317871094,
+ "learning_rate": 1.8438095238095237e-05,
+ "loss": 0.2537,
+ "step": 5846
+ },
+ {
+ "epoch": 33.41142857142857,
+ "grad_norm": 35.33464813232422,
+ "learning_rate": 1.8431746031746033e-05,
+ "loss": 0.1835,
+ "step": 5847
+ },
+ {
+ "epoch": 33.417142857142856,
+ "grad_norm": 45.317501068115234,
+ "learning_rate": 1.8425396825396826e-05,
+ "loss": 0.2132,
+ "step": 5848
+ },
+ {
+ "epoch": 33.42285714285714,
+ "grad_norm": 46.01725769042969,
+ "learning_rate": 1.841904761904762e-05,
+ "loss": 0.2026,
+ "step": 5849
+ },
+ {
+ "epoch": 33.42857142857143,
+ "grad_norm": 46.28281021118164,
+ "learning_rate": 1.8412698412698415e-05,
+ "loss": 0.2128,
+ "step": 5850
+ },
+ {
+ "epoch": 33.434285714285714,
+ "grad_norm": 30.394298553466797,
+ "learning_rate": 1.8406349206349207e-05,
+ "loss": 0.2085,
+ "step": 5851
+ },
+ {
+ "epoch": 33.44,
+ "grad_norm": 51.70058822631836,
+ "learning_rate": 1.84e-05,
+ "loss": 0.2591,
+ "step": 5852
+ },
+ {
+ "epoch": 33.44571428571429,
+ "grad_norm": 48.758636474609375,
+ "learning_rate": 1.8393650793650793e-05,
+ "loss": 0.2412,
+ "step": 5853
+ },
+ {
+ "epoch": 33.45142857142857,
+ "grad_norm": 21.338415145874023,
+ "learning_rate": 1.838730158730159e-05,
+ "loss": 0.2175,
+ "step": 5854
+ },
+ {
+ "epoch": 33.457142857142856,
+ "grad_norm": 40.013221740722656,
+ "learning_rate": 1.838095238095238e-05,
+ "loss": 0.2884,
+ "step": 5855
+ },
+ {
+ "epoch": 33.462857142857146,
+ "grad_norm": 28.742542266845703,
+ "learning_rate": 1.8374603174603174e-05,
+ "loss": 0.2486,
+ "step": 5856
+ },
+ {
+ "epoch": 33.46857142857143,
+ "grad_norm": 31.17852783203125,
+ "learning_rate": 1.8368253968253967e-05,
+ "loss": 0.2581,
+ "step": 5857
+ },
+ {
+ "epoch": 33.47428571428571,
+ "grad_norm": 24.765287399291992,
+ "learning_rate": 1.8361904761904763e-05,
+ "loss": 0.2704,
+ "step": 5858
+ },
+ {
+ "epoch": 33.48,
+ "grad_norm": 85.52215576171875,
+ "learning_rate": 1.8355555555555555e-05,
+ "loss": 0.1736,
+ "step": 5859
+ },
+ {
+ "epoch": 33.48571428571429,
+ "grad_norm": 74.87712860107422,
+ "learning_rate": 1.8349206349206348e-05,
+ "loss": 0.1951,
+ "step": 5860
+ },
+ {
+ "epoch": 33.49142857142857,
+ "grad_norm": 60.96209716796875,
+ "learning_rate": 1.8342857142857144e-05,
+ "loss": 0.2245,
+ "step": 5861
+ },
+ {
+ "epoch": 33.497142857142855,
+ "grad_norm": 39.256160736083984,
+ "learning_rate": 1.8336507936507937e-05,
+ "loss": 0.2472,
+ "step": 5862
+ },
+ {
+ "epoch": 33.502857142857145,
+ "grad_norm": 57.78693771362305,
+ "learning_rate": 1.833015873015873e-05,
+ "loss": 0.1854,
+ "step": 5863
+ },
+ {
+ "epoch": 33.50857142857143,
+ "grad_norm": 46.70375061035156,
+ "learning_rate": 1.8323809523809525e-05,
+ "loss": 0.2039,
+ "step": 5864
+ },
+ {
+ "epoch": 33.51428571428571,
+ "grad_norm": 26.03221893310547,
+ "learning_rate": 1.8317460317460318e-05,
+ "loss": 0.2059,
+ "step": 5865
+ },
+ {
+ "epoch": 33.52,
+ "grad_norm": 468.8216247558594,
+ "learning_rate": 1.8311111111111114e-05,
+ "loss": 0.295,
+ "step": 5866
+ },
+ {
+ "epoch": 33.52571428571429,
+ "grad_norm": 65.85757446289062,
+ "learning_rate": 1.8304761904761906e-05,
+ "loss": 0.219,
+ "step": 5867
+ },
+ {
+ "epoch": 33.53142857142857,
+ "grad_norm": 105.2874984741211,
+ "learning_rate": 1.82984126984127e-05,
+ "loss": 0.251,
+ "step": 5868
+ },
+ {
+ "epoch": 33.537142857142854,
+ "grad_norm": 39.05388641357422,
+ "learning_rate": 1.8292063492063495e-05,
+ "loss": 0.2449,
+ "step": 5869
+ },
+ {
+ "epoch": 33.542857142857144,
+ "grad_norm": 23.31039047241211,
+ "learning_rate": 1.8285714285714288e-05,
+ "loss": 0.2333,
+ "step": 5870
+ },
+ {
+ "epoch": 33.54857142857143,
+ "grad_norm": 966.0343017578125,
+ "learning_rate": 1.827936507936508e-05,
+ "loss": 0.2797,
+ "step": 5871
+ },
+ {
+ "epoch": 33.55428571428571,
+ "grad_norm": 28.16424560546875,
+ "learning_rate": 1.8273015873015876e-05,
+ "loss": 0.2148,
+ "step": 5872
+ },
+ {
+ "epoch": 33.56,
+ "grad_norm": 486.0005187988281,
+ "learning_rate": 1.826666666666667e-05,
+ "loss": 0.2506,
+ "step": 5873
+ },
+ {
+ "epoch": 33.565714285714286,
+ "grad_norm": 132.05374145507812,
+ "learning_rate": 1.8260317460317462e-05,
+ "loss": 0.2897,
+ "step": 5874
+ },
+ {
+ "epoch": 33.57142857142857,
+ "grad_norm": 31.29085922241211,
+ "learning_rate": 1.8253968253968254e-05,
+ "loss": 0.2292,
+ "step": 5875
+ },
+ {
+ "epoch": 33.57714285714286,
+ "grad_norm": 41.23748016357422,
+ "learning_rate": 1.824761904761905e-05,
+ "loss": 0.1858,
+ "step": 5876
+ },
+ {
+ "epoch": 33.582857142857144,
+ "grad_norm": 52.48597717285156,
+ "learning_rate": 1.8241269841269843e-05,
+ "loss": 0.2425,
+ "step": 5877
+ },
+ {
+ "epoch": 33.58857142857143,
+ "grad_norm": 49.15097427368164,
+ "learning_rate": 1.8234920634920636e-05,
+ "loss": 0.2167,
+ "step": 5878
+ },
+ {
+ "epoch": 33.59428571428572,
+ "grad_norm": 94.56541442871094,
+ "learning_rate": 1.8228571428571428e-05,
+ "loss": 0.309,
+ "step": 5879
+ },
+ {
+ "epoch": 33.6,
+ "grad_norm": 113.2981948852539,
+ "learning_rate": 1.8222222222222224e-05,
+ "loss": 0.1997,
+ "step": 5880
+ },
+ {
+ "epoch": 33.605714285714285,
+ "grad_norm": 258.0194396972656,
+ "learning_rate": 1.8215873015873017e-05,
+ "loss": 0.3929,
+ "step": 5881
+ },
+ {
+ "epoch": 33.61142857142857,
+ "grad_norm": 33.491188049316406,
+ "learning_rate": 1.820952380952381e-05,
+ "loss": 0.1989,
+ "step": 5882
+ },
+ {
+ "epoch": 33.61714285714286,
+ "grad_norm": 49.691864013671875,
+ "learning_rate": 1.8203174603174606e-05,
+ "loss": 0.1652,
+ "step": 5883
+ },
+ {
+ "epoch": 33.62285714285714,
+ "grad_norm": 20.83930015563965,
+ "learning_rate": 1.8196825396825398e-05,
+ "loss": 0.1858,
+ "step": 5884
+ },
+ {
+ "epoch": 33.628571428571426,
+ "grad_norm": 60.397544860839844,
+ "learning_rate": 1.819047619047619e-05,
+ "loss": 0.2188,
+ "step": 5885
+ },
+ {
+ "epoch": 33.63428571428572,
+ "grad_norm": 51.880409240722656,
+ "learning_rate": 1.8184126984126984e-05,
+ "loss": 0.3225,
+ "step": 5886
+ },
+ {
+ "epoch": 33.64,
+ "grad_norm": 130.122314453125,
+ "learning_rate": 1.817777777777778e-05,
+ "loss": 0.2302,
+ "step": 5887
+ },
+ {
+ "epoch": 33.645714285714284,
+ "grad_norm": 61.00599670410156,
+ "learning_rate": 1.8171428571428572e-05,
+ "loss": 0.181,
+ "step": 5888
+ },
+ {
+ "epoch": 33.651428571428575,
+ "grad_norm": 34.79129409790039,
+ "learning_rate": 1.8165079365079365e-05,
+ "loss": 0.1801,
+ "step": 5889
+ },
+ {
+ "epoch": 33.65714285714286,
+ "grad_norm": 40.54106521606445,
+ "learning_rate": 1.8158730158730157e-05,
+ "loss": 0.2039,
+ "step": 5890
+ },
+ {
+ "epoch": 33.66285714285714,
+ "grad_norm": 84.70269012451172,
+ "learning_rate": 1.8152380952380953e-05,
+ "loss": 0.1887,
+ "step": 5891
+ },
+ {
+ "epoch": 33.668571428571425,
+ "grad_norm": 70.48677825927734,
+ "learning_rate": 1.8146031746031746e-05,
+ "loss": 0.2872,
+ "step": 5892
+ },
+ {
+ "epoch": 33.674285714285716,
+ "grad_norm": 49.539405822753906,
+ "learning_rate": 1.813968253968254e-05,
+ "loss": 0.1647,
+ "step": 5893
+ },
+ {
+ "epoch": 33.68,
+ "grad_norm": 23.737159729003906,
+ "learning_rate": 1.8133333333333335e-05,
+ "loss": 0.2219,
+ "step": 5894
+ },
+ {
+ "epoch": 33.68571428571428,
+ "grad_norm": 20.998498916625977,
+ "learning_rate": 1.8126984126984127e-05,
+ "loss": 0.2741,
+ "step": 5895
+ },
+ {
+ "epoch": 33.691428571428574,
+ "grad_norm": 33.75244140625,
+ "learning_rate": 1.812063492063492e-05,
+ "loss": 0.1634,
+ "step": 5896
+ },
+ {
+ "epoch": 33.69714285714286,
+ "grad_norm": 279.3129577636719,
+ "learning_rate": 1.8114285714285713e-05,
+ "loss": 0.2181,
+ "step": 5897
+ },
+ {
+ "epoch": 33.70285714285714,
+ "grad_norm": 19.29251480102539,
+ "learning_rate": 1.810793650793651e-05,
+ "loss": 0.2423,
+ "step": 5898
+ },
+ {
+ "epoch": 33.70857142857143,
+ "grad_norm": 83.79081726074219,
+ "learning_rate": 1.81015873015873e-05,
+ "loss": 0.1878,
+ "step": 5899
+ },
+ {
+ "epoch": 33.714285714285715,
+ "grad_norm": 55.04714584350586,
+ "learning_rate": 1.8095238095238094e-05,
+ "loss": 0.2047,
+ "step": 5900
+ },
+ {
+ "epoch": 33.72,
+ "grad_norm": 32.267974853515625,
+ "learning_rate": 1.808888888888889e-05,
+ "loss": 0.2654,
+ "step": 5901
+ },
+ {
+ "epoch": 33.72571428571428,
+ "grad_norm": 759.3602905273438,
+ "learning_rate": 1.8082539682539683e-05,
+ "loss": 0.2962,
+ "step": 5902
+ },
+ {
+ "epoch": 33.73142857142857,
+ "grad_norm": 82.70172882080078,
+ "learning_rate": 1.807619047619048e-05,
+ "loss": 0.2832,
+ "step": 5903
+ },
+ {
+ "epoch": 33.73714285714286,
+ "grad_norm": 28.534273147583008,
+ "learning_rate": 1.806984126984127e-05,
+ "loss": 0.2042,
+ "step": 5904
+ },
+ {
+ "epoch": 33.74285714285714,
+ "grad_norm": 39.39735794067383,
+ "learning_rate": 1.8063492063492067e-05,
+ "loss": 0.2213,
+ "step": 5905
+ },
+ {
+ "epoch": 33.74857142857143,
+ "grad_norm": 106.2161865234375,
+ "learning_rate": 1.805714285714286e-05,
+ "loss": 0.2477,
+ "step": 5906
+ },
+ {
+ "epoch": 33.754285714285714,
+ "grad_norm": 59.39938735961914,
+ "learning_rate": 1.8050793650793653e-05,
+ "loss": 0.3675,
+ "step": 5907
+ },
+ {
+ "epoch": 33.76,
+ "grad_norm": 350.4398498535156,
+ "learning_rate": 1.8044444444444445e-05,
+ "loss": 0.2806,
+ "step": 5908
+ },
+ {
+ "epoch": 33.76571428571429,
+ "grad_norm": 117.81426239013672,
+ "learning_rate": 1.803809523809524e-05,
+ "loss": 0.2766,
+ "step": 5909
+ },
+ {
+ "epoch": 33.77142857142857,
+ "grad_norm": 54.07353973388672,
+ "learning_rate": 1.8031746031746034e-05,
+ "loss": 0.2752,
+ "step": 5910
+ },
+ {
+ "epoch": 33.777142857142856,
+ "grad_norm": 35.34177780151367,
+ "learning_rate": 1.8025396825396827e-05,
+ "loss": 0.2255,
+ "step": 5911
+ },
+ {
+ "epoch": 33.78285714285714,
+ "grad_norm": 57.84912872314453,
+ "learning_rate": 1.801904761904762e-05,
+ "loss": 0.1488,
+ "step": 5912
+ },
+ {
+ "epoch": 33.78857142857143,
+ "grad_norm": 45.213775634765625,
+ "learning_rate": 1.8012698412698415e-05,
+ "loss": 0.1765,
+ "step": 5913
+ },
+ {
+ "epoch": 33.794285714285714,
+ "grad_norm": 474.8720397949219,
+ "learning_rate": 1.8006349206349208e-05,
+ "loss": 0.2694,
+ "step": 5914
+ },
+ {
+ "epoch": 33.8,
+ "grad_norm": 29.5314884185791,
+ "learning_rate": 1.8e-05,
+ "loss": 0.239,
+ "step": 5915
+ },
+ {
+ "epoch": 33.80571428571429,
+ "grad_norm": 38.79159164428711,
+ "learning_rate": 1.7993650793650796e-05,
+ "loss": 0.1762,
+ "step": 5916
+ },
+ {
+ "epoch": 33.81142857142857,
+ "grad_norm": 41.41892623901367,
+ "learning_rate": 1.798730158730159e-05,
+ "loss": 0.1811,
+ "step": 5917
+ },
+ {
+ "epoch": 33.817142857142855,
+ "grad_norm": 36.53205490112305,
+ "learning_rate": 1.7980952380952382e-05,
+ "loss": 0.252,
+ "step": 5918
+ },
+ {
+ "epoch": 33.822857142857146,
+ "grad_norm": 20.373428344726562,
+ "learning_rate": 1.7974603174603174e-05,
+ "loss": 0.2905,
+ "step": 5919
+ },
+ {
+ "epoch": 33.82857142857143,
+ "grad_norm": 37.673095703125,
+ "learning_rate": 1.796825396825397e-05,
+ "loss": 0.2657,
+ "step": 5920
+ },
+ {
+ "epoch": 33.83428571428571,
+ "grad_norm": 32.326541900634766,
+ "learning_rate": 1.7961904761904763e-05,
+ "loss": 0.2661,
+ "step": 5921
+ },
+ {
+ "epoch": 33.84,
+ "grad_norm": 46.6069221496582,
+ "learning_rate": 1.7955555555555556e-05,
+ "loss": 0.3008,
+ "step": 5922
+ },
+ {
+ "epoch": 33.84571428571429,
+ "grad_norm": 18.55901527404785,
+ "learning_rate": 1.794920634920635e-05,
+ "loss": 0.3043,
+ "step": 5923
+ },
+ {
+ "epoch": 33.85142857142857,
+ "grad_norm": 30.72215461730957,
+ "learning_rate": 1.7942857142857144e-05,
+ "loss": 0.2451,
+ "step": 5924
+ },
+ {
+ "epoch": 33.857142857142854,
+ "grad_norm": 259.6184997558594,
+ "learning_rate": 1.7936507936507937e-05,
+ "loss": 0.2118,
+ "step": 5925
+ },
+ {
+ "epoch": 33.862857142857145,
+ "grad_norm": 54.99461364746094,
+ "learning_rate": 1.793015873015873e-05,
+ "loss": 0.2317,
+ "step": 5926
+ },
+ {
+ "epoch": 33.86857142857143,
+ "grad_norm": 50.739219665527344,
+ "learning_rate": 1.7923809523809526e-05,
+ "loss": 0.1776,
+ "step": 5927
+ },
+ {
+ "epoch": 33.87428571428571,
+ "grad_norm": 58.7663459777832,
+ "learning_rate": 1.7917460317460318e-05,
+ "loss": 0.1811,
+ "step": 5928
+ },
+ {
+ "epoch": 33.88,
+ "grad_norm": 78.92102813720703,
+ "learning_rate": 1.791111111111111e-05,
+ "loss": 0.2529,
+ "step": 5929
+ },
+ {
+ "epoch": 33.885714285714286,
+ "grad_norm": 61.0960693359375,
+ "learning_rate": 1.7904761904761904e-05,
+ "loss": 0.2295,
+ "step": 5930
+ },
+ {
+ "epoch": 33.89142857142857,
+ "grad_norm": 43.502567291259766,
+ "learning_rate": 1.78984126984127e-05,
+ "loss": 0.2208,
+ "step": 5931
+ },
+ {
+ "epoch": 33.89714285714286,
+ "grad_norm": 50.775177001953125,
+ "learning_rate": 1.7892063492063492e-05,
+ "loss": 0.215,
+ "step": 5932
+ },
+ {
+ "epoch": 33.902857142857144,
+ "grad_norm": 30.936521530151367,
+ "learning_rate": 1.7885714285714285e-05,
+ "loss": 0.1987,
+ "step": 5933
+ },
+ {
+ "epoch": 33.90857142857143,
+ "grad_norm": 67.06620788574219,
+ "learning_rate": 1.7879365079365077e-05,
+ "loss": 0.4565,
+ "step": 5934
+ },
+ {
+ "epoch": 33.91428571428571,
+ "grad_norm": 64.06864166259766,
+ "learning_rate": 1.7873015873015874e-05,
+ "loss": 0.2216,
+ "step": 5935
+ },
+ {
+ "epoch": 33.92,
+ "grad_norm": 58.99102783203125,
+ "learning_rate": 1.7866666666666666e-05,
+ "loss": 0.1912,
+ "step": 5936
+ },
+ {
+ "epoch": 33.925714285714285,
+ "grad_norm": 54.659034729003906,
+ "learning_rate": 1.7860317460317462e-05,
+ "loss": 0.2342,
+ "step": 5937
+ },
+ {
+ "epoch": 33.93142857142857,
+ "grad_norm": 66.0268325805664,
+ "learning_rate": 1.7853968253968255e-05,
+ "loss": 0.2237,
+ "step": 5938
+ },
+ {
+ "epoch": 33.93714285714286,
+ "grad_norm": 58.34406280517578,
+ "learning_rate": 1.7847619047619047e-05,
+ "loss": 0.2444,
+ "step": 5939
+ },
+ {
+ "epoch": 33.94285714285714,
+ "grad_norm": 43.55903625488281,
+ "learning_rate": 1.7841269841269843e-05,
+ "loss": 0.2069,
+ "step": 5940
+ },
+ {
+ "epoch": 33.94857142857143,
+ "grad_norm": 1790.8717041015625,
+ "learning_rate": 1.7834920634920636e-05,
+ "loss": 0.2266,
+ "step": 5941
+ },
+ {
+ "epoch": 33.95428571428572,
+ "grad_norm": 67.68915557861328,
+ "learning_rate": 1.7828571428571432e-05,
+ "loss": 0.2376,
+ "step": 5942
+ },
+ {
+ "epoch": 33.96,
+ "grad_norm": 25.61754035949707,
+ "learning_rate": 1.7822222222222225e-05,
+ "loss": 0.186,
+ "step": 5943
+ },
+ {
+ "epoch": 33.965714285714284,
+ "grad_norm": 24.572595596313477,
+ "learning_rate": 1.7815873015873017e-05,
+ "loss": 0.1734,
+ "step": 5944
+ },
+ {
+ "epoch": 33.97142857142857,
+ "grad_norm": 74.7918701171875,
+ "learning_rate": 1.780952380952381e-05,
+ "loss": 0.1948,
+ "step": 5945
+ },
+ {
+ "epoch": 33.97714285714286,
+ "grad_norm": 97.4121322631836,
+ "learning_rate": 1.7803174603174606e-05,
+ "loss": 0.3412,
+ "step": 5946
+ },
+ {
+ "epoch": 33.98285714285714,
+ "grad_norm": 514.5524291992188,
+ "learning_rate": 1.77968253968254e-05,
+ "loss": 0.226,
+ "step": 5947
+ },
+ {
+ "epoch": 33.988571428571426,
+ "grad_norm": 50.86960983276367,
+ "learning_rate": 1.779047619047619e-05,
+ "loss": 0.2376,
+ "step": 5948
+ },
+ {
+ "epoch": 33.994285714285716,
+ "grad_norm": 39.601444244384766,
+ "learning_rate": 1.7784126984126984e-05,
+ "loss": 0.291,
+ "step": 5949
+ },
+ {
+ "epoch": 34.0,
+ "grad_norm": 30.264354705810547,
+ "learning_rate": 1.777777777777778e-05,
+ "loss": 0.2828,
+ "step": 5950
+ },
+ {
+ "epoch": 34.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5732106566429138,
+ "eval_map": 0.9371,
+ "eval_map_50": 0.9699,
+ "eval_map_75": 0.9643,
+ "eval_map_large": 0.9372,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9371,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7867,
+ "eval_mar_10": 0.9746,
+ "eval_mar_100": 0.9771,
+ "eval_mar_100_per_class": 0.9771,
+ "eval_mar_large": 0.9771,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.9114,
+ "eval_samples_per_second": 21.134,
+ "eval_steps_per_second": 2.66,
+ "step": 5950
+ },
+ {
+ "epoch": 34.005714285714284,
+ "grad_norm": 39.4473991394043,
+ "learning_rate": 1.7771428571428573e-05,
+ "loss": 0.2211,
+ "step": 5951
+ },
+ {
+ "epoch": 34.011428571428574,
+ "grad_norm": 33.2810173034668,
+ "learning_rate": 1.7765079365079365e-05,
+ "loss": 0.1745,
+ "step": 5952
+ },
+ {
+ "epoch": 34.01714285714286,
+ "grad_norm": 25.688941955566406,
+ "learning_rate": 1.775873015873016e-05,
+ "loss": 0.1702,
+ "step": 5953
+ },
+ {
+ "epoch": 34.02285714285714,
+ "grad_norm": 40.11772537231445,
+ "learning_rate": 1.7752380952380954e-05,
+ "loss": 0.1975,
+ "step": 5954
+ },
+ {
+ "epoch": 34.02857142857143,
+ "grad_norm": 28.627344131469727,
+ "learning_rate": 1.7746031746031747e-05,
+ "loss": 0.2539,
+ "step": 5955
+ },
+ {
+ "epoch": 34.034285714285716,
+ "grad_norm": 69.74943542480469,
+ "learning_rate": 1.773968253968254e-05,
+ "loss": 0.2654,
+ "step": 5956
+ },
+ {
+ "epoch": 34.04,
+ "grad_norm": 49.73385238647461,
+ "learning_rate": 1.7733333333333335e-05,
+ "loss": 0.2028,
+ "step": 5957
+ },
+ {
+ "epoch": 34.04571428571428,
+ "grad_norm": 22.300336837768555,
+ "learning_rate": 1.7726984126984128e-05,
+ "loss": 0.1805,
+ "step": 5958
+ },
+ {
+ "epoch": 34.05142857142857,
+ "grad_norm": 51.91956329345703,
+ "learning_rate": 1.772063492063492e-05,
+ "loss": 0.2041,
+ "step": 5959
+ },
+ {
+ "epoch": 34.05714285714286,
+ "grad_norm": 100.5795669555664,
+ "learning_rate": 1.7714285714285713e-05,
+ "loss": 0.3603,
+ "step": 5960
+ },
+ {
+ "epoch": 34.06285714285714,
+ "grad_norm": 78.1702880859375,
+ "learning_rate": 1.770793650793651e-05,
+ "loss": 0.1778,
+ "step": 5961
+ },
+ {
+ "epoch": 34.06857142857143,
+ "grad_norm": 52.29579544067383,
+ "learning_rate": 1.7701587301587302e-05,
+ "loss": 0.3952,
+ "step": 5962
+ },
+ {
+ "epoch": 34.074285714285715,
+ "grad_norm": 111.71765899658203,
+ "learning_rate": 1.7695238095238094e-05,
+ "loss": 0.3385,
+ "step": 5963
+ },
+ {
+ "epoch": 34.08,
+ "grad_norm": 44.42064666748047,
+ "learning_rate": 1.768888888888889e-05,
+ "loss": 0.2243,
+ "step": 5964
+ },
+ {
+ "epoch": 34.08571428571429,
+ "grad_norm": 29.016536712646484,
+ "learning_rate": 1.7682539682539683e-05,
+ "loss": 0.1984,
+ "step": 5965
+ },
+ {
+ "epoch": 34.09142857142857,
+ "grad_norm": 22.22270965576172,
+ "learning_rate": 1.7676190476190476e-05,
+ "loss": 0.2315,
+ "step": 5966
+ },
+ {
+ "epoch": 34.097142857142856,
+ "grad_norm": 32.561676025390625,
+ "learning_rate": 1.766984126984127e-05,
+ "loss": 0.2779,
+ "step": 5967
+ },
+ {
+ "epoch": 34.10285714285714,
+ "grad_norm": 48.95793151855469,
+ "learning_rate": 1.7663492063492064e-05,
+ "loss": 0.2555,
+ "step": 5968
+ },
+ {
+ "epoch": 34.10857142857143,
+ "grad_norm": 53.35732650756836,
+ "learning_rate": 1.7657142857142857e-05,
+ "loss": 0.1497,
+ "step": 5969
+ },
+ {
+ "epoch": 34.114285714285714,
+ "grad_norm": 85.69971466064453,
+ "learning_rate": 1.765079365079365e-05,
+ "loss": 0.2759,
+ "step": 5970
+ },
+ {
+ "epoch": 34.12,
+ "grad_norm": 31.9514217376709,
+ "learning_rate": 1.7644444444444446e-05,
+ "loss": 0.1854,
+ "step": 5971
+ },
+ {
+ "epoch": 34.12571428571429,
+ "grad_norm": 70.69202423095703,
+ "learning_rate": 1.7638095238095238e-05,
+ "loss": 0.1737,
+ "step": 5972
+ },
+ {
+ "epoch": 34.13142857142857,
+ "grad_norm": 53.84906005859375,
+ "learning_rate": 1.763174603174603e-05,
+ "loss": 0.233,
+ "step": 5973
+ },
+ {
+ "epoch": 34.137142857142855,
+ "grad_norm": 28.47622299194336,
+ "learning_rate": 1.7625396825396827e-05,
+ "loss": 0.1744,
+ "step": 5974
+ },
+ {
+ "epoch": 34.142857142857146,
+ "grad_norm": 141.35400390625,
+ "learning_rate": 1.761904761904762e-05,
+ "loss": 0.2277,
+ "step": 5975
+ },
+ {
+ "epoch": 34.14857142857143,
+ "grad_norm": 39.02643966674805,
+ "learning_rate": 1.7612698412698416e-05,
+ "loss": 0.1827,
+ "step": 5976
+ },
+ {
+ "epoch": 34.15428571428571,
+ "grad_norm": 39.80881881713867,
+ "learning_rate": 1.7606349206349208e-05,
+ "loss": 0.1508,
+ "step": 5977
+ },
+ {
+ "epoch": 34.16,
+ "grad_norm": 27.678665161132812,
+ "learning_rate": 1.76e-05,
+ "loss": 0.3936,
+ "step": 5978
+ },
+ {
+ "epoch": 34.16571428571429,
+ "grad_norm": 34.82521438598633,
+ "learning_rate": 1.7593650793650797e-05,
+ "loss": 0.1694,
+ "step": 5979
+ },
+ {
+ "epoch": 34.17142857142857,
+ "grad_norm": 567.7770385742188,
+ "learning_rate": 1.758730158730159e-05,
+ "loss": 0.1819,
+ "step": 5980
+ },
+ {
+ "epoch": 34.177142857142854,
+ "grad_norm": 39.7049560546875,
+ "learning_rate": 1.7580952380952382e-05,
+ "loss": 0.1963,
+ "step": 5981
+ },
+ {
+ "epoch": 34.182857142857145,
+ "grad_norm": 81.57500457763672,
+ "learning_rate": 1.7574603174603175e-05,
+ "loss": 0.3676,
+ "step": 5982
+ },
+ {
+ "epoch": 34.18857142857143,
+ "grad_norm": 37.454769134521484,
+ "learning_rate": 1.756825396825397e-05,
+ "loss": 0.2362,
+ "step": 5983
+ },
+ {
+ "epoch": 34.19428571428571,
+ "grad_norm": 26.152082443237305,
+ "learning_rate": 1.7561904761904763e-05,
+ "loss": 0.2013,
+ "step": 5984
+ },
+ {
+ "epoch": 34.2,
+ "grad_norm": 48.61448669433594,
+ "learning_rate": 1.7555555555555556e-05,
+ "loss": 0.1921,
+ "step": 5985
+ },
+ {
+ "epoch": 34.205714285714286,
+ "grad_norm": 54.888362884521484,
+ "learning_rate": 1.7549206349206352e-05,
+ "loss": 0.2217,
+ "step": 5986
+ },
+ {
+ "epoch": 34.21142857142857,
+ "grad_norm": 58.8105583190918,
+ "learning_rate": 1.7542857142857145e-05,
+ "loss": 0.2862,
+ "step": 5987
+ },
+ {
+ "epoch": 34.21714285714286,
+ "grad_norm": 44.90412139892578,
+ "learning_rate": 1.7536507936507937e-05,
+ "loss": 0.1811,
+ "step": 5988
+ },
+ {
+ "epoch": 34.222857142857144,
+ "grad_norm": 22.135730743408203,
+ "learning_rate": 1.753015873015873e-05,
+ "loss": 0.2369,
+ "step": 5989
+ },
+ {
+ "epoch": 34.22857142857143,
+ "grad_norm": 52.97178649902344,
+ "learning_rate": 1.7523809523809526e-05,
+ "loss": 0.2835,
+ "step": 5990
+ },
+ {
+ "epoch": 34.23428571428571,
+ "grad_norm": 24.361135482788086,
+ "learning_rate": 1.751746031746032e-05,
+ "loss": 0.2666,
+ "step": 5991
+ },
+ {
+ "epoch": 34.24,
+ "grad_norm": 39.67509078979492,
+ "learning_rate": 1.751111111111111e-05,
+ "loss": 0.2313,
+ "step": 5992
+ },
+ {
+ "epoch": 34.245714285714286,
+ "grad_norm": 67.84577941894531,
+ "learning_rate": 1.7504761904761904e-05,
+ "loss": 0.2451,
+ "step": 5993
+ },
+ {
+ "epoch": 34.25142857142857,
+ "grad_norm": 42.42686462402344,
+ "learning_rate": 1.74984126984127e-05,
+ "loss": 0.213,
+ "step": 5994
+ },
+ {
+ "epoch": 34.25714285714286,
+ "grad_norm": 47.220462799072266,
+ "learning_rate": 1.7492063492063493e-05,
+ "loss": 0.2164,
+ "step": 5995
+ },
+ {
+ "epoch": 34.26285714285714,
+ "grad_norm": 27.19646453857422,
+ "learning_rate": 1.7485714285714285e-05,
+ "loss": 0.201,
+ "step": 5996
+ },
+ {
+ "epoch": 34.26857142857143,
+ "grad_norm": 29.466632843017578,
+ "learning_rate": 1.747936507936508e-05,
+ "loss": 0.3826,
+ "step": 5997
+ },
+ {
+ "epoch": 34.27428571428572,
+ "grad_norm": 25.750511169433594,
+ "learning_rate": 1.7473015873015874e-05,
+ "loss": 0.2327,
+ "step": 5998
+ },
+ {
+ "epoch": 34.28,
+ "grad_norm": 43.1969108581543,
+ "learning_rate": 1.7466666666666667e-05,
+ "loss": 0.2463,
+ "step": 5999
+ },
+ {
+ "epoch": 34.285714285714285,
+ "grad_norm": 111.6524429321289,
+ "learning_rate": 1.746031746031746e-05,
+ "loss": 0.2307,
+ "step": 6000
+ },
+ {
+ "epoch": 34.29142857142857,
+ "grad_norm": 52.85550308227539,
+ "learning_rate": 1.7453968253968255e-05,
+ "loss": 0.1707,
+ "step": 6001
+ },
+ {
+ "epoch": 34.29714285714286,
+ "grad_norm": 89.5367202758789,
+ "learning_rate": 1.7447619047619048e-05,
+ "loss": 0.2228,
+ "step": 6002
+ },
+ {
+ "epoch": 34.30285714285714,
+ "grad_norm": 46.877376556396484,
+ "learning_rate": 1.744126984126984e-05,
+ "loss": 0.3267,
+ "step": 6003
+ },
+ {
+ "epoch": 34.308571428571426,
+ "grad_norm": 29.10450553894043,
+ "learning_rate": 1.7434920634920633e-05,
+ "loss": 0.2678,
+ "step": 6004
+ },
+ {
+ "epoch": 34.31428571428572,
+ "grad_norm": 28.012819290161133,
+ "learning_rate": 1.742857142857143e-05,
+ "loss": 0.2131,
+ "step": 6005
+ },
+ {
+ "epoch": 34.32,
+ "grad_norm": 35.5361442565918,
+ "learning_rate": 1.7422222222222222e-05,
+ "loss": 0.1771,
+ "step": 6006
+ },
+ {
+ "epoch": 34.325714285714284,
+ "grad_norm": 37.48767852783203,
+ "learning_rate": 1.7415873015873014e-05,
+ "loss": 0.1776,
+ "step": 6007
+ },
+ {
+ "epoch": 34.331428571428575,
+ "grad_norm": 1077.1754150390625,
+ "learning_rate": 1.740952380952381e-05,
+ "loss": 0.2416,
+ "step": 6008
+ },
+ {
+ "epoch": 34.33714285714286,
+ "grad_norm": 23.403268814086914,
+ "learning_rate": 1.7403174603174603e-05,
+ "loss": 0.2405,
+ "step": 6009
+ },
+ {
+ "epoch": 34.34285714285714,
+ "grad_norm": 30.210250854492188,
+ "learning_rate": 1.73968253968254e-05,
+ "loss": 0.2152,
+ "step": 6010
+ },
+ {
+ "epoch": 34.348571428571425,
+ "grad_norm": 70.3405990600586,
+ "learning_rate": 1.7390476190476192e-05,
+ "loss": 0.1559,
+ "step": 6011
+ },
+ {
+ "epoch": 34.354285714285716,
+ "grad_norm": 41.96592330932617,
+ "learning_rate": 1.7384126984126984e-05,
+ "loss": 0.2466,
+ "step": 6012
+ },
+ {
+ "epoch": 34.36,
+ "grad_norm": 56.36053466796875,
+ "learning_rate": 1.737777777777778e-05,
+ "loss": 0.2149,
+ "step": 6013
+ },
+ {
+ "epoch": 34.36571428571428,
+ "grad_norm": 37.00887680053711,
+ "learning_rate": 1.7371428571428573e-05,
+ "loss": 0.2144,
+ "step": 6014
+ },
+ {
+ "epoch": 34.371428571428574,
+ "grad_norm": 24.215007781982422,
+ "learning_rate": 1.7365079365079366e-05,
+ "loss": 0.2389,
+ "step": 6015
+ },
+ {
+ "epoch": 34.37714285714286,
+ "grad_norm": 46.74251937866211,
+ "learning_rate": 1.7358730158730162e-05,
+ "loss": 0.2139,
+ "step": 6016
+ },
+ {
+ "epoch": 34.38285714285714,
+ "grad_norm": 55.992042541503906,
+ "learning_rate": 1.7352380952380954e-05,
+ "loss": 0.1324,
+ "step": 6017
+ },
+ {
+ "epoch": 34.38857142857143,
+ "grad_norm": 29.065580368041992,
+ "learning_rate": 1.7346031746031747e-05,
+ "loss": 0.2013,
+ "step": 6018
+ },
+ {
+ "epoch": 34.394285714285715,
+ "grad_norm": 62.71901321411133,
+ "learning_rate": 1.7339682539682543e-05,
+ "loss": 0.16,
+ "step": 6019
+ },
+ {
+ "epoch": 34.4,
+ "grad_norm": 52.24561309814453,
+ "learning_rate": 1.7333333333333336e-05,
+ "loss": 0.2242,
+ "step": 6020
+ },
+ {
+ "epoch": 34.40571428571428,
+ "grad_norm": 40.833763122558594,
+ "learning_rate": 1.7326984126984128e-05,
+ "loss": 0.2674,
+ "step": 6021
+ },
+ {
+ "epoch": 34.41142857142857,
+ "grad_norm": 27.984975814819336,
+ "learning_rate": 1.732063492063492e-05,
+ "loss": 0.2379,
+ "step": 6022
+ },
+ {
+ "epoch": 34.417142857142856,
+ "grad_norm": 74.86846923828125,
+ "learning_rate": 1.7314285714285717e-05,
+ "loss": 0.3131,
+ "step": 6023
+ },
+ {
+ "epoch": 34.42285714285714,
+ "grad_norm": 21.960285186767578,
+ "learning_rate": 1.730793650793651e-05,
+ "loss": 0.1784,
+ "step": 6024
+ },
+ {
+ "epoch": 34.42857142857143,
+ "grad_norm": 206.73475646972656,
+ "learning_rate": 1.7301587301587302e-05,
+ "loss": 0.1801,
+ "step": 6025
+ },
+ {
+ "epoch": 34.434285714285714,
+ "grad_norm": 53.96074676513672,
+ "learning_rate": 1.7295238095238095e-05,
+ "loss": 0.2141,
+ "step": 6026
+ },
+ {
+ "epoch": 34.44,
+ "grad_norm": 33.00743865966797,
+ "learning_rate": 1.728888888888889e-05,
+ "loss": 0.2226,
+ "step": 6027
+ },
+ {
+ "epoch": 34.44571428571429,
+ "grad_norm": 59.796897888183594,
+ "learning_rate": 1.7282539682539684e-05,
+ "loss": 0.2167,
+ "step": 6028
+ },
+ {
+ "epoch": 34.45142857142857,
+ "grad_norm": 98.43783569335938,
+ "learning_rate": 1.7276190476190476e-05,
+ "loss": 0.2007,
+ "step": 6029
+ },
+ {
+ "epoch": 34.457142857142856,
+ "grad_norm": 65.62691497802734,
+ "learning_rate": 1.7269841269841272e-05,
+ "loss": 0.2217,
+ "step": 6030
+ },
+ {
+ "epoch": 34.462857142857146,
+ "grad_norm": 25.618061065673828,
+ "learning_rate": 1.7263492063492065e-05,
+ "loss": 0.2301,
+ "step": 6031
+ },
+ {
+ "epoch": 34.46857142857143,
+ "grad_norm": 36.77336120605469,
+ "learning_rate": 1.7257142857142857e-05,
+ "loss": 0.2109,
+ "step": 6032
+ },
+ {
+ "epoch": 34.47428571428571,
+ "grad_norm": 81.1491470336914,
+ "learning_rate": 1.725079365079365e-05,
+ "loss": 0.4455,
+ "step": 6033
+ },
+ {
+ "epoch": 34.48,
+ "grad_norm": 76.56790924072266,
+ "learning_rate": 1.7244444444444446e-05,
+ "loss": 0.2304,
+ "step": 6034
+ },
+ {
+ "epoch": 34.48571428571429,
+ "grad_norm": 72.3931655883789,
+ "learning_rate": 1.723809523809524e-05,
+ "loss": 0.1967,
+ "step": 6035
+ },
+ {
+ "epoch": 34.49142857142857,
+ "grad_norm": 45.4559211730957,
+ "learning_rate": 1.723174603174603e-05,
+ "loss": 0.2656,
+ "step": 6036
+ },
+ {
+ "epoch": 34.497142857142855,
+ "grad_norm": 11.608014106750488,
+ "learning_rate": 1.7225396825396824e-05,
+ "loss": 0.254,
+ "step": 6037
+ },
+ {
+ "epoch": 34.502857142857145,
+ "grad_norm": 33.927425384521484,
+ "learning_rate": 1.721904761904762e-05,
+ "loss": 0.1657,
+ "step": 6038
+ },
+ {
+ "epoch": 34.50857142857143,
+ "grad_norm": 51.072715759277344,
+ "learning_rate": 1.7212698412698413e-05,
+ "loss": 0.2104,
+ "step": 6039
+ },
+ {
+ "epoch": 34.51428571428571,
+ "grad_norm": 22.702539443969727,
+ "learning_rate": 1.7206349206349205e-05,
+ "loss": 0.2078,
+ "step": 6040
+ },
+ {
+ "epoch": 34.52,
+ "grad_norm": 44.61540985107422,
+ "learning_rate": 1.7199999999999998e-05,
+ "loss": 0.2692,
+ "step": 6041
+ },
+ {
+ "epoch": 34.52571428571429,
+ "grad_norm": 47.26350402832031,
+ "learning_rate": 1.7193650793650794e-05,
+ "loss": 0.2449,
+ "step": 6042
+ },
+ {
+ "epoch": 34.53142857142857,
+ "grad_norm": 76.13980865478516,
+ "learning_rate": 1.7187301587301587e-05,
+ "loss": 0.1774,
+ "step": 6043
+ },
+ {
+ "epoch": 34.537142857142854,
+ "grad_norm": 61.66749954223633,
+ "learning_rate": 1.718095238095238e-05,
+ "loss": 0.2082,
+ "step": 6044
+ },
+ {
+ "epoch": 34.542857142857144,
+ "grad_norm": 38.39115905761719,
+ "learning_rate": 1.7174603174603175e-05,
+ "loss": 0.1778,
+ "step": 6045
+ },
+ {
+ "epoch": 34.54857142857143,
+ "grad_norm": 24.32440757751465,
+ "learning_rate": 1.7168253968253968e-05,
+ "loss": 0.2897,
+ "step": 6046
+ },
+ {
+ "epoch": 34.55428571428571,
+ "grad_norm": 48.9962272644043,
+ "learning_rate": 1.7161904761904764e-05,
+ "loss": 0.1896,
+ "step": 6047
+ },
+ {
+ "epoch": 34.56,
+ "grad_norm": 40.588623046875,
+ "learning_rate": 1.7155555555555557e-05,
+ "loss": 0.2037,
+ "step": 6048
+ },
+ {
+ "epoch": 34.565714285714286,
+ "grad_norm": 105.08045196533203,
+ "learning_rate": 1.7149206349206353e-05,
+ "loss": 0.1645,
+ "step": 6049
+ },
+ {
+ "epoch": 34.57142857142857,
+ "grad_norm": 84.69966888427734,
+ "learning_rate": 1.7142857142857145e-05,
+ "loss": 0.1833,
+ "step": 6050
+ },
+ {
+ "epoch": 34.57714285714286,
+ "grad_norm": 46.13557815551758,
+ "learning_rate": 1.7136507936507938e-05,
+ "loss": 0.2534,
+ "step": 6051
+ },
+ {
+ "epoch": 34.582857142857144,
+ "grad_norm": 92.61463928222656,
+ "learning_rate": 1.713015873015873e-05,
+ "loss": 0.2749,
+ "step": 6052
+ },
+ {
+ "epoch": 34.58857142857143,
+ "grad_norm": 27.787425994873047,
+ "learning_rate": 1.7123809523809527e-05,
+ "loss": 0.2859,
+ "step": 6053
+ },
+ {
+ "epoch": 34.59428571428572,
+ "grad_norm": 22.343608856201172,
+ "learning_rate": 1.711746031746032e-05,
+ "loss": 0.1846,
+ "step": 6054
+ },
+ {
+ "epoch": 34.6,
+ "grad_norm": 104.19557189941406,
+ "learning_rate": 1.7111111111111112e-05,
+ "loss": 0.3087,
+ "step": 6055
+ },
+ {
+ "epoch": 34.605714285714285,
+ "grad_norm": 107.66898345947266,
+ "learning_rate": 1.7104761904761908e-05,
+ "loss": 0.2458,
+ "step": 6056
+ },
+ {
+ "epoch": 34.61142857142857,
+ "grad_norm": 34.40952682495117,
+ "learning_rate": 1.70984126984127e-05,
+ "loss": 0.3493,
+ "step": 6057
+ },
+ {
+ "epoch": 34.61714285714286,
+ "grad_norm": 47.86616516113281,
+ "learning_rate": 1.7092063492063493e-05,
+ "loss": 0.2501,
+ "step": 6058
+ },
+ {
+ "epoch": 34.62285714285714,
+ "grad_norm": 35.95183181762695,
+ "learning_rate": 1.7085714285714286e-05,
+ "loss": 0.1773,
+ "step": 6059
+ },
+ {
+ "epoch": 34.628571428571426,
+ "grad_norm": 237.09732055664062,
+ "learning_rate": 1.7079365079365082e-05,
+ "loss": 0.2854,
+ "step": 6060
+ },
+ {
+ "epoch": 34.63428571428572,
+ "grad_norm": 42.27085876464844,
+ "learning_rate": 1.7073015873015874e-05,
+ "loss": 0.2858,
+ "step": 6061
+ },
+ {
+ "epoch": 34.64,
+ "grad_norm": 36.25700759887695,
+ "learning_rate": 1.7066666666666667e-05,
+ "loss": 0.1894,
+ "step": 6062
+ },
+ {
+ "epoch": 34.645714285714284,
+ "grad_norm": 50.62672805786133,
+ "learning_rate": 1.706031746031746e-05,
+ "loss": 0.2577,
+ "step": 6063
+ },
+ {
+ "epoch": 34.651428571428575,
+ "grad_norm": 237.9781951904297,
+ "learning_rate": 1.7053968253968256e-05,
+ "loss": 0.2226,
+ "step": 6064
+ },
+ {
+ "epoch": 34.65714285714286,
+ "grad_norm": 22.847118377685547,
+ "learning_rate": 1.704761904761905e-05,
+ "loss": 0.1686,
+ "step": 6065
+ },
+ {
+ "epoch": 34.66285714285714,
+ "grad_norm": 89.11709594726562,
+ "learning_rate": 1.704126984126984e-05,
+ "loss": 0.1984,
+ "step": 6066
+ },
+ {
+ "epoch": 34.668571428571425,
+ "grad_norm": 40.28227233886719,
+ "learning_rate": 1.7034920634920637e-05,
+ "loss": 0.2338,
+ "step": 6067
+ },
+ {
+ "epoch": 34.674285714285716,
+ "grad_norm": 40.13176727294922,
+ "learning_rate": 1.702857142857143e-05,
+ "loss": 0.2999,
+ "step": 6068
+ },
+ {
+ "epoch": 34.68,
+ "grad_norm": 40.50751495361328,
+ "learning_rate": 1.7022222222222222e-05,
+ "loss": 0.2266,
+ "step": 6069
+ },
+ {
+ "epoch": 34.68571428571428,
+ "grad_norm": 207.8365020751953,
+ "learning_rate": 1.7015873015873015e-05,
+ "loss": 0.2694,
+ "step": 6070
+ },
+ {
+ "epoch": 34.691428571428574,
+ "grad_norm": 78.13711547851562,
+ "learning_rate": 1.700952380952381e-05,
+ "loss": 0.1755,
+ "step": 6071
+ },
+ {
+ "epoch": 34.69714285714286,
+ "grad_norm": 49.28493881225586,
+ "learning_rate": 1.7003174603174604e-05,
+ "loss": 0.2557,
+ "step": 6072
+ },
+ {
+ "epoch": 34.70285714285714,
+ "grad_norm": 22.53261375427246,
+ "learning_rate": 1.6996825396825396e-05,
+ "loss": 0.2611,
+ "step": 6073
+ },
+ {
+ "epoch": 34.70857142857143,
+ "grad_norm": 27.282390594482422,
+ "learning_rate": 1.699047619047619e-05,
+ "loss": 0.2037,
+ "step": 6074
+ },
+ {
+ "epoch": 34.714285714285715,
+ "grad_norm": 126.64105987548828,
+ "learning_rate": 1.6984126984126985e-05,
+ "loss": 0.2526,
+ "step": 6075
+ },
+ {
+ "epoch": 34.72,
+ "grad_norm": 31.60548973083496,
+ "learning_rate": 1.6977777777777777e-05,
+ "loss": 0.2563,
+ "step": 6076
+ },
+ {
+ "epoch": 34.72571428571428,
+ "grad_norm": 29.386547088623047,
+ "learning_rate": 1.697142857142857e-05,
+ "loss": 0.2285,
+ "step": 6077
+ },
+ {
+ "epoch": 34.73142857142857,
+ "grad_norm": 55.01473617553711,
+ "learning_rate": 1.6965079365079366e-05,
+ "loss": 0.1949,
+ "step": 6078
+ },
+ {
+ "epoch": 34.73714285714286,
+ "grad_norm": 55.4376220703125,
+ "learning_rate": 1.695873015873016e-05,
+ "loss": 0.1849,
+ "step": 6079
+ },
+ {
+ "epoch": 34.74285714285714,
+ "grad_norm": 22.210613250732422,
+ "learning_rate": 1.695238095238095e-05,
+ "loss": 0.2912,
+ "step": 6080
+ },
+ {
+ "epoch": 34.74857142857143,
+ "grad_norm": 121.5806884765625,
+ "learning_rate": 1.6946031746031747e-05,
+ "loss": 0.2269,
+ "step": 6081
+ },
+ {
+ "epoch": 34.754285714285714,
+ "grad_norm": 75.6827621459961,
+ "learning_rate": 1.693968253968254e-05,
+ "loss": 0.2901,
+ "step": 6082
+ },
+ {
+ "epoch": 34.76,
+ "grad_norm": 66.75840759277344,
+ "learning_rate": 1.6933333333333333e-05,
+ "loss": 0.2228,
+ "step": 6083
+ },
+ {
+ "epoch": 34.76571428571429,
+ "grad_norm": 46.37446975708008,
+ "learning_rate": 1.692698412698413e-05,
+ "loss": 0.156,
+ "step": 6084
+ },
+ {
+ "epoch": 34.77142857142857,
+ "grad_norm": 45.42128372192383,
+ "learning_rate": 1.692063492063492e-05,
+ "loss": 0.2107,
+ "step": 6085
+ },
+ {
+ "epoch": 34.777142857142856,
+ "grad_norm": 41.54645919799805,
+ "learning_rate": 1.6914285714285717e-05,
+ "loss": 0.1854,
+ "step": 6086
+ },
+ {
+ "epoch": 34.78285714285714,
+ "grad_norm": 86.29766082763672,
+ "learning_rate": 1.690793650793651e-05,
+ "loss": 0.2166,
+ "step": 6087
+ },
+ {
+ "epoch": 34.78857142857143,
+ "grad_norm": 104.64183044433594,
+ "learning_rate": 1.6901587301587303e-05,
+ "loss": 0.2194,
+ "step": 6088
+ },
+ {
+ "epoch": 34.794285714285714,
+ "grad_norm": 42.5262451171875,
+ "learning_rate": 1.68952380952381e-05,
+ "loss": 0.2445,
+ "step": 6089
+ },
+ {
+ "epoch": 34.8,
+ "grad_norm": 23.71393585205078,
+ "learning_rate": 1.688888888888889e-05,
+ "loss": 0.1826,
+ "step": 6090
+ },
+ {
+ "epoch": 34.80571428571429,
+ "grad_norm": 39.36594009399414,
+ "learning_rate": 1.6882539682539684e-05,
+ "loss": 0.2796,
+ "step": 6091
+ },
+ {
+ "epoch": 34.81142857142857,
+ "grad_norm": 91.96893310546875,
+ "learning_rate": 1.6876190476190477e-05,
+ "loss": 0.1858,
+ "step": 6092
+ },
+ {
+ "epoch": 34.817142857142855,
+ "grad_norm": 252.6929168701172,
+ "learning_rate": 1.6869841269841273e-05,
+ "loss": 0.1824,
+ "step": 6093
+ },
+ {
+ "epoch": 34.822857142857146,
+ "grad_norm": 145.4113006591797,
+ "learning_rate": 1.6863492063492065e-05,
+ "loss": 0.2359,
+ "step": 6094
+ },
+ {
+ "epoch": 34.82857142857143,
+ "grad_norm": 70.6640396118164,
+ "learning_rate": 1.6857142857142858e-05,
+ "loss": 0.1619,
+ "step": 6095
+ },
+ {
+ "epoch": 34.83428571428571,
+ "grad_norm": 28.273479461669922,
+ "learning_rate": 1.685079365079365e-05,
+ "loss": 0.1944,
+ "step": 6096
+ },
+ {
+ "epoch": 34.84,
+ "grad_norm": 63.15889358520508,
+ "learning_rate": 1.6844444444444447e-05,
+ "loss": 0.2183,
+ "step": 6097
+ },
+ {
+ "epoch": 34.84571428571429,
+ "grad_norm": 73.52220916748047,
+ "learning_rate": 1.683809523809524e-05,
+ "loss": 0.1997,
+ "step": 6098
+ },
+ {
+ "epoch": 34.85142857142857,
+ "grad_norm": 17.628963470458984,
+ "learning_rate": 1.6831746031746032e-05,
+ "loss": 0.1901,
+ "step": 6099
+ },
+ {
+ "epoch": 34.857142857142854,
+ "grad_norm": 35.90450668334961,
+ "learning_rate": 1.6825396825396828e-05,
+ "loss": 0.2232,
+ "step": 6100
+ },
+ {
+ "epoch": 34.862857142857145,
+ "grad_norm": 57.68938446044922,
+ "learning_rate": 1.681904761904762e-05,
+ "loss": 0.3033,
+ "step": 6101
+ },
+ {
+ "epoch": 34.86857142857143,
+ "grad_norm": 53.79425811767578,
+ "learning_rate": 1.6812698412698413e-05,
+ "loss": 0.2516,
+ "step": 6102
+ },
+ {
+ "epoch": 34.87428571428571,
+ "grad_norm": 68.87535095214844,
+ "learning_rate": 1.6806349206349206e-05,
+ "loss": 0.2352,
+ "step": 6103
+ },
+ {
+ "epoch": 34.88,
+ "grad_norm": 74.88392639160156,
+ "learning_rate": 1.6800000000000002e-05,
+ "loss": 0.2601,
+ "step": 6104
+ },
+ {
+ "epoch": 34.885714285714286,
+ "grad_norm": 85.81791687011719,
+ "learning_rate": 1.6793650793650794e-05,
+ "loss": 0.1613,
+ "step": 6105
+ },
+ {
+ "epoch": 34.89142857142857,
+ "grad_norm": 45.918296813964844,
+ "learning_rate": 1.6787301587301587e-05,
+ "loss": 0.2329,
+ "step": 6106
+ },
+ {
+ "epoch": 34.89714285714286,
+ "grad_norm": 36.77676010131836,
+ "learning_rate": 1.678095238095238e-05,
+ "loss": 0.268,
+ "step": 6107
+ },
+ {
+ "epoch": 34.902857142857144,
+ "grad_norm": 26.147130966186523,
+ "learning_rate": 1.6774603174603176e-05,
+ "loss": 0.2399,
+ "step": 6108
+ },
+ {
+ "epoch": 34.90857142857143,
+ "grad_norm": 57.6148681640625,
+ "learning_rate": 1.676825396825397e-05,
+ "loss": 0.3074,
+ "step": 6109
+ },
+ {
+ "epoch": 34.91428571428571,
+ "grad_norm": 25.31715202331543,
+ "learning_rate": 1.676190476190476e-05,
+ "loss": 0.1813,
+ "step": 6110
+ },
+ {
+ "epoch": 34.92,
+ "grad_norm": 101.95768737792969,
+ "learning_rate": 1.6755555555555557e-05,
+ "loss": 0.3566,
+ "step": 6111
+ },
+ {
+ "epoch": 34.925714285714285,
+ "grad_norm": 41.54034423828125,
+ "learning_rate": 1.674920634920635e-05,
+ "loss": 0.1578,
+ "step": 6112
+ },
+ {
+ "epoch": 34.93142857142857,
+ "grad_norm": 130.59205627441406,
+ "learning_rate": 1.6742857142857142e-05,
+ "loss": 0.3017,
+ "step": 6113
+ },
+ {
+ "epoch": 34.93714285714286,
+ "grad_norm": 64.66847229003906,
+ "learning_rate": 1.6736507936507935e-05,
+ "loss": 0.2254,
+ "step": 6114
+ },
+ {
+ "epoch": 34.94285714285714,
+ "grad_norm": 250.42233276367188,
+ "learning_rate": 1.673015873015873e-05,
+ "loss": 0.3019,
+ "step": 6115
+ },
+ {
+ "epoch": 34.94857142857143,
+ "grad_norm": 75.5509033203125,
+ "learning_rate": 1.6723809523809524e-05,
+ "loss": 0.2631,
+ "step": 6116
+ },
+ {
+ "epoch": 34.95428571428572,
+ "grad_norm": 421.3606262207031,
+ "learning_rate": 1.6717460317460316e-05,
+ "loss": 0.2681,
+ "step": 6117
+ },
+ {
+ "epoch": 34.96,
+ "grad_norm": 41.41328048706055,
+ "learning_rate": 1.6711111111111112e-05,
+ "loss": 0.2661,
+ "step": 6118
+ },
+ {
+ "epoch": 34.965714285714284,
+ "grad_norm": 49.59128952026367,
+ "learning_rate": 1.6704761904761905e-05,
+ "loss": 0.2326,
+ "step": 6119
+ },
+ {
+ "epoch": 34.97142857142857,
+ "grad_norm": 71.13414764404297,
+ "learning_rate": 1.66984126984127e-05,
+ "loss": 0.1903,
+ "step": 6120
+ },
+ {
+ "epoch": 34.97714285714286,
+ "grad_norm": 66.1566390991211,
+ "learning_rate": 1.6692063492063494e-05,
+ "loss": 0.1926,
+ "step": 6121
+ },
+ {
+ "epoch": 34.98285714285714,
+ "grad_norm": 36.81869888305664,
+ "learning_rate": 1.6685714285714286e-05,
+ "loss": 0.1765,
+ "step": 6122
+ },
+ {
+ "epoch": 34.988571428571426,
+ "grad_norm": 32.54508590698242,
+ "learning_rate": 1.6679365079365082e-05,
+ "loss": 0.1183,
+ "step": 6123
+ },
+ {
+ "epoch": 34.994285714285716,
+ "grad_norm": 60.68364334106445,
+ "learning_rate": 1.6673015873015875e-05,
+ "loss": 0.2048,
+ "step": 6124
+ },
+ {
+ "epoch": 35.0,
+ "grad_norm": 39.2964973449707,
+ "learning_rate": 1.6666666666666667e-05,
+ "loss": 0.2149,
+ "step": 6125
+ },
+ {
+ "epoch": 35.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5942349433898926,
+ "eval_map": 0.9361,
+ "eval_map_50": 0.9684,
+ "eval_map_75": 0.9612,
+ "eval_map_large": 0.9361,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9361,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7917,
+ "eval_mar_10": 0.973,
+ "eval_mar_100": 0.9756,
+ "eval_mar_100_per_class": 0.9756,
+ "eval_mar_large": 0.9756,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.3791,
+ "eval_samples_per_second": 21.975,
+ "eval_steps_per_second": 2.766,
+ "step": 6125
+ },
+ {
+ "epoch": 35.005714285714284,
+ "grad_norm": 73.07441711425781,
+ "learning_rate": 1.6660317460317463e-05,
+ "loss": 0.1722,
+ "step": 6126
+ },
+ {
+ "epoch": 35.011428571428574,
+ "grad_norm": 30.19472312927246,
+ "learning_rate": 1.6653968253968256e-05,
+ "loss": 0.1864,
+ "step": 6127
+ },
+ {
+ "epoch": 35.01714285714286,
+ "grad_norm": 97.52613830566406,
+ "learning_rate": 1.664761904761905e-05,
+ "loss": 0.1768,
+ "step": 6128
+ },
+ {
+ "epoch": 35.02285714285714,
+ "grad_norm": 70.80941009521484,
+ "learning_rate": 1.664126984126984e-05,
+ "loss": 0.2576,
+ "step": 6129
+ },
+ {
+ "epoch": 35.02857142857143,
+ "grad_norm": 31.63057518005371,
+ "learning_rate": 1.6634920634920637e-05,
+ "loss": 0.1829,
+ "step": 6130
+ },
+ {
+ "epoch": 35.034285714285716,
+ "grad_norm": 57.87783432006836,
+ "learning_rate": 1.662857142857143e-05,
+ "loss": 0.1627,
+ "step": 6131
+ },
+ {
+ "epoch": 35.04,
+ "grad_norm": 25.989816665649414,
+ "learning_rate": 1.6622222222222223e-05,
+ "loss": 0.2545,
+ "step": 6132
+ },
+ {
+ "epoch": 35.04571428571428,
+ "grad_norm": 74.0960922241211,
+ "learning_rate": 1.661587301587302e-05,
+ "loss": 0.2245,
+ "step": 6133
+ },
+ {
+ "epoch": 35.05142857142857,
+ "grad_norm": 36.5538215637207,
+ "learning_rate": 1.660952380952381e-05,
+ "loss": 0.1737,
+ "step": 6134
+ },
+ {
+ "epoch": 35.05714285714286,
+ "grad_norm": 102.22315979003906,
+ "learning_rate": 1.6603174603174604e-05,
+ "loss": 0.1774,
+ "step": 6135
+ },
+ {
+ "epoch": 35.06285714285714,
+ "grad_norm": 20.221057891845703,
+ "learning_rate": 1.6596825396825397e-05,
+ "loss": 0.2787,
+ "step": 6136
+ },
+ {
+ "epoch": 35.06857142857143,
+ "grad_norm": 22.389572143554688,
+ "learning_rate": 1.6590476190476193e-05,
+ "loss": 0.169,
+ "step": 6137
+ },
+ {
+ "epoch": 35.074285714285715,
+ "grad_norm": 134.66050720214844,
+ "learning_rate": 1.6584126984126985e-05,
+ "loss": 0.2379,
+ "step": 6138
+ },
+ {
+ "epoch": 35.08,
+ "grad_norm": 68.95519256591797,
+ "learning_rate": 1.6577777777777778e-05,
+ "loss": 0.1671,
+ "step": 6139
+ },
+ {
+ "epoch": 35.08571428571429,
+ "grad_norm": 84.9073257446289,
+ "learning_rate": 1.657142857142857e-05,
+ "loss": 0.1476,
+ "step": 6140
+ },
+ {
+ "epoch": 35.09142857142857,
+ "grad_norm": 50.36892318725586,
+ "learning_rate": 1.6565079365079367e-05,
+ "loss": 0.2112,
+ "step": 6141
+ },
+ {
+ "epoch": 35.097142857142856,
+ "grad_norm": 103.01759338378906,
+ "learning_rate": 1.655873015873016e-05,
+ "loss": 0.1881,
+ "step": 6142
+ },
+ {
+ "epoch": 35.10285714285714,
+ "grad_norm": 31.765804290771484,
+ "learning_rate": 1.6552380952380952e-05,
+ "loss": 0.1686,
+ "step": 6143
+ },
+ {
+ "epoch": 35.10857142857143,
+ "grad_norm": 32.143699645996094,
+ "learning_rate": 1.6546031746031744e-05,
+ "loss": 0.25,
+ "step": 6144
+ },
+ {
+ "epoch": 35.114285714285714,
+ "grad_norm": 24.49603271484375,
+ "learning_rate": 1.653968253968254e-05,
+ "loss": 0.3411,
+ "step": 6145
+ },
+ {
+ "epoch": 35.12,
+ "grad_norm": 67.13269805908203,
+ "learning_rate": 1.6533333333333333e-05,
+ "loss": 0.1517,
+ "step": 6146
+ },
+ {
+ "epoch": 35.12571428571429,
+ "grad_norm": 63.90729904174805,
+ "learning_rate": 1.6526984126984126e-05,
+ "loss": 0.1716,
+ "step": 6147
+ },
+ {
+ "epoch": 35.13142857142857,
+ "grad_norm": 55.39335250854492,
+ "learning_rate": 1.6520634920634922e-05,
+ "loss": 0.2807,
+ "step": 6148
+ },
+ {
+ "epoch": 35.137142857142855,
+ "grad_norm": 73.53040313720703,
+ "learning_rate": 1.6514285714285714e-05,
+ "loss": 0.2062,
+ "step": 6149
+ },
+ {
+ "epoch": 35.142857142857146,
+ "grad_norm": 97.12939453125,
+ "learning_rate": 1.6507936507936507e-05,
+ "loss": 0.1838,
+ "step": 6150
+ },
+ {
+ "epoch": 35.14857142857143,
+ "grad_norm": 41.98698043823242,
+ "learning_rate": 1.65015873015873e-05,
+ "loss": 0.2443,
+ "step": 6151
+ },
+ {
+ "epoch": 35.15428571428571,
+ "grad_norm": 50.828304290771484,
+ "learning_rate": 1.6495238095238096e-05,
+ "loss": 0.1269,
+ "step": 6152
+ },
+ {
+ "epoch": 35.16,
+ "grad_norm": 53.13596725463867,
+ "learning_rate": 1.648888888888889e-05,
+ "loss": 0.3944,
+ "step": 6153
+ },
+ {
+ "epoch": 35.16571428571429,
+ "grad_norm": 100.52580261230469,
+ "learning_rate": 1.6482539682539684e-05,
+ "loss": 0.2598,
+ "step": 6154
+ },
+ {
+ "epoch": 35.17142857142857,
+ "grad_norm": 126.7484130859375,
+ "learning_rate": 1.6476190476190477e-05,
+ "loss": 0.2223,
+ "step": 6155
+ },
+ {
+ "epoch": 35.177142857142854,
+ "grad_norm": 38.768341064453125,
+ "learning_rate": 1.646984126984127e-05,
+ "loss": 0.2195,
+ "step": 6156
+ },
+ {
+ "epoch": 35.182857142857145,
+ "grad_norm": 65.1585693359375,
+ "learning_rate": 1.6463492063492066e-05,
+ "loss": 0.2112,
+ "step": 6157
+ },
+ {
+ "epoch": 35.18857142857143,
+ "grad_norm": 60.47264099121094,
+ "learning_rate": 1.645714285714286e-05,
+ "loss": 0.1769,
+ "step": 6158
+ },
+ {
+ "epoch": 35.19428571428571,
+ "grad_norm": 48.974544525146484,
+ "learning_rate": 1.6450793650793654e-05,
+ "loss": 0.2144,
+ "step": 6159
+ },
+ {
+ "epoch": 35.2,
+ "grad_norm": 93.70869445800781,
+ "learning_rate": 1.6444444444444447e-05,
+ "loss": 0.2033,
+ "step": 6160
+ },
+ {
+ "epoch": 35.205714285714286,
+ "grad_norm": 50.43521499633789,
+ "learning_rate": 1.643809523809524e-05,
+ "loss": 0.1514,
+ "step": 6161
+ },
+ {
+ "epoch": 35.21142857142857,
+ "grad_norm": 19.45742416381836,
+ "learning_rate": 1.6431746031746032e-05,
+ "loss": 0.2133,
+ "step": 6162
+ },
+ {
+ "epoch": 35.21714285714286,
+ "grad_norm": 35.2542839050293,
+ "learning_rate": 1.6425396825396828e-05,
+ "loss": 0.181,
+ "step": 6163
+ },
+ {
+ "epoch": 35.222857142857144,
+ "grad_norm": 51.425270080566406,
+ "learning_rate": 1.641904761904762e-05,
+ "loss": 0.2242,
+ "step": 6164
+ },
+ {
+ "epoch": 35.22857142857143,
+ "grad_norm": 49.880558013916016,
+ "learning_rate": 1.6412698412698414e-05,
+ "loss": 0.3576,
+ "step": 6165
+ },
+ {
+ "epoch": 35.23428571428571,
+ "grad_norm": 66.03699493408203,
+ "learning_rate": 1.6406349206349206e-05,
+ "loss": 0.2637,
+ "step": 6166
+ },
+ {
+ "epoch": 35.24,
+ "grad_norm": 75.09126281738281,
+ "learning_rate": 1.6400000000000002e-05,
+ "loss": 0.2208,
+ "step": 6167
+ },
+ {
+ "epoch": 35.245714285714286,
+ "grad_norm": 47.65190124511719,
+ "learning_rate": 1.6393650793650795e-05,
+ "loss": 0.1784,
+ "step": 6168
+ },
+ {
+ "epoch": 35.25142857142857,
+ "grad_norm": 45.626792907714844,
+ "learning_rate": 1.6387301587301587e-05,
+ "loss": 0.2506,
+ "step": 6169
+ },
+ {
+ "epoch": 35.25714285714286,
+ "grad_norm": 35.04150390625,
+ "learning_rate": 1.6380952380952384e-05,
+ "loss": 0.2119,
+ "step": 6170
+ },
+ {
+ "epoch": 35.26285714285714,
+ "grad_norm": 24.93942642211914,
+ "learning_rate": 1.6374603174603176e-05,
+ "loss": 0.2261,
+ "step": 6171
+ },
+ {
+ "epoch": 35.26857142857143,
+ "grad_norm": 501.3238830566406,
+ "learning_rate": 1.636825396825397e-05,
+ "loss": 0.2102,
+ "step": 6172
+ },
+ {
+ "epoch": 35.27428571428572,
+ "grad_norm": 31.831348419189453,
+ "learning_rate": 1.636190476190476e-05,
+ "loss": 0.1745,
+ "step": 6173
+ },
+ {
+ "epoch": 35.28,
+ "grad_norm": 43.44385528564453,
+ "learning_rate": 1.6355555555555557e-05,
+ "loss": 0.1931,
+ "step": 6174
+ },
+ {
+ "epoch": 35.285714285714285,
+ "grad_norm": 58.899925231933594,
+ "learning_rate": 1.634920634920635e-05,
+ "loss": 0.2839,
+ "step": 6175
+ },
+ {
+ "epoch": 35.29142857142857,
+ "grad_norm": 32.3404655456543,
+ "learning_rate": 1.6342857142857143e-05,
+ "loss": 0.3113,
+ "step": 6176
+ },
+ {
+ "epoch": 35.29714285714286,
+ "grad_norm": 39.052772521972656,
+ "learning_rate": 1.6336507936507935e-05,
+ "loss": 0.1863,
+ "step": 6177
+ },
+ {
+ "epoch": 35.30285714285714,
+ "grad_norm": 23.971147537231445,
+ "learning_rate": 1.633015873015873e-05,
+ "loss": 0.1415,
+ "step": 6178
+ },
+ {
+ "epoch": 35.308571428571426,
+ "grad_norm": 1262.6932373046875,
+ "learning_rate": 1.6323809523809524e-05,
+ "loss": 0.1709,
+ "step": 6179
+ },
+ {
+ "epoch": 35.31428571428572,
+ "grad_norm": 39.73323059082031,
+ "learning_rate": 1.6317460317460317e-05,
+ "loss": 0.2033,
+ "step": 6180
+ },
+ {
+ "epoch": 35.32,
+ "grad_norm": 20.002708435058594,
+ "learning_rate": 1.6311111111111113e-05,
+ "loss": 0.2234,
+ "step": 6181
+ },
+ {
+ "epoch": 35.325714285714284,
+ "grad_norm": 43.954872131347656,
+ "learning_rate": 1.6304761904761905e-05,
+ "loss": 0.2385,
+ "step": 6182
+ },
+ {
+ "epoch": 35.331428571428575,
+ "grad_norm": 58.16795349121094,
+ "learning_rate": 1.6298412698412698e-05,
+ "loss": 0.2301,
+ "step": 6183
+ },
+ {
+ "epoch": 35.33714285714286,
+ "grad_norm": 22.84110450744629,
+ "learning_rate": 1.629206349206349e-05,
+ "loss": 0.1901,
+ "step": 6184
+ },
+ {
+ "epoch": 35.34285714285714,
+ "grad_norm": 18.52191925048828,
+ "learning_rate": 1.6285714285714287e-05,
+ "loss": 0.2187,
+ "step": 6185
+ },
+ {
+ "epoch": 35.348571428571425,
+ "grad_norm": 75.45260620117188,
+ "learning_rate": 1.627936507936508e-05,
+ "loss": 0.279,
+ "step": 6186
+ },
+ {
+ "epoch": 35.354285714285716,
+ "grad_norm": 30.47543716430664,
+ "learning_rate": 1.6273015873015872e-05,
+ "loss": 0.2449,
+ "step": 6187
+ },
+ {
+ "epoch": 35.36,
+ "grad_norm": 184.71424865722656,
+ "learning_rate": 1.6266666666666665e-05,
+ "loss": 0.2356,
+ "step": 6188
+ },
+ {
+ "epoch": 35.36571428571428,
+ "grad_norm": 43.9375114440918,
+ "learning_rate": 1.626031746031746e-05,
+ "loss": 0.2007,
+ "step": 6189
+ },
+ {
+ "epoch": 35.371428571428574,
+ "grad_norm": 59.61788558959961,
+ "learning_rate": 1.6253968253968253e-05,
+ "loss": 0.2017,
+ "step": 6190
+ },
+ {
+ "epoch": 35.37714285714286,
+ "grad_norm": 32.35559844970703,
+ "learning_rate": 1.624761904761905e-05,
+ "loss": 0.2449,
+ "step": 6191
+ },
+ {
+ "epoch": 35.38285714285714,
+ "grad_norm": 29.847366333007812,
+ "learning_rate": 1.6241269841269842e-05,
+ "loss": 0.1943,
+ "step": 6192
+ },
+ {
+ "epoch": 35.38857142857143,
+ "grad_norm": 41.44870376586914,
+ "learning_rate": 1.6234920634920638e-05,
+ "loss": 0.1667,
+ "step": 6193
+ },
+ {
+ "epoch": 35.394285714285715,
+ "grad_norm": 82.03839874267578,
+ "learning_rate": 1.622857142857143e-05,
+ "loss": 0.1517,
+ "step": 6194
+ },
+ {
+ "epoch": 35.4,
+ "grad_norm": 37.83160400390625,
+ "learning_rate": 1.6222222222222223e-05,
+ "loss": 0.1896,
+ "step": 6195
+ },
+ {
+ "epoch": 35.40571428571428,
+ "grad_norm": 14.38239574432373,
+ "learning_rate": 1.621587301587302e-05,
+ "loss": 0.2064,
+ "step": 6196
+ },
+ {
+ "epoch": 35.41142857142857,
+ "grad_norm": 31.069854736328125,
+ "learning_rate": 1.6209523809523812e-05,
+ "loss": 0.2047,
+ "step": 6197
+ },
+ {
+ "epoch": 35.417142857142856,
+ "grad_norm": 26.791519165039062,
+ "learning_rate": 1.6203174603174604e-05,
+ "loss": 0.1788,
+ "step": 6198
+ },
+ {
+ "epoch": 35.42285714285714,
+ "grad_norm": 68.69419860839844,
+ "learning_rate": 1.6196825396825397e-05,
+ "loss": 0.1696,
+ "step": 6199
+ },
+ {
+ "epoch": 35.42857142857143,
+ "grad_norm": 41.01641845703125,
+ "learning_rate": 1.6190476190476193e-05,
+ "loss": 0.2361,
+ "step": 6200
+ },
+ {
+ "epoch": 35.434285714285714,
+ "grad_norm": 23.218536376953125,
+ "learning_rate": 1.6184126984126986e-05,
+ "loss": 0.2061,
+ "step": 6201
+ },
+ {
+ "epoch": 35.44,
+ "grad_norm": 580.6075439453125,
+ "learning_rate": 1.617777777777778e-05,
+ "loss": 0.2366,
+ "step": 6202
+ },
+ {
+ "epoch": 35.44571428571429,
+ "grad_norm": 44.08100891113281,
+ "learning_rate": 1.6171428571428574e-05,
+ "loss": 0.1888,
+ "step": 6203
+ },
+ {
+ "epoch": 35.45142857142857,
+ "grad_norm": 56.76536178588867,
+ "learning_rate": 1.6165079365079367e-05,
+ "loss": 0.2554,
+ "step": 6204
+ },
+ {
+ "epoch": 35.457142857142856,
+ "grad_norm": 51.591609954833984,
+ "learning_rate": 1.615873015873016e-05,
+ "loss": 0.2221,
+ "step": 6205
+ },
+ {
+ "epoch": 35.462857142857146,
+ "grad_norm": 29.525714874267578,
+ "learning_rate": 1.6152380952380952e-05,
+ "loss": 0.2147,
+ "step": 6206
+ },
+ {
+ "epoch": 35.46857142857143,
+ "grad_norm": 36.911781311035156,
+ "learning_rate": 1.614603174603175e-05,
+ "loss": 0.2882,
+ "step": 6207
+ },
+ {
+ "epoch": 35.47428571428571,
+ "grad_norm": 44.829017639160156,
+ "learning_rate": 1.613968253968254e-05,
+ "loss": 0.2356,
+ "step": 6208
+ },
+ {
+ "epoch": 35.48,
+ "grad_norm": 50.94928741455078,
+ "learning_rate": 1.6133333333333334e-05,
+ "loss": 0.2629,
+ "step": 6209
+ },
+ {
+ "epoch": 35.48571428571429,
+ "grad_norm": 33.35788345336914,
+ "learning_rate": 1.6126984126984126e-05,
+ "loss": 0.2352,
+ "step": 6210
+ },
+ {
+ "epoch": 35.49142857142857,
+ "grad_norm": 33.71529769897461,
+ "learning_rate": 1.6120634920634922e-05,
+ "loss": 0.3129,
+ "step": 6211
+ },
+ {
+ "epoch": 35.497142857142855,
+ "grad_norm": 28.31825065612793,
+ "learning_rate": 1.6114285714285715e-05,
+ "loss": 0.2175,
+ "step": 6212
+ },
+ {
+ "epoch": 35.502857142857145,
+ "grad_norm": 30.960386276245117,
+ "learning_rate": 1.6107936507936508e-05,
+ "loss": 0.1477,
+ "step": 6213
+ },
+ {
+ "epoch": 35.50857142857143,
+ "grad_norm": 33.10350799560547,
+ "learning_rate": 1.6101587301587304e-05,
+ "loss": 0.2215,
+ "step": 6214
+ },
+ {
+ "epoch": 35.51428571428571,
+ "grad_norm": 41.282249450683594,
+ "learning_rate": 1.6095238095238096e-05,
+ "loss": 0.3657,
+ "step": 6215
+ },
+ {
+ "epoch": 35.52,
+ "grad_norm": 53.24930953979492,
+ "learning_rate": 1.608888888888889e-05,
+ "loss": 0.2471,
+ "step": 6216
+ },
+ {
+ "epoch": 35.52571428571429,
+ "grad_norm": 37.373260498046875,
+ "learning_rate": 1.608253968253968e-05,
+ "loss": 0.1919,
+ "step": 6217
+ },
+ {
+ "epoch": 35.53142857142857,
+ "grad_norm": 35.30415344238281,
+ "learning_rate": 1.6076190476190477e-05,
+ "loss": 0.2584,
+ "step": 6218
+ },
+ {
+ "epoch": 35.537142857142854,
+ "grad_norm": 58.4691047668457,
+ "learning_rate": 1.606984126984127e-05,
+ "loss": 0.3271,
+ "step": 6219
+ },
+ {
+ "epoch": 35.542857142857144,
+ "grad_norm": 85.24468994140625,
+ "learning_rate": 1.6063492063492063e-05,
+ "loss": 0.1821,
+ "step": 6220
+ },
+ {
+ "epoch": 35.54857142857143,
+ "grad_norm": 30.816890716552734,
+ "learning_rate": 1.6057142857142855e-05,
+ "loss": 0.1627,
+ "step": 6221
+ },
+ {
+ "epoch": 35.55428571428571,
+ "grad_norm": 771.9216918945312,
+ "learning_rate": 1.605079365079365e-05,
+ "loss": 0.2432,
+ "step": 6222
+ },
+ {
+ "epoch": 35.56,
+ "grad_norm": 49.051143646240234,
+ "learning_rate": 1.6044444444444444e-05,
+ "loss": 0.1995,
+ "step": 6223
+ },
+ {
+ "epoch": 35.565714285714286,
+ "grad_norm": 98.13201904296875,
+ "learning_rate": 1.6038095238095237e-05,
+ "loss": 0.3006,
+ "step": 6224
+ },
+ {
+ "epoch": 35.57142857142857,
+ "grad_norm": 24.999088287353516,
+ "learning_rate": 1.6031746031746033e-05,
+ "loss": 0.2711,
+ "step": 6225
+ },
+ {
+ "epoch": 35.57714285714286,
+ "grad_norm": 37.10238265991211,
+ "learning_rate": 1.6025396825396825e-05,
+ "loss": 0.1812,
+ "step": 6226
+ },
+ {
+ "epoch": 35.582857142857144,
+ "grad_norm": 40.84230041503906,
+ "learning_rate": 1.6019047619047618e-05,
+ "loss": 0.1913,
+ "step": 6227
+ },
+ {
+ "epoch": 35.58857142857143,
+ "grad_norm": 120.78560638427734,
+ "learning_rate": 1.6012698412698414e-05,
+ "loss": 0.1983,
+ "step": 6228
+ },
+ {
+ "epoch": 35.59428571428572,
+ "grad_norm": 92.3857421875,
+ "learning_rate": 1.6006349206349207e-05,
+ "loss": 0.2357,
+ "step": 6229
+ },
+ {
+ "epoch": 35.6,
+ "grad_norm": 36.8802604675293,
+ "learning_rate": 1.6000000000000003e-05,
+ "loss": 0.2196,
+ "step": 6230
+ },
+ {
+ "epoch": 35.605714285714285,
+ "grad_norm": 47.57221984863281,
+ "learning_rate": 1.5993650793650795e-05,
+ "loss": 0.2572,
+ "step": 6231
+ },
+ {
+ "epoch": 35.61142857142857,
+ "grad_norm": 54.17245864868164,
+ "learning_rate": 1.5987301587301588e-05,
+ "loss": 0.2616,
+ "step": 6232
+ },
+ {
+ "epoch": 35.61714285714286,
+ "grad_norm": 56.34675979614258,
+ "learning_rate": 1.5980952380952384e-05,
+ "loss": 0.1948,
+ "step": 6233
+ },
+ {
+ "epoch": 35.62285714285714,
+ "grad_norm": 56.1422233581543,
+ "learning_rate": 1.5974603174603177e-05,
+ "loss": 0.1275,
+ "step": 6234
+ },
+ {
+ "epoch": 35.628571428571426,
+ "grad_norm": 39.908538818359375,
+ "learning_rate": 1.596825396825397e-05,
+ "loss": 0.2365,
+ "step": 6235
+ },
+ {
+ "epoch": 35.63428571428572,
+ "grad_norm": 26.011695861816406,
+ "learning_rate": 1.5961904761904765e-05,
+ "loss": 0.1796,
+ "step": 6236
+ },
+ {
+ "epoch": 35.64,
+ "grad_norm": 74.88202667236328,
+ "learning_rate": 1.5955555555555558e-05,
+ "loss": 0.1827,
+ "step": 6237
+ },
+ {
+ "epoch": 35.645714285714284,
+ "grad_norm": 37.50237274169922,
+ "learning_rate": 1.594920634920635e-05,
+ "loss": 0.1882,
+ "step": 6238
+ },
+ {
+ "epoch": 35.651428571428575,
+ "grad_norm": 19.13360595703125,
+ "learning_rate": 1.5942857142857143e-05,
+ "loss": 0.2526,
+ "step": 6239
+ },
+ {
+ "epoch": 35.65714285714286,
+ "grad_norm": 78.09122467041016,
+ "learning_rate": 1.593650793650794e-05,
+ "loss": 0.1978,
+ "step": 6240
+ },
+ {
+ "epoch": 35.66285714285714,
+ "grad_norm": 49.35946273803711,
+ "learning_rate": 1.5930158730158732e-05,
+ "loss": 0.1773,
+ "step": 6241
+ },
+ {
+ "epoch": 35.668571428571425,
+ "grad_norm": 58.68070983886719,
+ "learning_rate": 1.5923809523809524e-05,
+ "loss": 0.2185,
+ "step": 6242
+ },
+ {
+ "epoch": 35.674285714285716,
+ "grad_norm": 56.76746368408203,
+ "learning_rate": 1.5917460317460317e-05,
+ "loss": 0.2128,
+ "step": 6243
+ },
+ {
+ "epoch": 35.68,
+ "grad_norm": 38.83905792236328,
+ "learning_rate": 1.5911111111111113e-05,
+ "loss": 0.2239,
+ "step": 6244
+ },
+ {
+ "epoch": 35.68571428571428,
+ "grad_norm": 50.337825775146484,
+ "learning_rate": 1.5904761904761906e-05,
+ "loss": 0.2765,
+ "step": 6245
+ },
+ {
+ "epoch": 35.691428571428574,
+ "grad_norm": 24.240882873535156,
+ "learning_rate": 1.58984126984127e-05,
+ "loss": 0.2335,
+ "step": 6246
+ },
+ {
+ "epoch": 35.69714285714286,
+ "grad_norm": 497.1015930175781,
+ "learning_rate": 1.5892063492063494e-05,
+ "loss": 0.202,
+ "step": 6247
+ },
+ {
+ "epoch": 35.70285714285714,
+ "grad_norm": 64.22380065917969,
+ "learning_rate": 1.5885714285714287e-05,
+ "loss": 0.212,
+ "step": 6248
+ },
+ {
+ "epoch": 35.70857142857143,
+ "grad_norm": 40.884334564208984,
+ "learning_rate": 1.587936507936508e-05,
+ "loss": 0.1923,
+ "step": 6249
+ },
+ {
+ "epoch": 35.714285714285715,
+ "grad_norm": 64.15213012695312,
+ "learning_rate": 1.5873015873015872e-05,
+ "loss": 0.2336,
+ "step": 6250
+ },
+ {
+ "epoch": 35.72,
+ "grad_norm": 46.503936767578125,
+ "learning_rate": 1.586666666666667e-05,
+ "loss": 0.2912,
+ "step": 6251
+ },
+ {
+ "epoch": 35.72571428571428,
+ "grad_norm": 48.539791107177734,
+ "learning_rate": 1.586031746031746e-05,
+ "loss": 0.1813,
+ "step": 6252
+ },
+ {
+ "epoch": 35.73142857142857,
+ "grad_norm": 30.292924880981445,
+ "learning_rate": 1.5853968253968254e-05,
+ "loss": 0.422,
+ "step": 6253
+ },
+ {
+ "epoch": 35.73714285714286,
+ "grad_norm": 23.619461059570312,
+ "learning_rate": 1.5847619047619046e-05,
+ "loss": 0.1958,
+ "step": 6254
+ },
+ {
+ "epoch": 35.74285714285714,
+ "grad_norm": 101.78134155273438,
+ "learning_rate": 1.5841269841269842e-05,
+ "loss": 0.2327,
+ "step": 6255
+ },
+ {
+ "epoch": 35.74857142857143,
+ "grad_norm": 30.111948013305664,
+ "learning_rate": 1.5834920634920635e-05,
+ "loss": 0.187,
+ "step": 6256
+ },
+ {
+ "epoch": 35.754285714285714,
+ "grad_norm": 47.025508880615234,
+ "learning_rate": 1.5828571428571428e-05,
+ "loss": 0.3453,
+ "step": 6257
+ },
+ {
+ "epoch": 35.76,
+ "grad_norm": 87.50485229492188,
+ "learning_rate": 1.582222222222222e-05,
+ "loss": 0.2542,
+ "step": 6258
+ },
+ {
+ "epoch": 35.76571428571429,
+ "grad_norm": 54.43192672729492,
+ "learning_rate": 1.5815873015873016e-05,
+ "loss": 0.2674,
+ "step": 6259
+ },
+ {
+ "epoch": 35.77142857142857,
+ "grad_norm": 26.362632751464844,
+ "learning_rate": 1.580952380952381e-05,
+ "loss": 0.1999,
+ "step": 6260
+ },
+ {
+ "epoch": 35.777142857142856,
+ "grad_norm": 42.240299224853516,
+ "learning_rate": 1.58031746031746e-05,
+ "loss": 0.1975,
+ "step": 6261
+ },
+ {
+ "epoch": 35.78285714285714,
+ "grad_norm": 35.53541946411133,
+ "learning_rate": 1.5796825396825398e-05,
+ "loss": 0.2892,
+ "step": 6262
+ },
+ {
+ "epoch": 35.78857142857143,
+ "grad_norm": 34.09052658081055,
+ "learning_rate": 1.579047619047619e-05,
+ "loss": 0.2585,
+ "step": 6263
+ },
+ {
+ "epoch": 35.794285714285714,
+ "grad_norm": 1790.7254638671875,
+ "learning_rate": 1.5784126984126986e-05,
+ "loss": 0.2906,
+ "step": 6264
+ },
+ {
+ "epoch": 35.8,
+ "grad_norm": 24.057857513427734,
+ "learning_rate": 1.577777777777778e-05,
+ "loss": 0.1914,
+ "step": 6265
+ },
+ {
+ "epoch": 35.80571428571429,
+ "grad_norm": 46.7984504699707,
+ "learning_rate": 1.577142857142857e-05,
+ "loss": 0.1992,
+ "step": 6266
+ },
+ {
+ "epoch": 35.81142857142857,
+ "grad_norm": 32.84125518798828,
+ "learning_rate": 1.5765079365079367e-05,
+ "loss": 0.2153,
+ "step": 6267
+ },
+ {
+ "epoch": 35.817142857142855,
+ "grad_norm": 40.810821533203125,
+ "learning_rate": 1.575873015873016e-05,
+ "loss": 0.2667,
+ "step": 6268
+ },
+ {
+ "epoch": 35.822857142857146,
+ "grad_norm": 42.42055892944336,
+ "learning_rate": 1.5752380952380953e-05,
+ "loss": 0.1929,
+ "step": 6269
+ },
+ {
+ "epoch": 35.82857142857143,
+ "grad_norm": 29.975252151489258,
+ "learning_rate": 1.574603174603175e-05,
+ "loss": 0.2226,
+ "step": 6270
+ },
+ {
+ "epoch": 35.83428571428571,
+ "grad_norm": 22.78770637512207,
+ "learning_rate": 1.573968253968254e-05,
+ "loss": 0.2817,
+ "step": 6271
+ },
+ {
+ "epoch": 35.84,
+ "grad_norm": 38.62025451660156,
+ "learning_rate": 1.5733333333333334e-05,
+ "loss": 0.2161,
+ "step": 6272
+ },
+ {
+ "epoch": 35.84571428571429,
+ "grad_norm": 39.60796356201172,
+ "learning_rate": 1.572698412698413e-05,
+ "loss": 0.2045,
+ "step": 6273
+ },
+ {
+ "epoch": 35.85142857142857,
+ "grad_norm": 633.5014038085938,
+ "learning_rate": 1.5720634920634923e-05,
+ "loss": 0.219,
+ "step": 6274
+ },
+ {
+ "epoch": 35.857142857142854,
+ "grad_norm": 49.83086013793945,
+ "learning_rate": 1.5714285714285715e-05,
+ "loss": 0.2216,
+ "step": 6275
+ },
+ {
+ "epoch": 35.862857142857145,
+ "grad_norm": 32.118247985839844,
+ "learning_rate": 1.5707936507936508e-05,
+ "loss": 0.199,
+ "step": 6276
+ },
+ {
+ "epoch": 35.86857142857143,
+ "grad_norm": 16.761796951293945,
+ "learning_rate": 1.5701587301587304e-05,
+ "loss": 0.2241,
+ "step": 6277
+ },
+ {
+ "epoch": 35.87428571428571,
+ "grad_norm": 32.82276153564453,
+ "learning_rate": 1.5695238095238097e-05,
+ "loss": 0.2014,
+ "step": 6278
+ },
+ {
+ "epoch": 35.88,
+ "grad_norm": 139.1734161376953,
+ "learning_rate": 1.568888888888889e-05,
+ "loss": 0.239,
+ "step": 6279
+ },
+ {
+ "epoch": 35.885714285714286,
+ "grad_norm": 60.02780532836914,
+ "learning_rate": 1.5682539682539682e-05,
+ "loss": 0.1493,
+ "step": 6280
+ },
+ {
+ "epoch": 35.89142857142857,
+ "grad_norm": 35.52342987060547,
+ "learning_rate": 1.5676190476190478e-05,
+ "loss": 0.1954,
+ "step": 6281
+ },
+ {
+ "epoch": 35.89714285714286,
+ "grad_norm": 61.01224136352539,
+ "learning_rate": 1.566984126984127e-05,
+ "loss": 0.2354,
+ "step": 6282
+ },
+ {
+ "epoch": 35.902857142857144,
+ "grad_norm": 98.2846450805664,
+ "learning_rate": 1.5663492063492063e-05,
+ "loss": 0.2863,
+ "step": 6283
+ },
+ {
+ "epoch": 35.90857142857143,
+ "grad_norm": 54.908931732177734,
+ "learning_rate": 1.565714285714286e-05,
+ "loss": 0.1334,
+ "step": 6284
+ },
+ {
+ "epoch": 35.91428571428571,
+ "grad_norm": 53.13249206542969,
+ "learning_rate": 1.5650793650793652e-05,
+ "loss": 0.2535,
+ "step": 6285
+ },
+ {
+ "epoch": 35.92,
+ "grad_norm": 1143.5648193359375,
+ "learning_rate": 1.5644444444444444e-05,
+ "loss": 0.3104,
+ "step": 6286
+ },
+ {
+ "epoch": 35.925714285714285,
+ "grad_norm": 38.51319885253906,
+ "learning_rate": 1.5638095238095237e-05,
+ "loss": 0.2201,
+ "step": 6287
+ },
+ {
+ "epoch": 35.93142857142857,
+ "grad_norm": 41.195770263671875,
+ "learning_rate": 1.5631746031746033e-05,
+ "loss": 0.2406,
+ "step": 6288
+ },
+ {
+ "epoch": 35.93714285714286,
+ "grad_norm": 46.77116394042969,
+ "learning_rate": 1.5625396825396826e-05,
+ "loss": 0.2352,
+ "step": 6289
+ },
+ {
+ "epoch": 35.94285714285714,
+ "grad_norm": 113.30989837646484,
+ "learning_rate": 1.561904761904762e-05,
+ "loss": 0.2114,
+ "step": 6290
+ },
+ {
+ "epoch": 35.94857142857143,
+ "grad_norm": 34.49736404418945,
+ "learning_rate": 1.561269841269841e-05,
+ "loss": 0.231,
+ "step": 6291
+ },
+ {
+ "epoch": 35.95428571428572,
+ "grad_norm": 69.27482604980469,
+ "learning_rate": 1.5606349206349207e-05,
+ "loss": 0.2033,
+ "step": 6292
+ },
+ {
+ "epoch": 35.96,
+ "grad_norm": 428.3207702636719,
+ "learning_rate": 1.56e-05,
+ "loss": 0.2879,
+ "step": 6293
+ },
+ {
+ "epoch": 35.965714285714284,
+ "grad_norm": 40.86140823364258,
+ "learning_rate": 1.5593650793650792e-05,
+ "loss": 0.1804,
+ "step": 6294
+ },
+ {
+ "epoch": 35.97142857142857,
+ "grad_norm": 97.99716186523438,
+ "learning_rate": 1.558730158730159e-05,
+ "loss": 0.2291,
+ "step": 6295
+ },
+ {
+ "epoch": 35.97714285714286,
+ "grad_norm": 101.12525939941406,
+ "learning_rate": 1.558095238095238e-05,
+ "loss": 0.2111,
+ "step": 6296
+ },
+ {
+ "epoch": 35.98285714285714,
+ "grad_norm": 39.819671630859375,
+ "learning_rate": 1.5574603174603174e-05,
+ "loss": 0.2151,
+ "step": 6297
+ },
+ {
+ "epoch": 35.988571428571426,
+ "grad_norm": 34.32330322265625,
+ "learning_rate": 1.556825396825397e-05,
+ "loss": 0.2041,
+ "step": 6298
+ },
+ {
+ "epoch": 35.994285714285716,
+ "grad_norm": 57.05809020996094,
+ "learning_rate": 1.5561904761904762e-05,
+ "loss": 0.268,
+ "step": 6299
+ },
+ {
+ "epoch": 36.0,
+ "grad_norm": 44.08351516723633,
+ "learning_rate": 1.5555555555555555e-05,
+ "loss": 0.1798,
+ "step": 6300
+ },
+ {
+ "epoch": 36.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5695643424987793,
+ "eval_map": 0.9409,
+ "eval_map_50": 0.971,
+ "eval_map_75": 0.9667,
+ "eval_map_large": 0.9409,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9409,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7943,
+ "eval_mar_10": 0.9743,
+ "eval_mar_100": 0.9759,
+ "eval_mar_100_per_class": 0.9759,
+ "eval_mar_large": 0.9759,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.4622,
+ "eval_samples_per_second": 21.839,
+ "eval_steps_per_second": 2.748,
+ "step": 6300
+ },
+ {
+ "epoch": 36.005714285714284,
+ "grad_norm": 60.560211181640625,
+ "learning_rate": 1.554920634920635e-05,
+ "loss": 0.1635,
+ "step": 6301
+ },
+ {
+ "epoch": 36.011428571428574,
+ "grad_norm": 30.342844009399414,
+ "learning_rate": 1.5542857142857144e-05,
+ "loss": 0.1777,
+ "step": 6302
+ },
+ {
+ "epoch": 36.01714285714286,
+ "grad_norm": 80.23675537109375,
+ "learning_rate": 1.553650793650794e-05,
+ "loss": 0.1611,
+ "step": 6303
+ },
+ {
+ "epoch": 36.02285714285714,
+ "grad_norm": 101.42436981201172,
+ "learning_rate": 1.5530158730158732e-05,
+ "loss": 0.2678,
+ "step": 6304
+ },
+ {
+ "epoch": 36.02857142857143,
+ "grad_norm": 25.97087860107422,
+ "learning_rate": 1.5523809523809525e-05,
+ "loss": 0.137,
+ "step": 6305
+ },
+ {
+ "epoch": 36.034285714285716,
+ "grad_norm": 54.747676849365234,
+ "learning_rate": 1.551746031746032e-05,
+ "loss": 0.2697,
+ "step": 6306
+ },
+ {
+ "epoch": 36.04,
+ "grad_norm": 34.6653938293457,
+ "learning_rate": 1.5511111111111114e-05,
+ "loss": 0.1973,
+ "step": 6307
+ },
+ {
+ "epoch": 36.04571428571428,
+ "grad_norm": 43.137142181396484,
+ "learning_rate": 1.5504761904761906e-05,
+ "loss": 0.225,
+ "step": 6308
+ },
+ {
+ "epoch": 36.05142857142857,
+ "grad_norm": 73.97096252441406,
+ "learning_rate": 1.54984126984127e-05,
+ "loss": 0.1988,
+ "step": 6309
+ },
+ {
+ "epoch": 36.05714285714286,
+ "grad_norm": 309.857421875,
+ "learning_rate": 1.5492063492063495e-05,
+ "loss": 0.2243,
+ "step": 6310
+ },
+ {
+ "epoch": 36.06285714285714,
+ "grad_norm": 80.37860870361328,
+ "learning_rate": 1.5485714285714287e-05,
+ "loss": 0.1729,
+ "step": 6311
+ },
+ {
+ "epoch": 36.06857142857143,
+ "grad_norm": 19.095983505249023,
+ "learning_rate": 1.547936507936508e-05,
+ "loss": 0.1457,
+ "step": 6312
+ },
+ {
+ "epoch": 36.074285714285715,
+ "grad_norm": 27.1456298828125,
+ "learning_rate": 1.5473015873015873e-05,
+ "loss": 0.1896,
+ "step": 6313
+ },
+ {
+ "epoch": 36.08,
+ "grad_norm": 65.52925109863281,
+ "learning_rate": 1.546666666666667e-05,
+ "loss": 0.2088,
+ "step": 6314
+ },
+ {
+ "epoch": 36.08571428571429,
+ "grad_norm": 47.01498031616211,
+ "learning_rate": 1.546031746031746e-05,
+ "loss": 0.1597,
+ "step": 6315
+ },
+ {
+ "epoch": 36.09142857142857,
+ "grad_norm": 27.8057918548584,
+ "learning_rate": 1.5453968253968254e-05,
+ "loss": 0.1789,
+ "step": 6316
+ },
+ {
+ "epoch": 36.097142857142856,
+ "grad_norm": 138.99972534179688,
+ "learning_rate": 1.544761904761905e-05,
+ "loss": 0.3318,
+ "step": 6317
+ },
+ {
+ "epoch": 36.10285714285714,
+ "grad_norm": 2374.276611328125,
+ "learning_rate": 1.5441269841269843e-05,
+ "loss": 0.3334,
+ "step": 6318
+ },
+ {
+ "epoch": 36.10857142857143,
+ "grad_norm": 92.5726089477539,
+ "learning_rate": 1.5434920634920635e-05,
+ "loss": 0.2459,
+ "step": 6319
+ },
+ {
+ "epoch": 36.114285714285714,
+ "grad_norm": 61.678016662597656,
+ "learning_rate": 1.5428571428571428e-05,
+ "loss": 0.2149,
+ "step": 6320
+ },
+ {
+ "epoch": 36.12,
+ "grad_norm": 35.04982376098633,
+ "learning_rate": 1.5422222222222224e-05,
+ "loss": 0.2917,
+ "step": 6321
+ },
+ {
+ "epoch": 36.12571428571429,
+ "grad_norm": 90.88272857666016,
+ "learning_rate": 1.5415873015873017e-05,
+ "loss": 0.1778,
+ "step": 6322
+ },
+ {
+ "epoch": 36.13142857142857,
+ "grad_norm": 39.97517013549805,
+ "learning_rate": 1.540952380952381e-05,
+ "loss": 0.3294,
+ "step": 6323
+ },
+ {
+ "epoch": 36.137142857142855,
+ "grad_norm": 29.622541427612305,
+ "learning_rate": 1.5403174603174602e-05,
+ "loss": 0.2512,
+ "step": 6324
+ },
+ {
+ "epoch": 36.142857142857146,
+ "grad_norm": 211.47042846679688,
+ "learning_rate": 1.5396825396825398e-05,
+ "loss": 0.2083,
+ "step": 6325
+ },
+ {
+ "epoch": 36.14857142857143,
+ "grad_norm": 76.01374816894531,
+ "learning_rate": 1.539047619047619e-05,
+ "loss": 0.2405,
+ "step": 6326
+ },
+ {
+ "epoch": 36.15428571428571,
+ "grad_norm": 48.560489654541016,
+ "learning_rate": 1.5384126984126983e-05,
+ "loss": 0.2085,
+ "step": 6327
+ },
+ {
+ "epoch": 36.16,
+ "grad_norm": 39.614715576171875,
+ "learning_rate": 1.537777777777778e-05,
+ "loss": 0.151,
+ "step": 6328
+ },
+ {
+ "epoch": 36.16571428571429,
+ "grad_norm": 165.3211669921875,
+ "learning_rate": 1.5371428571428572e-05,
+ "loss": 0.3179,
+ "step": 6329
+ },
+ {
+ "epoch": 36.17142857142857,
+ "grad_norm": 36.70731735229492,
+ "learning_rate": 1.5365079365079365e-05,
+ "loss": 0.0889,
+ "step": 6330
+ },
+ {
+ "epoch": 36.177142857142854,
+ "grad_norm": 22.209880828857422,
+ "learning_rate": 1.5358730158730157e-05,
+ "loss": 0.2098,
+ "step": 6331
+ },
+ {
+ "epoch": 36.182857142857145,
+ "grad_norm": 25.109262466430664,
+ "learning_rate": 1.5352380952380953e-05,
+ "loss": 0.2304,
+ "step": 6332
+ },
+ {
+ "epoch": 36.18857142857143,
+ "grad_norm": 40.98749542236328,
+ "learning_rate": 1.5346031746031746e-05,
+ "loss": 0.1794,
+ "step": 6333
+ },
+ {
+ "epoch": 36.19428571428571,
+ "grad_norm": 93.3605728149414,
+ "learning_rate": 1.533968253968254e-05,
+ "loss": 0.2494,
+ "step": 6334
+ },
+ {
+ "epoch": 36.2,
+ "grad_norm": 53.41642761230469,
+ "learning_rate": 1.5333333333333334e-05,
+ "loss": 0.2813,
+ "step": 6335
+ },
+ {
+ "epoch": 36.205714285714286,
+ "grad_norm": 20.14623260498047,
+ "learning_rate": 1.5326984126984127e-05,
+ "loss": 0.2313,
+ "step": 6336
+ },
+ {
+ "epoch": 36.21142857142857,
+ "grad_norm": 48.9773063659668,
+ "learning_rate": 1.5320634920634923e-05,
+ "loss": 0.1613,
+ "step": 6337
+ },
+ {
+ "epoch": 36.21714285714286,
+ "grad_norm": 123.74031829833984,
+ "learning_rate": 1.5314285714285716e-05,
+ "loss": 0.226,
+ "step": 6338
+ },
+ {
+ "epoch": 36.222857142857144,
+ "grad_norm": 22.779130935668945,
+ "learning_rate": 1.530793650793651e-05,
+ "loss": 0.1843,
+ "step": 6339
+ },
+ {
+ "epoch": 36.22857142857143,
+ "grad_norm": 98.26277160644531,
+ "learning_rate": 1.5301587301587304e-05,
+ "loss": 0.3517,
+ "step": 6340
+ },
+ {
+ "epoch": 36.23428571428571,
+ "grad_norm": 35.74067687988281,
+ "learning_rate": 1.5295238095238097e-05,
+ "loss": 0.1514,
+ "step": 6341
+ },
+ {
+ "epoch": 36.24,
+ "grad_norm": 95.01213073730469,
+ "learning_rate": 1.528888888888889e-05,
+ "loss": 0.1429,
+ "step": 6342
+ },
+ {
+ "epoch": 36.245714285714286,
+ "grad_norm": 48.54473114013672,
+ "learning_rate": 1.5282539682539686e-05,
+ "loss": 0.2102,
+ "step": 6343
+ },
+ {
+ "epoch": 36.25142857142857,
+ "grad_norm": 50.201717376708984,
+ "learning_rate": 1.527619047619048e-05,
+ "loss": 0.1812,
+ "step": 6344
+ },
+ {
+ "epoch": 36.25714285714286,
+ "grad_norm": 1488.32763671875,
+ "learning_rate": 1.526984126984127e-05,
+ "loss": 0.1958,
+ "step": 6345
+ },
+ {
+ "epoch": 36.26285714285714,
+ "grad_norm": 54.18905258178711,
+ "learning_rate": 1.5263492063492064e-05,
+ "loss": 0.1536,
+ "step": 6346
+ },
+ {
+ "epoch": 36.26857142857143,
+ "grad_norm": 39.101261138916016,
+ "learning_rate": 1.5257142857142858e-05,
+ "loss": 0.183,
+ "step": 6347
+ },
+ {
+ "epoch": 36.27428571428572,
+ "grad_norm": 92.49483489990234,
+ "learning_rate": 1.5250793650793652e-05,
+ "loss": 0.181,
+ "step": 6348
+ },
+ {
+ "epoch": 36.28,
+ "grad_norm": 32.887935638427734,
+ "learning_rate": 1.5244444444444445e-05,
+ "loss": 0.2423,
+ "step": 6349
+ },
+ {
+ "epoch": 36.285714285714285,
+ "grad_norm": 177.63185119628906,
+ "learning_rate": 1.5238095238095241e-05,
+ "loss": 0.2762,
+ "step": 6350
+ },
+ {
+ "epoch": 36.29142857142857,
+ "grad_norm": 77.8404541015625,
+ "learning_rate": 1.5231746031746034e-05,
+ "loss": 0.1804,
+ "step": 6351
+ },
+ {
+ "epoch": 36.29714285714286,
+ "grad_norm": 32.94889831542969,
+ "learning_rate": 1.5225396825396826e-05,
+ "loss": 0.2209,
+ "step": 6352
+ },
+ {
+ "epoch": 36.30285714285714,
+ "grad_norm": 63.828739166259766,
+ "learning_rate": 1.5219047619047619e-05,
+ "loss": 0.226,
+ "step": 6353
+ },
+ {
+ "epoch": 36.308571428571426,
+ "grad_norm": 43.031734466552734,
+ "learning_rate": 1.5212698412698415e-05,
+ "loss": 0.2416,
+ "step": 6354
+ },
+ {
+ "epoch": 36.31428571428572,
+ "grad_norm": 22.917098999023438,
+ "learning_rate": 1.5206349206349208e-05,
+ "loss": 0.1953,
+ "step": 6355
+ },
+ {
+ "epoch": 36.32,
+ "grad_norm": 54.05869674682617,
+ "learning_rate": 1.52e-05,
+ "loss": 0.1965,
+ "step": 6356
+ },
+ {
+ "epoch": 36.325714285714284,
+ "grad_norm": 107.59943389892578,
+ "learning_rate": 1.5193650793650793e-05,
+ "loss": 0.2637,
+ "step": 6357
+ },
+ {
+ "epoch": 36.331428571428575,
+ "grad_norm": 46.563541412353516,
+ "learning_rate": 1.5187301587301589e-05,
+ "loss": 0.2627,
+ "step": 6358
+ },
+ {
+ "epoch": 36.33714285714286,
+ "grad_norm": 31.009504318237305,
+ "learning_rate": 1.5180952380952381e-05,
+ "loss": 0.228,
+ "step": 6359
+ },
+ {
+ "epoch": 36.34285714285714,
+ "grad_norm": 27.440643310546875,
+ "learning_rate": 1.5174603174603174e-05,
+ "loss": 0.2416,
+ "step": 6360
+ },
+ {
+ "epoch": 36.348571428571425,
+ "grad_norm": 59.989654541015625,
+ "learning_rate": 1.5168253968253968e-05,
+ "loss": 0.1932,
+ "step": 6361
+ },
+ {
+ "epoch": 36.354285714285716,
+ "grad_norm": 83.95958709716797,
+ "learning_rate": 1.5161904761904763e-05,
+ "loss": 0.26,
+ "step": 6362
+ },
+ {
+ "epoch": 36.36,
+ "grad_norm": 48.0991096496582,
+ "learning_rate": 1.5155555555555555e-05,
+ "loss": 0.2212,
+ "step": 6363
+ },
+ {
+ "epoch": 36.36571428571428,
+ "grad_norm": 44.27768325805664,
+ "learning_rate": 1.514920634920635e-05,
+ "loss": 0.2014,
+ "step": 6364
+ },
+ {
+ "epoch": 36.371428571428574,
+ "grad_norm": 45.149261474609375,
+ "learning_rate": 1.5142857142857144e-05,
+ "loss": 0.1485,
+ "step": 6365
+ },
+ {
+ "epoch": 36.37714285714286,
+ "grad_norm": 90.59193420410156,
+ "learning_rate": 1.5136507936507938e-05,
+ "loss": 0.1746,
+ "step": 6366
+ },
+ {
+ "epoch": 36.38285714285714,
+ "grad_norm": 67.58226013183594,
+ "learning_rate": 1.5130158730158731e-05,
+ "loss": 0.3087,
+ "step": 6367
+ },
+ {
+ "epoch": 36.38857142857143,
+ "grad_norm": 68.11504364013672,
+ "learning_rate": 1.5123809523809524e-05,
+ "loss": 0.172,
+ "step": 6368
+ },
+ {
+ "epoch": 36.394285714285715,
+ "grad_norm": 49.809810638427734,
+ "learning_rate": 1.511746031746032e-05,
+ "loss": 0.2154,
+ "step": 6369
+ },
+ {
+ "epoch": 36.4,
+ "grad_norm": 53.1560173034668,
+ "learning_rate": 1.5111111111111112e-05,
+ "loss": 0.1507,
+ "step": 6370
+ },
+ {
+ "epoch": 36.40571428571428,
+ "grad_norm": 45.67121124267578,
+ "learning_rate": 1.5104761904761905e-05,
+ "loss": 0.1551,
+ "step": 6371
+ },
+ {
+ "epoch": 36.41142857142857,
+ "grad_norm": 105.23786163330078,
+ "learning_rate": 1.5098412698412698e-05,
+ "loss": 0.2679,
+ "step": 6372
+ },
+ {
+ "epoch": 36.417142857142856,
+ "grad_norm": 88.73975372314453,
+ "learning_rate": 1.5092063492063494e-05,
+ "loss": 0.1415,
+ "step": 6373
+ },
+ {
+ "epoch": 36.42285714285714,
+ "grad_norm": 84.12977600097656,
+ "learning_rate": 1.5085714285714286e-05,
+ "loss": 0.2191,
+ "step": 6374
+ },
+ {
+ "epoch": 36.42857142857143,
+ "grad_norm": 23.05473518371582,
+ "learning_rate": 1.5079365079365079e-05,
+ "loss": 0.1559,
+ "step": 6375
+ },
+ {
+ "epoch": 36.434285714285714,
+ "grad_norm": 28.77918815612793,
+ "learning_rate": 1.5073015873015875e-05,
+ "loss": 0.1839,
+ "step": 6376
+ },
+ {
+ "epoch": 36.44,
+ "grad_norm": 37.2401008605957,
+ "learning_rate": 1.5066666666666668e-05,
+ "loss": 0.1908,
+ "step": 6377
+ },
+ {
+ "epoch": 36.44571428571429,
+ "grad_norm": 53.14858627319336,
+ "learning_rate": 1.506031746031746e-05,
+ "loss": 0.2473,
+ "step": 6378
+ },
+ {
+ "epoch": 36.45142857142857,
+ "grad_norm": 30.43804168701172,
+ "learning_rate": 1.5053968253968253e-05,
+ "loss": 0.1916,
+ "step": 6379
+ },
+ {
+ "epoch": 36.457142857142856,
+ "grad_norm": 50.2374153137207,
+ "learning_rate": 1.5047619047619049e-05,
+ "loss": 0.2085,
+ "step": 6380
+ },
+ {
+ "epoch": 36.462857142857146,
+ "grad_norm": 28.409648895263672,
+ "learning_rate": 1.5041269841269841e-05,
+ "loss": 0.2252,
+ "step": 6381
+ },
+ {
+ "epoch": 36.46857142857143,
+ "grad_norm": 67.96338653564453,
+ "learning_rate": 1.5034920634920636e-05,
+ "loss": 0.1687,
+ "step": 6382
+ },
+ {
+ "epoch": 36.47428571428571,
+ "grad_norm": 17.873964309692383,
+ "learning_rate": 1.5028571428571428e-05,
+ "loss": 0.1904,
+ "step": 6383
+ },
+ {
+ "epoch": 36.48,
+ "grad_norm": 1682.106201171875,
+ "learning_rate": 1.5022222222222224e-05,
+ "loss": 0.2735,
+ "step": 6384
+ },
+ {
+ "epoch": 36.48571428571429,
+ "grad_norm": 51.79334259033203,
+ "learning_rate": 1.5015873015873017e-05,
+ "loss": 0.276,
+ "step": 6385
+ },
+ {
+ "epoch": 36.49142857142857,
+ "grad_norm": 38.2587890625,
+ "learning_rate": 1.500952380952381e-05,
+ "loss": 0.217,
+ "step": 6386
+ },
+ {
+ "epoch": 36.497142857142855,
+ "grad_norm": 29.614765167236328,
+ "learning_rate": 1.5003174603174606e-05,
+ "loss": 0.2729,
+ "step": 6387
+ },
+ {
+ "epoch": 36.502857142857145,
+ "grad_norm": 49.26810836791992,
+ "learning_rate": 1.4996825396825398e-05,
+ "loss": 0.2213,
+ "step": 6388
+ },
+ {
+ "epoch": 36.50857142857143,
+ "grad_norm": 79.2518539428711,
+ "learning_rate": 1.4990476190476191e-05,
+ "loss": 0.2216,
+ "step": 6389
+ },
+ {
+ "epoch": 36.51428571428571,
+ "grad_norm": 180.6911163330078,
+ "learning_rate": 1.4984126984126984e-05,
+ "loss": 0.2687,
+ "step": 6390
+ },
+ {
+ "epoch": 36.52,
+ "grad_norm": 51.260284423828125,
+ "learning_rate": 1.497777777777778e-05,
+ "loss": 0.2545,
+ "step": 6391
+ },
+ {
+ "epoch": 36.52571428571429,
+ "grad_norm": 21.773143768310547,
+ "learning_rate": 1.4971428571428572e-05,
+ "loss": 0.2171,
+ "step": 6392
+ },
+ {
+ "epoch": 36.53142857142857,
+ "grad_norm": 47.9932746887207,
+ "learning_rate": 1.4965079365079365e-05,
+ "loss": 0.1611,
+ "step": 6393
+ },
+ {
+ "epoch": 36.537142857142854,
+ "grad_norm": 19.89521598815918,
+ "learning_rate": 1.4958730158730158e-05,
+ "loss": 0.1706,
+ "step": 6394
+ },
+ {
+ "epoch": 36.542857142857144,
+ "grad_norm": 20.085758209228516,
+ "learning_rate": 1.4952380952380954e-05,
+ "loss": 0.138,
+ "step": 6395
+ },
+ {
+ "epoch": 36.54857142857143,
+ "grad_norm": 33.8851203918457,
+ "learning_rate": 1.4946031746031746e-05,
+ "loss": 0.2037,
+ "step": 6396
+ },
+ {
+ "epoch": 36.55428571428571,
+ "grad_norm": 49.09003448486328,
+ "learning_rate": 1.4939682539682539e-05,
+ "loss": 0.2313,
+ "step": 6397
+ },
+ {
+ "epoch": 36.56,
+ "grad_norm": 452.2305603027344,
+ "learning_rate": 1.4933333333333335e-05,
+ "loss": 0.2614,
+ "step": 6398
+ },
+ {
+ "epoch": 36.565714285714286,
+ "grad_norm": 47.73259353637695,
+ "learning_rate": 1.4926984126984128e-05,
+ "loss": 0.2196,
+ "step": 6399
+ },
+ {
+ "epoch": 36.57142857142857,
+ "grad_norm": 19.27354621887207,
+ "learning_rate": 1.4920634920634922e-05,
+ "loss": 0.28,
+ "step": 6400
+ },
+ {
+ "epoch": 36.57714285714286,
+ "grad_norm": 19.988956451416016,
+ "learning_rate": 1.4914285714285715e-05,
+ "loss": 0.1931,
+ "step": 6401
+ },
+ {
+ "epoch": 36.582857142857144,
+ "grad_norm": 219.86468505859375,
+ "learning_rate": 1.4907936507936509e-05,
+ "loss": 0.2611,
+ "step": 6402
+ },
+ {
+ "epoch": 36.58857142857143,
+ "grad_norm": 973.735595703125,
+ "learning_rate": 1.4901587301587303e-05,
+ "loss": 0.2148,
+ "step": 6403
+ },
+ {
+ "epoch": 36.59428571428572,
+ "grad_norm": 39.37485122680664,
+ "learning_rate": 1.4895238095238096e-05,
+ "loss": 0.1662,
+ "step": 6404
+ },
+ {
+ "epoch": 36.6,
+ "grad_norm": 22.795944213867188,
+ "learning_rate": 1.4888888888888888e-05,
+ "loss": 0.3078,
+ "step": 6405
+ },
+ {
+ "epoch": 36.605714285714285,
+ "grad_norm": 37.41624069213867,
+ "learning_rate": 1.4882539682539684e-05,
+ "loss": 0.1794,
+ "step": 6406
+ },
+ {
+ "epoch": 36.61142857142857,
+ "grad_norm": 54.39463806152344,
+ "learning_rate": 1.4876190476190477e-05,
+ "loss": 0.2123,
+ "step": 6407
+ },
+ {
+ "epoch": 36.61714285714286,
+ "grad_norm": 18.27862548828125,
+ "learning_rate": 1.486984126984127e-05,
+ "loss": 0.1803,
+ "step": 6408
+ },
+ {
+ "epoch": 36.62285714285714,
+ "grad_norm": 50.36140060424805,
+ "learning_rate": 1.4863492063492066e-05,
+ "loss": 0.2265,
+ "step": 6409
+ },
+ {
+ "epoch": 36.628571428571426,
+ "grad_norm": 26.36475372314453,
+ "learning_rate": 1.4857142857142858e-05,
+ "loss": 0.2193,
+ "step": 6410
+ },
+ {
+ "epoch": 36.63428571428572,
+ "grad_norm": 33.291900634765625,
+ "learning_rate": 1.4850793650793651e-05,
+ "loss": 0.2075,
+ "step": 6411
+ },
+ {
+ "epoch": 36.64,
+ "grad_norm": 42.37389373779297,
+ "learning_rate": 1.4844444444444444e-05,
+ "loss": 0.2289,
+ "step": 6412
+ },
+ {
+ "epoch": 36.645714285714284,
+ "grad_norm": 27.10552978515625,
+ "learning_rate": 1.483809523809524e-05,
+ "loss": 0.2085,
+ "step": 6413
+ },
+ {
+ "epoch": 36.651428571428575,
+ "grad_norm": 16.50041389465332,
+ "learning_rate": 1.4831746031746032e-05,
+ "loss": 0.1601,
+ "step": 6414
+ },
+ {
+ "epoch": 36.65714285714286,
+ "grad_norm": 24.146190643310547,
+ "learning_rate": 1.4825396825396825e-05,
+ "loss": 0.1922,
+ "step": 6415
+ },
+ {
+ "epoch": 36.66285714285714,
+ "grad_norm": 36.586544036865234,
+ "learning_rate": 1.481904761904762e-05,
+ "loss": 0.1325,
+ "step": 6416
+ },
+ {
+ "epoch": 36.668571428571425,
+ "grad_norm": 40.86701583862305,
+ "learning_rate": 1.4812698412698414e-05,
+ "loss": 0.1962,
+ "step": 6417
+ },
+ {
+ "epoch": 36.674285714285716,
+ "grad_norm": 65.42292022705078,
+ "learning_rate": 1.4806349206349206e-05,
+ "loss": 0.2012,
+ "step": 6418
+ },
+ {
+ "epoch": 36.68,
+ "grad_norm": 57.85270690917969,
+ "learning_rate": 1.48e-05,
+ "loss": 0.2726,
+ "step": 6419
+ },
+ {
+ "epoch": 36.68571428571428,
+ "grad_norm": 35.29873275756836,
+ "learning_rate": 1.4793650793650795e-05,
+ "loss": 0.2092,
+ "step": 6420
+ },
+ {
+ "epoch": 36.691428571428574,
+ "grad_norm": 135.01405334472656,
+ "learning_rate": 1.478730158730159e-05,
+ "loss": 0.22,
+ "step": 6421
+ },
+ {
+ "epoch": 36.69714285714286,
+ "grad_norm": 78.34668731689453,
+ "learning_rate": 1.4780952380952382e-05,
+ "loss": 0.195,
+ "step": 6422
+ },
+ {
+ "epoch": 36.70285714285714,
+ "grad_norm": 19.63154411315918,
+ "learning_rate": 1.4774603174603175e-05,
+ "loss": 0.205,
+ "step": 6423
+ },
+ {
+ "epoch": 36.70857142857143,
+ "grad_norm": 66.94962310791016,
+ "learning_rate": 1.476825396825397e-05,
+ "loss": 0.1626,
+ "step": 6424
+ },
+ {
+ "epoch": 36.714285714285715,
+ "grad_norm": 126.7664566040039,
+ "learning_rate": 1.4761904761904763e-05,
+ "loss": 0.2621,
+ "step": 6425
+ },
+ {
+ "epoch": 36.72,
+ "grad_norm": 21.17292022705078,
+ "learning_rate": 1.4755555555555556e-05,
+ "loss": 0.1923,
+ "step": 6426
+ },
+ {
+ "epoch": 36.72571428571428,
+ "grad_norm": 59.32046127319336,
+ "learning_rate": 1.4749206349206348e-05,
+ "loss": 0.2314,
+ "step": 6427
+ },
+ {
+ "epoch": 36.73142857142857,
+ "grad_norm": 192.17601013183594,
+ "learning_rate": 1.4742857142857144e-05,
+ "loss": 0.1811,
+ "step": 6428
+ },
+ {
+ "epoch": 36.73714285714286,
+ "grad_norm": 50.06969451904297,
+ "learning_rate": 1.4736507936507937e-05,
+ "loss": 0.2278,
+ "step": 6429
+ },
+ {
+ "epoch": 36.74285714285714,
+ "grad_norm": 29.369829177856445,
+ "learning_rate": 1.473015873015873e-05,
+ "loss": 0.1904,
+ "step": 6430
+ },
+ {
+ "epoch": 36.74857142857143,
+ "grad_norm": 18.54184341430664,
+ "learning_rate": 1.4723809523809526e-05,
+ "loss": 0.2018,
+ "step": 6431
+ },
+ {
+ "epoch": 36.754285714285714,
+ "grad_norm": 37.97724533081055,
+ "learning_rate": 1.4717460317460318e-05,
+ "loss": 0.2711,
+ "step": 6432
+ },
+ {
+ "epoch": 36.76,
+ "grad_norm": 116.49739837646484,
+ "learning_rate": 1.4711111111111111e-05,
+ "loss": 0.2082,
+ "step": 6433
+ },
+ {
+ "epoch": 36.76571428571429,
+ "grad_norm": 99.23430633544922,
+ "learning_rate": 1.4704761904761904e-05,
+ "loss": 0.2001,
+ "step": 6434
+ },
+ {
+ "epoch": 36.77142857142857,
+ "grad_norm": 51.099456787109375,
+ "learning_rate": 1.46984126984127e-05,
+ "loss": 0.2481,
+ "step": 6435
+ },
+ {
+ "epoch": 36.777142857142856,
+ "grad_norm": 59.015628814697266,
+ "learning_rate": 1.4692063492063492e-05,
+ "loss": 0.1419,
+ "step": 6436
+ },
+ {
+ "epoch": 36.78285714285714,
+ "grad_norm": 88.4302749633789,
+ "learning_rate": 1.4685714285714287e-05,
+ "loss": 0.3196,
+ "step": 6437
+ },
+ {
+ "epoch": 36.78857142857143,
+ "grad_norm": 42.81243133544922,
+ "learning_rate": 1.467936507936508e-05,
+ "loss": 0.1468,
+ "step": 6438
+ },
+ {
+ "epoch": 36.794285714285714,
+ "grad_norm": 23.068126678466797,
+ "learning_rate": 1.4673015873015875e-05,
+ "loss": 0.19,
+ "step": 6439
+ },
+ {
+ "epoch": 36.8,
+ "grad_norm": 44.7575798034668,
+ "learning_rate": 1.4666666666666668e-05,
+ "loss": 0.207,
+ "step": 6440
+ },
+ {
+ "epoch": 36.80571428571429,
+ "grad_norm": 35.08080291748047,
+ "learning_rate": 1.466031746031746e-05,
+ "loss": 0.1938,
+ "step": 6441
+ },
+ {
+ "epoch": 36.81142857142857,
+ "grad_norm": 991.8641357421875,
+ "learning_rate": 1.4653968253968257e-05,
+ "loss": 0.2564,
+ "step": 6442
+ },
+ {
+ "epoch": 36.817142857142855,
+ "grad_norm": 35.81486129760742,
+ "learning_rate": 1.464761904761905e-05,
+ "loss": 0.2436,
+ "step": 6443
+ },
+ {
+ "epoch": 36.822857142857146,
+ "grad_norm": 72.99882507324219,
+ "learning_rate": 1.4641269841269842e-05,
+ "loss": 0.2167,
+ "step": 6444
+ },
+ {
+ "epoch": 36.82857142857143,
+ "grad_norm": 70.42816925048828,
+ "learning_rate": 1.4634920634920635e-05,
+ "loss": 0.2139,
+ "step": 6445
+ },
+ {
+ "epoch": 36.83428571428571,
+ "grad_norm": 52.384498596191406,
+ "learning_rate": 1.462857142857143e-05,
+ "loss": 0.2429,
+ "step": 6446
+ },
+ {
+ "epoch": 36.84,
+ "grad_norm": 325.8592224121094,
+ "learning_rate": 1.4622222222222223e-05,
+ "loss": 0.2057,
+ "step": 6447
+ },
+ {
+ "epoch": 36.84571428571429,
+ "grad_norm": 77.25653076171875,
+ "learning_rate": 1.4615873015873016e-05,
+ "loss": 0.1883,
+ "step": 6448
+ },
+ {
+ "epoch": 36.85142857142857,
+ "grad_norm": 25.819324493408203,
+ "learning_rate": 1.4609523809523808e-05,
+ "loss": 0.3005,
+ "step": 6449
+ },
+ {
+ "epoch": 36.857142857142854,
+ "grad_norm": 34.40315246582031,
+ "learning_rate": 1.4603174603174605e-05,
+ "loss": 0.1597,
+ "step": 6450
+ },
+ {
+ "epoch": 36.862857142857145,
+ "grad_norm": 55.17396545410156,
+ "learning_rate": 1.4596825396825397e-05,
+ "loss": 0.1773,
+ "step": 6451
+ },
+ {
+ "epoch": 36.86857142857143,
+ "grad_norm": 50.16841125488281,
+ "learning_rate": 1.459047619047619e-05,
+ "loss": 0.1644,
+ "step": 6452
+ },
+ {
+ "epoch": 36.87428571428571,
+ "grad_norm": 38.792442321777344,
+ "learning_rate": 1.4584126984126986e-05,
+ "loss": 0.174,
+ "step": 6453
+ },
+ {
+ "epoch": 36.88,
+ "grad_norm": 70.29129791259766,
+ "learning_rate": 1.4577777777777778e-05,
+ "loss": 0.2756,
+ "step": 6454
+ },
+ {
+ "epoch": 36.885714285714286,
+ "grad_norm": 31.52997398376465,
+ "learning_rate": 1.4571428571428573e-05,
+ "loss": 0.1333,
+ "step": 6455
+ },
+ {
+ "epoch": 36.89142857142857,
+ "grad_norm": 54.99339294433594,
+ "learning_rate": 1.4565079365079365e-05,
+ "loss": 0.1977,
+ "step": 6456
+ },
+ {
+ "epoch": 36.89714285714286,
+ "grad_norm": 17.953866958618164,
+ "learning_rate": 1.455873015873016e-05,
+ "loss": 0.2214,
+ "step": 6457
+ },
+ {
+ "epoch": 36.902857142857144,
+ "grad_norm": 163.7252197265625,
+ "learning_rate": 1.4552380952380954e-05,
+ "loss": 0.2708,
+ "step": 6458
+ },
+ {
+ "epoch": 36.90857142857143,
+ "grad_norm": 33.827884674072266,
+ "learning_rate": 1.4546031746031747e-05,
+ "loss": 0.2193,
+ "step": 6459
+ },
+ {
+ "epoch": 36.91428571428571,
+ "grad_norm": 60.707435607910156,
+ "learning_rate": 1.453968253968254e-05,
+ "loss": 0.1865,
+ "step": 6460
+ },
+ {
+ "epoch": 36.92,
+ "grad_norm": 55.701087951660156,
+ "learning_rate": 1.4533333333333335e-05,
+ "loss": 0.1736,
+ "step": 6461
+ },
+ {
+ "epoch": 36.925714285714285,
+ "grad_norm": 21.98332977294922,
+ "learning_rate": 1.4526984126984128e-05,
+ "loss": 0.1634,
+ "step": 6462
+ },
+ {
+ "epoch": 36.93142857142857,
+ "grad_norm": 75.26737213134766,
+ "learning_rate": 1.452063492063492e-05,
+ "loss": 0.1949,
+ "step": 6463
+ },
+ {
+ "epoch": 36.93714285714286,
+ "grad_norm": 29.80234718322754,
+ "learning_rate": 1.4514285714285713e-05,
+ "loss": 0.1972,
+ "step": 6464
+ },
+ {
+ "epoch": 36.94285714285714,
+ "grad_norm": 68.53266143798828,
+ "learning_rate": 1.450793650793651e-05,
+ "loss": 0.1938,
+ "step": 6465
+ },
+ {
+ "epoch": 36.94857142857143,
+ "grad_norm": 96.14524841308594,
+ "learning_rate": 1.4501587301587302e-05,
+ "loss": 0.1771,
+ "step": 6466
+ },
+ {
+ "epoch": 36.95428571428572,
+ "grad_norm": 31.954814910888672,
+ "learning_rate": 1.4495238095238095e-05,
+ "loss": 0.1835,
+ "step": 6467
+ },
+ {
+ "epoch": 36.96,
+ "grad_norm": 103.53461456298828,
+ "learning_rate": 1.448888888888889e-05,
+ "loss": 0.2957,
+ "step": 6468
+ },
+ {
+ "epoch": 36.965714285714284,
+ "grad_norm": 154.65966796875,
+ "learning_rate": 1.4482539682539683e-05,
+ "loss": 0.2546,
+ "step": 6469
+ },
+ {
+ "epoch": 36.97142857142857,
+ "grad_norm": 27.442543029785156,
+ "learning_rate": 1.4476190476190476e-05,
+ "loss": 0.3516,
+ "step": 6470
+ },
+ {
+ "epoch": 36.97714285714286,
+ "grad_norm": 21.102581024169922,
+ "learning_rate": 1.446984126984127e-05,
+ "loss": 0.1881,
+ "step": 6471
+ },
+ {
+ "epoch": 36.98285714285714,
+ "grad_norm": 140.93621826171875,
+ "learning_rate": 1.4463492063492065e-05,
+ "loss": 0.2462,
+ "step": 6472
+ },
+ {
+ "epoch": 36.988571428571426,
+ "grad_norm": 20.03640365600586,
+ "learning_rate": 1.4457142857142857e-05,
+ "loss": 0.1546,
+ "step": 6473
+ },
+ {
+ "epoch": 36.994285714285716,
+ "grad_norm": 24.340862274169922,
+ "learning_rate": 1.4450793650793651e-05,
+ "loss": 0.1989,
+ "step": 6474
+ },
+ {
+ "epoch": 37.0,
+ "grad_norm": 33.04745864868164,
+ "learning_rate": 1.4444444444444444e-05,
+ "loss": 0.2284,
+ "step": 6475
+ },
+ {
+ "epoch": 37.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5924772024154663,
+ "eval_map": 0.9348,
+ "eval_map_50": 0.9709,
+ "eval_map_75": 0.9658,
+ "eval_map_large": 0.9348,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9348,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7867,
+ "eval_mar_10": 0.9698,
+ "eval_mar_100": 0.9733,
+ "eval_mar_100_per_class": 0.9733,
+ "eval_mar_large": 0.9733,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.3816,
+ "eval_samples_per_second": 21.97,
+ "eval_steps_per_second": 2.765,
+ "step": 6475
+ },
+ {
+ "epoch": 37.005714285714284,
+ "grad_norm": 42.18199920654297,
+ "learning_rate": 1.443809523809524e-05,
+ "loss": 0.166,
+ "step": 6476
+ },
+ {
+ "epoch": 37.011428571428574,
+ "grad_norm": 33.291439056396484,
+ "learning_rate": 1.4431746031746033e-05,
+ "loss": 0.1766,
+ "step": 6477
+ },
+ {
+ "epoch": 37.01714285714286,
+ "grad_norm": 73.50809478759766,
+ "learning_rate": 1.4425396825396825e-05,
+ "loss": 0.2126,
+ "step": 6478
+ },
+ {
+ "epoch": 37.02285714285714,
+ "grad_norm": 102.65695190429688,
+ "learning_rate": 1.4419047619047621e-05,
+ "loss": 0.356,
+ "step": 6479
+ },
+ {
+ "epoch": 37.02857142857143,
+ "grad_norm": 486.31036376953125,
+ "learning_rate": 1.4412698412698414e-05,
+ "loss": 0.2182,
+ "step": 6480
+ },
+ {
+ "epoch": 37.034285714285716,
+ "grad_norm": 32.039756774902344,
+ "learning_rate": 1.4406349206349207e-05,
+ "loss": 0.1329,
+ "step": 6481
+ },
+ {
+ "epoch": 37.04,
+ "grad_norm": 83.30311584472656,
+ "learning_rate": 1.44e-05,
+ "loss": 0.1776,
+ "step": 6482
+ },
+ {
+ "epoch": 37.04571428571428,
+ "grad_norm": 42.36127853393555,
+ "learning_rate": 1.4393650793650795e-05,
+ "loss": 0.1668,
+ "step": 6483
+ },
+ {
+ "epoch": 37.05142857142857,
+ "grad_norm": 57.76752853393555,
+ "learning_rate": 1.4387301587301588e-05,
+ "loss": 0.2634,
+ "step": 6484
+ },
+ {
+ "epoch": 37.05714285714286,
+ "grad_norm": 25.50920295715332,
+ "learning_rate": 1.438095238095238e-05,
+ "loss": 0.1957,
+ "step": 6485
+ },
+ {
+ "epoch": 37.06285714285714,
+ "grad_norm": 34.5374641418457,
+ "learning_rate": 1.4374603174603173e-05,
+ "loss": 0.1514,
+ "step": 6486
+ },
+ {
+ "epoch": 37.06857142857143,
+ "grad_norm": 64.24982452392578,
+ "learning_rate": 1.436825396825397e-05,
+ "loss": 0.1747,
+ "step": 6487
+ },
+ {
+ "epoch": 37.074285714285715,
+ "grad_norm": 33.088592529296875,
+ "learning_rate": 1.4361904761904762e-05,
+ "loss": 0.1769,
+ "step": 6488
+ },
+ {
+ "epoch": 37.08,
+ "grad_norm": 48.71031188964844,
+ "learning_rate": 1.4355555555555556e-05,
+ "loss": 0.2206,
+ "step": 6489
+ },
+ {
+ "epoch": 37.08571428571429,
+ "grad_norm": 46.32160186767578,
+ "learning_rate": 1.434920634920635e-05,
+ "loss": 0.1781,
+ "step": 6490
+ },
+ {
+ "epoch": 37.09142857142857,
+ "grad_norm": 57.94861602783203,
+ "learning_rate": 1.4342857142857143e-05,
+ "loss": 0.1981,
+ "step": 6491
+ },
+ {
+ "epoch": 37.097142857142856,
+ "grad_norm": 33.29441833496094,
+ "learning_rate": 1.4336507936507938e-05,
+ "loss": 0.1875,
+ "step": 6492
+ },
+ {
+ "epoch": 37.10285714285714,
+ "grad_norm": 85.9415512084961,
+ "learning_rate": 1.433015873015873e-05,
+ "loss": 0.2358,
+ "step": 6493
+ },
+ {
+ "epoch": 37.10857142857143,
+ "grad_norm": 30.147438049316406,
+ "learning_rate": 1.4323809523809526e-05,
+ "loss": 0.1804,
+ "step": 6494
+ },
+ {
+ "epoch": 37.114285714285714,
+ "grad_norm": 70.6728744506836,
+ "learning_rate": 1.4317460317460319e-05,
+ "loss": 0.1938,
+ "step": 6495
+ },
+ {
+ "epoch": 37.12,
+ "grad_norm": 45.146568298339844,
+ "learning_rate": 1.4311111111111111e-05,
+ "loss": 0.1775,
+ "step": 6496
+ },
+ {
+ "epoch": 37.12571428571429,
+ "grad_norm": 25.29806137084961,
+ "learning_rate": 1.4304761904761904e-05,
+ "loss": 0.1952,
+ "step": 6497
+ },
+ {
+ "epoch": 37.13142857142857,
+ "grad_norm": 62.408451080322266,
+ "learning_rate": 1.42984126984127e-05,
+ "loss": 0.246,
+ "step": 6498
+ },
+ {
+ "epoch": 37.137142857142855,
+ "grad_norm": 56.218135833740234,
+ "learning_rate": 1.4292063492063493e-05,
+ "loss": 0.2028,
+ "step": 6499
+ },
+ {
+ "epoch": 37.142857142857146,
+ "grad_norm": 30.245328903198242,
+ "learning_rate": 1.4285714285714285e-05,
+ "loss": 0.16,
+ "step": 6500
+ },
+ {
+ "epoch": 37.14857142857143,
+ "grad_norm": 40.37526321411133,
+ "learning_rate": 1.4279365079365081e-05,
+ "loss": 0.1785,
+ "step": 6501
+ },
+ {
+ "epoch": 37.15428571428571,
+ "grad_norm": 28.770414352416992,
+ "learning_rate": 1.4273015873015874e-05,
+ "loss": 0.2346,
+ "step": 6502
+ },
+ {
+ "epoch": 37.16,
+ "grad_norm": 47.9056510925293,
+ "learning_rate": 1.4266666666666667e-05,
+ "loss": 0.2143,
+ "step": 6503
+ },
+ {
+ "epoch": 37.16571428571429,
+ "grad_norm": 35.36516189575195,
+ "learning_rate": 1.426031746031746e-05,
+ "loss": 0.2356,
+ "step": 6504
+ },
+ {
+ "epoch": 37.17142857142857,
+ "grad_norm": 30.85645294189453,
+ "learning_rate": 1.4253968253968255e-05,
+ "loss": 0.2061,
+ "step": 6505
+ },
+ {
+ "epoch": 37.177142857142854,
+ "grad_norm": 21.12093162536621,
+ "learning_rate": 1.4247619047619048e-05,
+ "loss": 0.1627,
+ "step": 6506
+ },
+ {
+ "epoch": 37.182857142857145,
+ "grad_norm": 38.55308532714844,
+ "learning_rate": 1.424126984126984e-05,
+ "loss": 0.1425,
+ "step": 6507
+ },
+ {
+ "epoch": 37.18857142857143,
+ "grad_norm": 22.23093032836914,
+ "learning_rate": 1.4234920634920635e-05,
+ "loss": 0.2234,
+ "step": 6508
+ },
+ {
+ "epoch": 37.19428571428571,
+ "grad_norm": 138.44908142089844,
+ "learning_rate": 1.422857142857143e-05,
+ "loss": 0.2175,
+ "step": 6509
+ },
+ {
+ "epoch": 37.2,
+ "grad_norm": 29.336397171020508,
+ "learning_rate": 1.4222222222222224e-05,
+ "loss": 0.2913,
+ "step": 6510
+ },
+ {
+ "epoch": 37.205714285714286,
+ "grad_norm": 114.45040130615234,
+ "learning_rate": 1.4215873015873016e-05,
+ "loss": 0.2199,
+ "step": 6511
+ },
+ {
+ "epoch": 37.21142857142857,
+ "grad_norm": 35.597721099853516,
+ "learning_rate": 1.420952380952381e-05,
+ "loss": 0.2706,
+ "step": 6512
+ },
+ {
+ "epoch": 37.21714285714286,
+ "grad_norm": 72.16755676269531,
+ "learning_rate": 1.4203174603174605e-05,
+ "loss": 0.1653,
+ "step": 6513
+ },
+ {
+ "epoch": 37.222857142857144,
+ "grad_norm": 27.48973274230957,
+ "learning_rate": 1.4196825396825398e-05,
+ "loss": 0.2711,
+ "step": 6514
+ },
+ {
+ "epoch": 37.22857142857143,
+ "grad_norm": 44.54720687866211,
+ "learning_rate": 1.419047619047619e-05,
+ "loss": 0.1249,
+ "step": 6515
+ },
+ {
+ "epoch": 37.23428571428571,
+ "grad_norm": 48.198299407958984,
+ "learning_rate": 1.4184126984126986e-05,
+ "loss": 0.1662,
+ "step": 6516
+ },
+ {
+ "epoch": 37.24,
+ "grad_norm": 49.05499267578125,
+ "learning_rate": 1.4177777777777779e-05,
+ "loss": 0.1691,
+ "step": 6517
+ },
+ {
+ "epoch": 37.245714285714286,
+ "grad_norm": 128.78123474121094,
+ "learning_rate": 1.4171428571428572e-05,
+ "loss": 0.2054,
+ "step": 6518
+ },
+ {
+ "epoch": 37.25142857142857,
+ "grad_norm": 56.933284759521484,
+ "learning_rate": 1.4165079365079364e-05,
+ "loss": 0.2684,
+ "step": 6519
+ },
+ {
+ "epoch": 37.25714285714286,
+ "grad_norm": 21.544979095458984,
+ "learning_rate": 1.415873015873016e-05,
+ "loss": 0.1621,
+ "step": 6520
+ },
+ {
+ "epoch": 37.26285714285714,
+ "grad_norm": 63.549198150634766,
+ "learning_rate": 1.4152380952380953e-05,
+ "loss": 0.1631,
+ "step": 6521
+ },
+ {
+ "epoch": 37.26857142857143,
+ "grad_norm": 80.13479614257812,
+ "learning_rate": 1.4146031746031745e-05,
+ "loss": 0.2853,
+ "step": 6522
+ },
+ {
+ "epoch": 37.27428571428572,
+ "grad_norm": 28.332656860351562,
+ "learning_rate": 1.4139682539682541e-05,
+ "loss": 0.1501,
+ "step": 6523
+ },
+ {
+ "epoch": 37.28,
+ "grad_norm": 33.4707145690918,
+ "learning_rate": 1.4133333333333334e-05,
+ "loss": 0.358,
+ "step": 6524
+ },
+ {
+ "epoch": 37.285714285714285,
+ "grad_norm": 40.02132797241211,
+ "learning_rate": 1.4126984126984127e-05,
+ "loss": 0.2312,
+ "step": 6525
+ },
+ {
+ "epoch": 37.29142857142857,
+ "grad_norm": 22.119464874267578,
+ "learning_rate": 1.4120634920634921e-05,
+ "loss": 0.176,
+ "step": 6526
+ },
+ {
+ "epoch": 37.29714285714286,
+ "grad_norm": 48.0070686340332,
+ "learning_rate": 1.4114285714285715e-05,
+ "loss": 0.2432,
+ "step": 6527
+ },
+ {
+ "epoch": 37.30285714285714,
+ "grad_norm": 40.382179260253906,
+ "learning_rate": 1.410793650793651e-05,
+ "loss": 0.1805,
+ "step": 6528
+ },
+ {
+ "epoch": 37.308571428571426,
+ "grad_norm": 67.17555236816406,
+ "learning_rate": 1.4101587301587302e-05,
+ "loss": 0.2174,
+ "step": 6529
+ },
+ {
+ "epoch": 37.31428571428572,
+ "grad_norm": 35.86476516723633,
+ "learning_rate": 1.4095238095238095e-05,
+ "loss": 0.2377,
+ "step": 6530
+ },
+ {
+ "epoch": 37.32,
+ "grad_norm": 1549.185791015625,
+ "learning_rate": 1.4088888888888891e-05,
+ "loss": 0.1954,
+ "step": 6531
+ },
+ {
+ "epoch": 37.325714285714284,
+ "grad_norm": 22.005220413208008,
+ "learning_rate": 1.4082539682539684e-05,
+ "loss": 0.2031,
+ "step": 6532
+ },
+ {
+ "epoch": 37.331428571428575,
+ "grad_norm": 84.3892593383789,
+ "learning_rate": 1.4076190476190476e-05,
+ "loss": 0.1654,
+ "step": 6533
+ },
+ {
+ "epoch": 37.33714285714286,
+ "grad_norm": 34.993690490722656,
+ "learning_rate": 1.4069841269841272e-05,
+ "loss": 0.1242,
+ "step": 6534
+ },
+ {
+ "epoch": 37.34285714285714,
+ "grad_norm": 70.59294891357422,
+ "learning_rate": 1.4063492063492065e-05,
+ "loss": 0.1917,
+ "step": 6535
+ },
+ {
+ "epoch": 37.348571428571425,
+ "grad_norm": 85.08328247070312,
+ "learning_rate": 1.4057142857142858e-05,
+ "loss": 0.1536,
+ "step": 6536
+ },
+ {
+ "epoch": 37.354285714285716,
+ "grad_norm": 42.231712341308594,
+ "learning_rate": 1.405079365079365e-05,
+ "loss": 0.1806,
+ "step": 6537
+ },
+ {
+ "epoch": 37.36,
+ "grad_norm": 29.59331512451172,
+ "learning_rate": 1.4044444444444446e-05,
+ "loss": 0.271,
+ "step": 6538
+ },
+ {
+ "epoch": 37.36571428571428,
+ "grad_norm": 62.4417610168457,
+ "learning_rate": 1.4038095238095239e-05,
+ "loss": 0.2398,
+ "step": 6539
+ },
+ {
+ "epoch": 37.371428571428574,
+ "grad_norm": 733.9818725585938,
+ "learning_rate": 1.4031746031746032e-05,
+ "loss": 0.1833,
+ "step": 6540
+ },
+ {
+ "epoch": 37.37714285714286,
+ "grad_norm": 68.64158630371094,
+ "learning_rate": 1.4025396825396824e-05,
+ "loss": 0.193,
+ "step": 6541
+ },
+ {
+ "epoch": 37.38285714285714,
+ "grad_norm": 127.94831085205078,
+ "learning_rate": 1.401904761904762e-05,
+ "loss": 0.2419,
+ "step": 6542
+ },
+ {
+ "epoch": 37.38857142857143,
+ "grad_norm": 79.00579071044922,
+ "learning_rate": 1.4012698412698413e-05,
+ "loss": 0.218,
+ "step": 6543
+ },
+ {
+ "epoch": 37.394285714285715,
+ "grad_norm": 45.025455474853516,
+ "learning_rate": 1.4006349206349207e-05,
+ "loss": 0.2585,
+ "step": 6544
+ },
+ {
+ "epoch": 37.4,
+ "grad_norm": 48.02553939819336,
+ "learning_rate": 1.4000000000000001e-05,
+ "loss": 0.2066,
+ "step": 6545
+ },
+ {
+ "epoch": 37.40571428571428,
+ "grad_norm": 80.67889404296875,
+ "learning_rate": 1.3993650793650794e-05,
+ "loss": 0.1473,
+ "step": 6546
+ },
+ {
+ "epoch": 37.41142857142857,
+ "grad_norm": 680.8949584960938,
+ "learning_rate": 1.3987301587301588e-05,
+ "loss": 0.2644,
+ "step": 6547
+ },
+ {
+ "epoch": 37.417142857142856,
+ "grad_norm": 35.802101135253906,
+ "learning_rate": 1.3980952380952381e-05,
+ "loss": 0.1428,
+ "step": 6548
+ },
+ {
+ "epoch": 37.42285714285714,
+ "grad_norm": 28.695444107055664,
+ "learning_rate": 1.3974603174603177e-05,
+ "loss": 0.2259,
+ "step": 6549
+ },
+ {
+ "epoch": 37.42857142857143,
+ "grad_norm": 43.1397705078125,
+ "learning_rate": 1.396825396825397e-05,
+ "loss": 0.3066,
+ "step": 6550
+ },
+ {
+ "epoch": 37.434285714285714,
+ "grad_norm": 45.93376922607422,
+ "learning_rate": 1.3961904761904762e-05,
+ "loss": 0.1508,
+ "step": 6551
+ },
+ {
+ "epoch": 37.44,
+ "grad_norm": 67.8795394897461,
+ "learning_rate": 1.3955555555555555e-05,
+ "loss": 0.1917,
+ "step": 6552
+ },
+ {
+ "epoch": 37.44571428571429,
+ "grad_norm": 22.79017448425293,
+ "learning_rate": 1.3949206349206351e-05,
+ "loss": 0.1577,
+ "step": 6553
+ },
+ {
+ "epoch": 37.45142857142857,
+ "grad_norm": 328.4523620605469,
+ "learning_rate": 1.3942857142857144e-05,
+ "loss": 0.2288,
+ "step": 6554
+ },
+ {
+ "epoch": 37.457142857142856,
+ "grad_norm": 57.735652923583984,
+ "learning_rate": 1.3936507936507936e-05,
+ "loss": 0.1693,
+ "step": 6555
+ },
+ {
+ "epoch": 37.462857142857146,
+ "grad_norm": 46.90785217285156,
+ "learning_rate": 1.3930158730158732e-05,
+ "loss": 0.1789,
+ "step": 6556
+ },
+ {
+ "epoch": 37.46857142857143,
+ "grad_norm": 28.03649139404297,
+ "learning_rate": 1.3923809523809525e-05,
+ "loss": 0.2141,
+ "step": 6557
+ },
+ {
+ "epoch": 37.47428571428571,
+ "grad_norm": 76.3325424194336,
+ "learning_rate": 1.3917460317460318e-05,
+ "loss": 0.1989,
+ "step": 6558
+ },
+ {
+ "epoch": 37.48,
+ "grad_norm": 40.22255325317383,
+ "learning_rate": 1.391111111111111e-05,
+ "loss": 0.2684,
+ "step": 6559
+ },
+ {
+ "epoch": 37.48571428571429,
+ "grad_norm": 47.31599044799805,
+ "learning_rate": 1.3904761904761906e-05,
+ "loss": 0.2641,
+ "step": 6560
+ },
+ {
+ "epoch": 37.49142857142857,
+ "grad_norm": 96.81246185302734,
+ "learning_rate": 1.3898412698412699e-05,
+ "loss": 0.2587,
+ "step": 6561
+ },
+ {
+ "epoch": 37.497142857142855,
+ "grad_norm": 62.16279983520508,
+ "learning_rate": 1.3892063492063492e-05,
+ "loss": 0.2024,
+ "step": 6562
+ },
+ {
+ "epoch": 37.502857142857145,
+ "grad_norm": 62.037052154541016,
+ "learning_rate": 1.3885714285714286e-05,
+ "loss": 0.1812,
+ "step": 6563
+ },
+ {
+ "epoch": 37.50857142857143,
+ "grad_norm": 25.21396255493164,
+ "learning_rate": 1.387936507936508e-05,
+ "loss": 0.2129,
+ "step": 6564
+ },
+ {
+ "epoch": 37.51428571428571,
+ "grad_norm": 77.27635192871094,
+ "learning_rate": 1.3873015873015875e-05,
+ "loss": 0.2085,
+ "step": 6565
+ },
+ {
+ "epoch": 37.52,
+ "grad_norm": 54.68073272705078,
+ "learning_rate": 1.3866666666666667e-05,
+ "loss": 0.2428,
+ "step": 6566
+ },
+ {
+ "epoch": 37.52571428571429,
+ "grad_norm": 62.48573303222656,
+ "learning_rate": 1.3860317460317463e-05,
+ "loss": 0.194,
+ "step": 6567
+ },
+ {
+ "epoch": 37.53142857142857,
+ "grad_norm": 17.646411895751953,
+ "learning_rate": 1.3853968253968256e-05,
+ "loss": 0.1942,
+ "step": 6568
+ },
+ {
+ "epoch": 37.537142857142854,
+ "grad_norm": 18.57720184326172,
+ "learning_rate": 1.3847619047619048e-05,
+ "loss": 0.1564,
+ "step": 6569
+ },
+ {
+ "epoch": 37.542857142857144,
+ "grad_norm": 48.49040985107422,
+ "learning_rate": 1.3841269841269841e-05,
+ "loss": 0.1695,
+ "step": 6570
+ },
+ {
+ "epoch": 37.54857142857143,
+ "grad_norm": 33.05989456176758,
+ "learning_rate": 1.3834920634920637e-05,
+ "loss": 0.2705,
+ "step": 6571
+ },
+ {
+ "epoch": 37.55428571428571,
+ "grad_norm": 85.05858612060547,
+ "learning_rate": 1.382857142857143e-05,
+ "loss": 0.1861,
+ "step": 6572
+ },
+ {
+ "epoch": 37.56,
+ "grad_norm": 39.465545654296875,
+ "learning_rate": 1.3822222222222222e-05,
+ "loss": 0.2664,
+ "step": 6573
+ },
+ {
+ "epoch": 37.565714285714286,
+ "grad_norm": 26.797693252563477,
+ "learning_rate": 1.3815873015873015e-05,
+ "loss": 0.1232,
+ "step": 6574
+ },
+ {
+ "epoch": 37.57142857142857,
+ "grad_norm": 48.3971061706543,
+ "learning_rate": 1.3809523809523811e-05,
+ "loss": 0.2674,
+ "step": 6575
+ },
+ {
+ "epoch": 37.57714285714286,
+ "grad_norm": 72.57957458496094,
+ "learning_rate": 1.3803174603174604e-05,
+ "loss": 0.2684,
+ "step": 6576
+ },
+ {
+ "epoch": 37.582857142857144,
+ "grad_norm": 61.59852981567383,
+ "learning_rate": 1.3796825396825396e-05,
+ "loss": 0.2086,
+ "step": 6577
+ },
+ {
+ "epoch": 37.58857142857143,
+ "grad_norm": 57.468589782714844,
+ "learning_rate": 1.3790476190476189e-05,
+ "loss": 0.2249,
+ "step": 6578
+ },
+ {
+ "epoch": 37.59428571428572,
+ "grad_norm": 127.7385482788086,
+ "learning_rate": 1.3784126984126985e-05,
+ "loss": 0.2528,
+ "step": 6579
+ },
+ {
+ "epoch": 37.6,
+ "grad_norm": 43.52939224243164,
+ "learning_rate": 1.3777777777777778e-05,
+ "loss": 0.2149,
+ "step": 6580
+ },
+ {
+ "epoch": 37.605714285714285,
+ "grad_norm": 73.5933609008789,
+ "learning_rate": 1.3771428571428572e-05,
+ "loss": 0.2148,
+ "step": 6581
+ },
+ {
+ "epoch": 37.61142857142857,
+ "grad_norm": 32.99773025512695,
+ "learning_rate": 1.3765079365079366e-05,
+ "loss": 0.2071,
+ "step": 6582
+ },
+ {
+ "epoch": 37.61714285714286,
+ "grad_norm": 39.80093002319336,
+ "learning_rate": 1.375873015873016e-05,
+ "loss": 0.239,
+ "step": 6583
+ },
+ {
+ "epoch": 37.62285714285714,
+ "grad_norm": 23.65448570251465,
+ "learning_rate": 1.3752380952380953e-05,
+ "loss": 0.1962,
+ "step": 6584
+ },
+ {
+ "epoch": 37.628571428571426,
+ "grad_norm": 63.404449462890625,
+ "learning_rate": 1.3746031746031746e-05,
+ "loss": 0.2124,
+ "step": 6585
+ },
+ {
+ "epoch": 37.63428571428572,
+ "grad_norm": 40.21116256713867,
+ "learning_rate": 1.3739682539682542e-05,
+ "loss": 0.1729,
+ "step": 6586
+ },
+ {
+ "epoch": 37.64,
+ "grad_norm": 116.45388793945312,
+ "learning_rate": 1.3733333333333335e-05,
+ "loss": 0.3224,
+ "step": 6587
+ },
+ {
+ "epoch": 37.645714285714284,
+ "grad_norm": 38.228187561035156,
+ "learning_rate": 1.3726984126984127e-05,
+ "loss": 0.2296,
+ "step": 6588
+ },
+ {
+ "epoch": 37.651428571428575,
+ "grad_norm": 26.770566940307617,
+ "learning_rate": 1.372063492063492e-05,
+ "loss": 0.2197,
+ "step": 6589
+ },
+ {
+ "epoch": 37.65714285714286,
+ "grad_norm": 71.01809692382812,
+ "learning_rate": 1.3714285714285716e-05,
+ "loss": 0.114,
+ "step": 6590
+ },
+ {
+ "epoch": 37.66285714285714,
+ "grad_norm": 61.19960021972656,
+ "learning_rate": 1.3707936507936508e-05,
+ "loss": 0.2393,
+ "step": 6591
+ },
+ {
+ "epoch": 37.668571428571425,
+ "grad_norm": 69.12433624267578,
+ "learning_rate": 1.3701587301587301e-05,
+ "loss": 0.2768,
+ "step": 6592
+ },
+ {
+ "epoch": 37.674285714285716,
+ "grad_norm": 28.768465042114258,
+ "learning_rate": 1.3695238095238097e-05,
+ "loss": 0.1579,
+ "step": 6593
+ },
+ {
+ "epoch": 37.68,
+ "grad_norm": 71.30499267578125,
+ "learning_rate": 1.368888888888889e-05,
+ "loss": 0.2484,
+ "step": 6594
+ },
+ {
+ "epoch": 37.68571428571428,
+ "grad_norm": 37.34684371948242,
+ "learning_rate": 1.3682539682539682e-05,
+ "loss": 0.1406,
+ "step": 6595
+ },
+ {
+ "epoch": 37.691428571428574,
+ "grad_norm": 36.19123077392578,
+ "learning_rate": 1.3676190476190475e-05,
+ "loss": 0.1883,
+ "step": 6596
+ },
+ {
+ "epoch": 37.69714285714286,
+ "grad_norm": 31.692096710205078,
+ "learning_rate": 1.3669841269841271e-05,
+ "loss": 0.1799,
+ "step": 6597
+ },
+ {
+ "epoch": 37.70285714285714,
+ "grad_norm": 23.94115447998047,
+ "learning_rate": 1.3663492063492064e-05,
+ "loss": 0.2304,
+ "step": 6598
+ },
+ {
+ "epoch": 37.70857142857143,
+ "grad_norm": 67.50762939453125,
+ "learning_rate": 1.3657142857142858e-05,
+ "loss": 0.2755,
+ "step": 6599
+ },
+ {
+ "epoch": 37.714285714285715,
+ "grad_norm": 62.673072814941406,
+ "learning_rate": 1.365079365079365e-05,
+ "loss": 0.1291,
+ "step": 6600
+ },
+ {
+ "epoch": 37.72,
+ "grad_norm": 25.134071350097656,
+ "learning_rate": 1.3644444444444445e-05,
+ "loss": 0.2184,
+ "step": 6601
+ },
+ {
+ "epoch": 37.72571428571428,
+ "grad_norm": 48.198097229003906,
+ "learning_rate": 1.363809523809524e-05,
+ "loss": 0.1866,
+ "step": 6602
+ },
+ {
+ "epoch": 37.73142857142857,
+ "grad_norm": 68.64222717285156,
+ "learning_rate": 1.3631746031746032e-05,
+ "loss": 0.2145,
+ "step": 6603
+ },
+ {
+ "epoch": 37.73714285714286,
+ "grad_norm": 51.29213333129883,
+ "learning_rate": 1.3625396825396828e-05,
+ "loss": 0.1511,
+ "step": 6604
+ },
+ {
+ "epoch": 37.74285714285714,
+ "grad_norm": 37.644134521484375,
+ "learning_rate": 1.361904761904762e-05,
+ "loss": 0.3308,
+ "step": 6605
+ },
+ {
+ "epoch": 37.74857142857143,
+ "grad_norm": 25.032150268554688,
+ "learning_rate": 1.3612698412698413e-05,
+ "loss": 0.2497,
+ "step": 6606
+ },
+ {
+ "epoch": 37.754285714285714,
+ "grad_norm": 88.25428009033203,
+ "learning_rate": 1.3606349206349206e-05,
+ "loss": 0.1934,
+ "step": 6607
+ },
+ {
+ "epoch": 37.76,
+ "grad_norm": 30.80008316040039,
+ "learning_rate": 1.3600000000000002e-05,
+ "loss": 0.143,
+ "step": 6608
+ },
+ {
+ "epoch": 37.76571428571429,
+ "grad_norm": 51.51103973388672,
+ "learning_rate": 1.3593650793650795e-05,
+ "loss": 0.1618,
+ "step": 6609
+ },
+ {
+ "epoch": 37.77142857142857,
+ "grad_norm": 38.2564811706543,
+ "learning_rate": 1.3587301587301587e-05,
+ "loss": 0.2607,
+ "step": 6610
+ },
+ {
+ "epoch": 37.777142857142856,
+ "grad_norm": 51.70465087890625,
+ "learning_rate": 1.358095238095238e-05,
+ "loss": 0.2431,
+ "step": 6611
+ },
+ {
+ "epoch": 37.78285714285714,
+ "grad_norm": 39.037296295166016,
+ "learning_rate": 1.3574603174603176e-05,
+ "loss": 0.1339,
+ "step": 6612
+ },
+ {
+ "epoch": 37.78857142857143,
+ "grad_norm": 143.49896240234375,
+ "learning_rate": 1.3568253968253968e-05,
+ "loss": 0.1926,
+ "step": 6613
+ },
+ {
+ "epoch": 37.794285714285714,
+ "grad_norm": 82.4721450805664,
+ "learning_rate": 1.3561904761904761e-05,
+ "loss": 0.1531,
+ "step": 6614
+ },
+ {
+ "epoch": 37.8,
+ "grad_norm": 30.966434478759766,
+ "learning_rate": 1.3555555555555557e-05,
+ "loss": 0.1682,
+ "step": 6615
+ },
+ {
+ "epoch": 37.80571428571429,
+ "grad_norm": 133.61900329589844,
+ "learning_rate": 1.354920634920635e-05,
+ "loss": 0.1517,
+ "step": 6616
+ },
+ {
+ "epoch": 37.81142857142857,
+ "grad_norm": 77.1509017944336,
+ "learning_rate": 1.3542857142857142e-05,
+ "loss": 0.2169,
+ "step": 6617
+ },
+ {
+ "epoch": 37.817142857142855,
+ "grad_norm": 51.897560119628906,
+ "learning_rate": 1.3536507936507937e-05,
+ "loss": 0.1983,
+ "step": 6618
+ },
+ {
+ "epoch": 37.822857142857146,
+ "grad_norm": 38.72932434082031,
+ "learning_rate": 1.3530158730158731e-05,
+ "loss": 0.2047,
+ "step": 6619
+ },
+ {
+ "epoch": 37.82857142857143,
+ "grad_norm": 55.455074310302734,
+ "learning_rate": 1.3523809523809525e-05,
+ "loss": 0.1845,
+ "step": 6620
+ },
+ {
+ "epoch": 37.83428571428571,
+ "grad_norm": 52.98893356323242,
+ "learning_rate": 1.3517460317460318e-05,
+ "loss": 0.1929,
+ "step": 6621
+ },
+ {
+ "epoch": 37.84,
+ "grad_norm": 89.14239501953125,
+ "learning_rate": 1.351111111111111e-05,
+ "loss": 0.2095,
+ "step": 6622
+ },
+ {
+ "epoch": 37.84571428571429,
+ "grad_norm": 28.368410110473633,
+ "learning_rate": 1.3504761904761907e-05,
+ "loss": 0.126,
+ "step": 6623
+ },
+ {
+ "epoch": 37.85142857142857,
+ "grad_norm": 34.06582260131836,
+ "learning_rate": 1.34984126984127e-05,
+ "loss": 0.1847,
+ "step": 6624
+ },
+ {
+ "epoch": 37.857142857142854,
+ "grad_norm": 71.48139190673828,
+ "learning_rate": 1.3492063492063492e-05,
+ "loss": 0.1883,
+ "step": 6625
+ },
+ {
+ "epoch": 37.862857142857145,
+ "grad_norm": 69.56444549560547,
+ "learning_rate": 1.3485714285714288e-05,
+ "loss": 0.219,
+ "step": 6626
+ },
+ {
+ "epoch": 37.86857142857143,
+ "grad_norm": 29.609966278076172,
+ "learning_rate": 1.347936507936508e-05,
+ "loss": 0.1876,
+ "step": 6627
+ },
+ {
+ "epoch": 37.87428571428571,
+ "grad_norm": 18.406742095947266,
+ "learning_rate": 1.3473015873015873e-05,
+ "loss": 0.1945,
+ "step": 6628
+ },
+ {
+ "epoch": 37.88,
+ "grad_norm": 46.326324462890625,
+ "learning_rate": 1.3466666666666666e-05,
+ "loss": 0.2001,
+ "step": 6629
+ },
+ {
+ "epoch": 37.885714285714286,
+ "grad_norm": 54.06023406982422,
+ "learning_rate": 1.3460317460317462e-05,
+ "loss": 0.2189,
+ "step": 6630
+ },
+ {
+ "epoch": 37.89142857142857,
+ "grad_norm": 17.997806549072266,
+ "learning_rate": 1.3453968253968255e-05,
+ "loss": 0.2359,
+ "step": 6631
+ },
+ {
+ "epoch": 37.89714285714286,
+ "grad_norm": 32.82901382446289,
+ "learning_rate": 1.3447619047619047e-05,
+ "loss": 0.1972,
+ "step": 6632
+ },
+ {
+ "epoch": 37.902857142857144,
+ "grad_norm": 86.90510559082031,
+ "learning_rate": 1.344126984126984e-05,
+ "loss": 0.2213,
+ "step": 6633
+ },
+ {
+ "epoch": 37.90857142857143,
+ "grad_norm": 28.77446937561035,
+ "learning_rate": 1.3434920634920636e-05,
+ "loss": 0.1452,
+ "step": 6634
+ },
+ {
+ "epoch": 37.91428571428571,
+ "grad_norm": 41.217227935791016,
+ "learning_rate": 1.3428571428571429e-05,
+ "loss": 0.1812,
+ "step": 6635
+ },
+ {
+ "epoch": 37.92,
+ "grad_norm": 85.83415222167969,
+ "learning_rate": 1.3422222222222223e-05,
+ "loss": 0.2149,
+ "step": 6636
+ },
+ {
+ "epoch": 37.925714285714285,
+ "grad_norm": 32.781455993652344,
+ "learning_rate": 1.3415873015873017e-05,
+ "loss": 0.1277,
+ "step": 6637
+ },
+ {
+ "epoch": 37.93142857142857,
+ "grad_norm": 33.95721435546875,
+ "learning_rate": 1.3409523809523811e-05,
+ "loss": 0.1987,
+ "step": 6638
+ },
+ {
+ "epoch": 37.93714285714286,
+ "grad_norm": 277.95037841796875,
+ "learning_rate": 1.3403174603174604e-05,
+ "loss": 0.2591,
+ "step": 6639
+ },
+ {
+ "epoch": 37.94285714285714,
+ "grad_norm": 126.31138610839844,
+ "learning_rate": 1.3396825396825397e-05,
+ "loss": 0.2452,
+ "step": 6640
+ },
+ {
+ "epoch": 37.94857142857143,
+ "grad_norm": 55.9384880065918,
+ "learning_rate": 1.3390476190476193e-05,
+ "loss": 0.1754,
+ "step": 6641
+ },
+ {
+ "epoch": 37.95428571428572,
+ "grad_norm": 43.94575881958008,
+ "learning_rate": 1.3384126984126985e-05,
+ "loss": 0.4073,
+ "step": 6642
+ },
+ {
+ "epoch": 37.96,
+ "grad_norm": 36.381553649902344,
+ "learning_rate": 1.3377777777777778e-05,
+ "loss": 0.1684,
+ "step": 6643
+ },
+ {
+ "epoch": 37.965714285714284,
+ "grad_norm": 25.80427360534668,
+ "learning_rate": 1.337142857142857e-05,
+ "loss": 0.1604,
+ "step": 6644
+ },
+ {
+ "epoch": 37.97142857142857,
+ "grad_norm": 40.44108200073242,
+ "learning_rate": 1.3365079365079367e-05,
+ "loss": 0.1649,
+ "step": 6645
+ },
+ {
+ "epoch": 37.97714285714286,
+ "grad_norm": 60.07323455810547,
+ "learning_rate": 1.335873015873016e-05,
+ "loss": 0.1774,
+ "step": 6646
+ },
+ {
+ "epoch": 37.98285714285714,
+ "grad_norm": 39.3565559387207,
+ "learning_rate": 1.3352380952380952e-05,
+ "loss": 0.2106,
+ "step": 6647
+ },
+ {
+ "epoch": 37.988571428571426,
+ "grad_norm": 23.168386459350586,
+ "learning_rate": 1.3346031746031748e-05,
+ "loss": 0.1758,
+ "step": 6648
+ },
+ {
+ "epoch": 37.994285714285716,
+ "grad_norm": 28.189754486083984,
+ "learning_rate": 1.333968253968254e-05,
+ "loss": 0.1882,
+ "step": 6649
+ },
+ {
+ "epoch": 38.0,
+ "grad_norm": 62.98371124267578,
+ "learning_rate": 1.3333333333333333e-05,
+ "loss": 0.254,
+ "step": 6650
+ },
+ {
+ "epoch": 38.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5931137800216675,
+ "eval_map": 0.9322,
+ "eval_map_50": 0.9651,
+ "eval_map_75": 0.9605,
+ "eval_map_large": 0.9322,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9322,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7854,
+ "eval_mar_10": 0.9737,
+ "eval_mar_100": 0.9746,
+ "eval_mar_100_per_class": 0.9746,
+ "eval_mar_large": 0.9746,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 12.7097,
+ "eval_samples_per_second": 23.132,
+ "eval_steps_per_second": 2.911,
+ "step": 6650
+ },
+ {
+ "epoch": 38.005714285714284,
+ "grad_norm": 65.34095764160156,
+ "learning_rate": 1.3326984126984126e-05,
+ "loss": 0.2391,
+ "step": 6651
+ },
+ {
+ "epoch": 38.011428571428574,
+ "grad_norm": 44.28903579711914,
+ "learning_rate": 1.3320634920634922e-05,
+ "loss": 0.1608,
+ "step": 6652
+ },
+ {
+ "epoch": 38.01714285714286,
+ "grad_norm": 39.059871673583984,
+ "learning_rate": 1.3314285714285715e-05,
+ "loss": 0.1272,
+ "step": 6653
+ },
+ {
+ "epoch": 38.02285714285714,
+ "grad_norm": 99.69107818603516,
+ "learning_rate": 1.3307936507936509e-05,
+ "loss": 0.2719,
+ "step": 6654
+ },
+ {
+ "epoch": 38.02857142857143,
+ "grad_norm": 84.99723052978516,
+ "learning_rate": 1.3301587301587302e-05,
+ "loss": 0.2298,
+ "step": 6655
+ },
+ {
+ "epoch": 38.034285714285716,
+ "grad_norm": 50.17849349975586,
+ "learning_rate": 1.3295238095238096e-05,
+ "loss": 0.2187,
+ "step": 6656
+ },
+ {
+ "epoch": 38.04,
+ "grad_norm": 76.07999420166016,
+ "learning_rate": 1.328888888888889e-05,
+ "loss": 0.2283,
+ "step": 6657
+ },
+ {
+ "epoch": 38.04571428571428,
+ "grad_norm": 113.50109100341797,
+ "learning_rate": 1.3282539682539683e-05,
+ "loss": 0.1673,
+ "step": 6658
+ },
+ {
+ "epoch": 38.05142857142857,
+ "grad_norm": 31.31656265258789,
+ "learning_rate": 1.3276190476190479e-05,
+ "loss": 0.1588,
+ "step": 6659
+ },
+ {
+ "epoch": 38.05714285714286,
+ "grad_norm": 74.41105651855469,
+ "learning_rate": 1.3269841269841272e-05,
+ "loss": 0.2309,
+ "step": 6660
+ },
+ {
+ "epoch": 38.06285714285714,
+ "grad_norm": 160.22439575195312,
+ "learning_rate": 1.3263492063492064e-05,
+ "loss": 0.1336,
+ "step": 6661
+ },
+ {
+ "epoch": 38.06857142857143,
+ "grad_norm": 60.0269660949707,
+ "learning_rate": 1.3257142857142857e-05,
+ "loss": 0.1297,
+ "step": 6662
+ },
+ {
+ "epoch": 38.074285714285715,
+ "grad_norm": 58.124393463134766,
+ "learning_rate": 1.3250793650793653e-05,
+ "loss": 0.1698,
+ "step": 6663
+ },
+ {
+ "epoch": 38.08,
+ "grad_norm": 44.31272888183594,
+ "learning_rate": 1.3244444444444445e-05,
+ "loss": 0.2337,
+ "step": 6664
+ },
+ {
+ "epoch": 38.08571428571429,
+ "grad_norm": 34.844215393066406,
+ "learning_rate": 1.3238095238095238e-05,
+ "loss": 0.1659,
+ "step": 6665
+ },
+ {
+ "epoch": 38.09142857142857,
+ "grad_norm": 18.713459014892578,
+ "learning_rate": 1.323174603174603e-05,
+ "loss": 0.1737,
+ "step": 6666
+ },
+ {
+ "epoch": 38.097142857142856,
+ "grad_norm": 49.55125427246094,
+ "learning_rate": 1.3225396825396827e-05,
+ "loss": 0.1242,
+ "step": 6667
+ },
+ {
+ "epoch": 38.10285714285714,
+ "grad_norm": 62.112083435058594,
+ "learning_rate": 1.321904761904762e-05,
+ "loss": 0.3271,
+ "step": 6668
+ },
+ {
+ "epoch": 38.10857142857143,
+ "grad_norm": 24.108840942382812,
+ "learning_rate": 1.3212698412698412e-05,
+ "loss": 0.2122,
+ "step": 6669
+ },
+ {
+ "epoch": 38.114285714285714,
+ "grad_norm": 63.91169738769531,
+ "learning_rate": 1.3206349206349208e-05,
+ "loss": 0.1323,
+ "step": 6670
+ },
+ {
+ "epoch": 38.12,
+ "grad_norm": 52.37907028198242,
+ "learning_rate": 1.32e-05,
+ "loss": 0.201,
+ "step": 6671
+ },
+ {
+ "epoch": 38.12571428571429,
+ "grad_norm": 29.8988037109375,
+ "learning_rate": 1.3193650793650793e-05,
+ "loss": 0.2407,
+ "step": 6672
+ },
+ {
+ "epoch": 38.13142857142857,
+ "grad_norm": 20.84734344482422,
+ "learning_rate": 1.3187301587301588e-05,
+ "loss": 0.1484,
+ "step": 6673
+ },
+ {
+ "epoch": 38.137142857142855,
+ "grad_norm": 45.14535903930664,
+ "learning_rate": 1.3180952380952382e-05,
+ "loss": 0.2555,
+ "step": 6674
+ },
+ {
+ "epoch": 38.142857142857146,
+ "grad_norm": 1290.4554443359375,
+ "learning_rate": 1.3174603174603176e-05,
+ "loss": 0.192,
+ "step": 6675
+ },
+ {
+ "epoch": 38.14857142857143,
+ "grad_norm": 26.882118225097656,
+ "learning_rate": 1.3168253968253969e-05,
+ "loss": 0.1516,
+ "step": 6676
+ },
+ {
+ "epoch": 38.15428571428571,
+ "grad_norm": 56.91200637817383,
+ "learning_rate": 1.3161904761904762e-05,
+ "loss": 0.27,
+ "step": 6677
+ },
+ {
+ "epoch": 38.16,
+ "grad_norm": 47.5983772277832,
+ "learning_rate": 1.3155555555555558e-05,
+ "loss": 0.1391,
+ "step": 6678
+ },
+ {
+ "epoch": 38.16571428571429,
+ "grad_norm": 13.200896263122559,
+ "learning_rate": 1.314920634920635e-05,
+ "loss": 0.1617,
+ "step": 6679
+ },
+ {
+ "epoch": 38.17142857142857,
+ "grad_norm": 67.04146575927734,
+ "learning_rate": 1.3142857142857143e-05,
+ "loss": 0.2372,
+ "step": 6680
+ },
+ {
+ "epoch": 38.177142857142854,
+ "grad_norm": 38.76677322387695,
+ "learning_rate": 1.3136507936507936e-05,
+ "loss": 0.1755,
+ "step": 6681
+ },
+ {
+ "epoch": 38.182857142857145,
+ "grad_norm": 36.118228912353516,
+ "learning_rate": 1.3130158730158732e-05,
+ "loss": 0.1801,
+ "step": 6682
+ },
+ {
+ "epoch": 38.18857142857143,
+ "grad_norm": 61.32090377807617,
+ "learning_rate": 1.3123809523809524e-05,
+ "loss": 0.2072,
+ "step": 6683
+ },
+ {
+ "epoch": 38.19428571428571,
+ "grad_norm": 82.47146606445312,
+ "learning_rate": 1.3117460317460317e-05,
+ "loss": 0.2513,
+ "step": 6684
+ },
+ {
+ "epoch": 38.2,
+ "grad_norm": 40.535369873046875,
+ "learning_rate": 1.3111111111111113e-05,
+ "loss": 0.2075,
+ "step": 6685
+ },
+ {
+ "epoch": 38.205714285714286,
+ "grad_norm": 19.5318546295166,
+ "learning_rate": 1.3104761904761905e-05,
+ "loss": 0.1199,
+ "step": 6686
+ },
+ {
+ "epoch": 38.21142857142857,
+ "grad_norm": 61.96049499511719,
+ "learning_rate": 1.3098412698412698e-05,
+ "loss": 0.1723,
+ "step": 6687
+ },
+ {
+ "epoch": 38.21714285714286,
+ "grad_norm": 55.3178596496582,
+ "learning_rate": 1.3092063492063492e-05,
+ "loss": 0.1558,
+ "step": 6688
+ },
+ {
+ "epoch": 38.222857142857144,
+ "grad_norm": 52.19147491455078,
+ "learning_rate": 1.3085714285714287e-05,
+ "loss": 0.2442,
+ "step": 6689
+ },
+ {
+ "epoch": 38.22857142857143,
+ "grad_norm": 58.848388671875,
+ "learning_rate": 1.307936507936508e-05,
+ "loss": 0.2005,
+ "step": 6690
+ },
+ {
+ "epoch": 38.23428571428571,
+ "grad_norm": 28.111515045166016,
+ "learning_rate": 1.3073015873015874e-05,
+ "loss": 0.1551,
+ "step": 6691
+ },
+ {
+ "epoch": 38.24,
+ "grad_norm": 38.20722579956055,
+ "learning_rate": 1.3066666666666666e-05,
+ "loss": 0.1225,
+ "step": 6692
+ },
+ {
+ "epoch": 38.245714285714286,
+ "grad_norm": 16.029094696044922,
+ "learning_rate": 1.3060317460317462e-05,
+ "loss": 0.1604,
+ "step": 6693
+ },
+ {
+ "epoch": 38.25142857142857,
+ "grad_norm": 17.04863739013672,
+ "learning_rate": 1.3053968253968255e-05,
+ "loss": 0.2181,
+ "step": 6694
+ },
+ {
+ "epoch": 38.25714285714286,
+ "grad_norm": 54.93702697753906,
+ "learning_rate": 1.3047619047619048e-05,
+ "loss": 0.1461,
+ "step": 6695
+ },
+ {
+ "epoch": 38.26285714285714,
+ "grad_norm": 36.58909606933594,
+ "learning_rate": 1.3041269841269844e-05,
+ "loss": 0.134,
+ "step": 6696
+ },
+ {
+ "epoch": 38.26857142857143,
+ "grad_norm": 50.77967834472656,
+ "learning_rate": 1.3034920634920636e-05,
+ "loss": 0.2437,
+ "step": 6697
+ },
+ {
+ "epoch": 38.27428571428572,
+ "grad_norm": 41.20557403564453,
+ "learning_rate": 1.3028571428571429e-05,
+ "loss": 0.1681,
+ "step": 6698
+ },
+ {
+ "epoch": 38.28,
+ "grad_norm": 56.51957702636719,
+ "learning_rate": 1.3022222222222222e-05,
+ "loss": 0.1245,
+ "step": 6699
+ },
+ {
+ "epoch": 38.285714285714285,
+ "grad_norm": 95.08709716796875,
+ "learning_rate": 1.3015873015873018e-05,
+ "loss": 0.1658,
+ "step": 6700
+ },
+ {
+ "epoch": 38.29142857142857,
+ "grad_norm": 18.885896682739258,
+ "learning_rate": 1.300952380952381e-05,
+ "loss": 0.1287,
+ "step": 6701
+ },
+ {
+ "epoch": 38.29714285714286,
+ "grad_norm": 27.77571678161621,
+ "learning_rate": 1.3003174603174603e-05,
+ "loss": 0.1597,
+ "step": 6702
+ },
+ {
+ "epoch": 38.30285714285714,
+ "grad_norm": 59.84832763671875,
+ "learning_rate": 1.2996825396825396e-05,
+ "loss": 0.1875,
+ "step": 6703
+ },
+ {
+ "epoch": 38.308571428571426,
+ "grad_norm": 79.235107421875,
+ "learning_rate": 1.2990476190476192e-05,
+ "loss": 0.2608,
+ "step": 6704
+ },
+ {
+ "epoch": 38.31428571428572,
+ "grad_norm": 40.934234619140625,
+ "learning_rate": 1.2984126984126984e-05,
+ "loss": 0.1936,
+ "step": 6705
+ },
+ {
+ "epoch": 38.32,
+ "grad_norm": 45.221771240234375,
+ "learning_rate": 1.2977777777777777e-05,
+ "loss": 0.2032,
+ "step": 6706
+ },
+ {
+ "epoch": 38.325714285714284,
+ "grad_norm": 30.7657527923584,
+ "learning_rate": 1.2971428571428573e-05,
+ "loss": 0.1654,
+ "step": 6707
+ },
+ {
+ "epoch": 38.331428571428575,
+ "grad_norm": 84.09855651855469,
+ "learning_rate": 1.2965079365079365e-05,
+ "loss": 0.2027,
+ "step": 6708
+ },
+ {
+ "epoch": 38.33714285714286,
+ "grad_norm": 34.646461486816406,
+ "learning_rate": 1.295873015873016e-05,
+ "loss": 0.1374,
+ "step": 6709
+ },
+ {
+ "epoch": 38.34285714285714,
+ "grad_norm": 36.283782958984375,
+ "learning_rate": 1.2952380952380952e-05,
+ "loss": 0.2018,
+ "step": 6710
+ },
+ {
+ "epoch": 38.348571428571425,
+ "grad_norm": 33.82155227661133,
+ "learning_rate": 1.2946031746031748e-05,
+ "loss": 0.1951,
+ "step": 6711
+ },
+ {
+ "epoch": 38.354285714285716,
+ "grad_norm": 20.42057991027832,
+ "learning_rate": 1.2939682539682541e-05,
+ "loss": 0.1787,
+ "step": 6712
+ },
+ {
+ "epoch": 38.36,
+ "grad_norm": 62.836246490478516,
+ "learning_rate": 1.2933333333333334e-05,
+ "loss": 0.2216,
+ "step": 6713
+ },
+ {
+ "epoch": 38.36571428571428,
+ "grad_norm": 44.87102127075195,
+ "learning_rate": 1.2926984126984126e-05,
+ "loss": 0.1178,
+ "step": 6714
+ },
+ {
+ "epoch": 38.371428571428574,
+ "grad_norm": 55.18440628051758,
+ "learning_rate": 1.2920634920634922e-05,
+ "loss": 0.1506,
+ "step": 6715
+ },
+ {
+ "epoch": 38.37714285714286,
+ "grad_norm": 38.266483306884766,
+ "learning_rate": 1.2914285714285715e-05,
+ "loss": 0.1882,
+ "step": 6716
+ },
+ {
+ "epoch": 38.38285714285714,
+ "grad_norm": 77.53422546386719,
+ "learning_rate": 1.2907936507936508e-05,
+ "loss": 0.2116,
+ "step": 6717
+ },
+ {
+ "epoch": 38.38857142857143,
+ "grad_norm": 128.25515747070312,
+ "learning_rate": 1.2901587301587304e-05,
+ "loss": 0.268,
+ "step": 6718
+ },
+ {
+ "epoch": 38.394285714285715,
+ "grad_norm": 21.647573471069336,
+ "learning_rate": 1.2895238095238096e-05,
+ "loss": 0.1649,
+ "step": 6719
+ },
+ {
+ "epoch": 38.4,
+ "grad_norm": 27.34562110900879,
+ "learning_rate": 1.2888888888888889e-05,
+ "loss": 0.214,
+ "step": 6720
+ },
+ {
+ "epoch": 38.40571428571428,
+ "grad_norm": 221.07102966308594,
+ "learning_rate": 1.2882539682539682e-05,
+ "loss": 0.1625,
+ "step": 6721
+ },
+ {
+ "epoch": 38.41142857142857,
+ "grad_norm": 53.76383972167969,
+ "learning_rate": 1.2876190476190478e-05,
+ "loss": 0.1781,
+ "step": 6722
+ },
+ {
+ "epoch": 38.417142857142856,
+ "grad_norm": 55.51323318481445,
+ "learning_rate": 1.286984126984127e-05,
+ "loss": 0.1353,
+ "step": 6723
+ },
+ {
+ "epoch": 38.42285714285714,
+ "grad_norm": 192.99331665039062,
+ "learning_rate": 1.2863492063492063e-05,
+ "loss": 0.1696,
+ "step": 6724
+ },
+ {
+ "epoch": 38.42857142857143,
+ "grad_norm": 38.05833053588867,
+ "learning_rate": 1.2857142857142857e-05,
+ "loss": 0.2002,
+ "step": 6725
+ },
+ {
+ "epoch": 38.434285714285714,
+ "grad_norm": 24.698301315307617,
+ "learning_rate": 1.2850793650793652e-05,
+ "loss": 0.1436,
+ "step": 6726
+ },
+ {
+ "epoch": 38.44,
+ "grad_norm": 27.2955322265625,
+ "learning_rate": 1.2844444444444446e-05,
+ "loss": 0.1325,
+ "step": 6727
+ },
+ {
+ "epoch": 38.44571428571429,
+ "grad_norm": 29.796966552734375,
+ "learning_rate": 1.2838095238095239e-05,
+ "loss": 0.1694,
+ "step": 6728
+ },
+ {
+ "epoch": 38.45142857142857,
+ "grad_norm": 46.824832916259766,
+ "learning_rate": 1.2831746031746033e-05,
+ "loss": 0.1103,
+ "step": 6729
+ },
+ {
+ "epoch": 38.457142857142856,
+ "grad_norm": 31.10439109802246,
+ "learning_rate": 1.2825396825396827e-05,
+ "loss": 0.2151,
+ "step": 6730
+ },
+ {
+ "epoch": 38.462857142857146,
+ "grad_norm": 60.536861419677734,
+ "learning_rate": 1.281904761904762e-05,
+ "loss": 0.1598,
+ "step": 6731
+ },
+ {
+ "epoch": 38.46857142857143,
+ "grad_norm": 42.14023208618164,
+ "learning_rate": 1.2812698412698412e-05,
+ "loss": 0.185,
+ "step": 6732
+ },
+ {
+ "epoch": 38.47428571428571,
+ "grad_norm": 44.388004302978516,
+ "learning_rate": 1.2806349206349208e-05,
+ "loss": 0.1841,
+ "step": 6733
+ },
+ {
+ "epoch": 38.48,
+ "grad_norm": 50.36425018310547,
+ "learning_rate": 1.2800000000000001e-05,
+ "loss": 0.1711,
+ "step": 6734
+ },
+ {
+ "epoch": 38.48571428571429,
+ "grad_norm": 33.617488861083984,
+ "learning_rate": 1.2793650793650794e-05,
+ "loss": 0.2432,
+ "step": 6735
+ },
+ {
+ "epoch": 38.49142857142857,
+ "grad_norm": 72.66381072998047,
+ "learning_rate": 1.2787301587301586e-05,
+ "loss": 0.2402,
+ "step": 6736
+ },
+ {
+ "epoch": 38.497142857142855,
+ "grad_norm": 35.83653259277344,
+ "learning_rate": 1.2780952380952382e-05,
+ "loss": 0.14,
+ "step": 6737
+ },
+ {
+ "epoch": 38.502857142857145,
+ "grad_norm": 53.331642150878906,
+ "learning_rate": 1.2774603174603175e-05,
+ "loss": 0.1875,
+ "step": 6738
+ },
+ {
+ "epoch": 38.50857142857143,
+ "grad_norm": 30.422245025634766,
+ "learning_rate": 1.2768253968253968e-05,
+ "loss": 0.2175,
+ "step": 6739
+ },
+ {
+ "epoch": 38.51428571428571,
+ "grad_norm": 57.215858459472656,
+ "learning_rate": 1.2761904761904764e-05,
+ "loss": 0.2662,
+ "step": 6740
+ },
+ {
+ "epoch": 38.52,
+ "grad_norm": 164.78506469726562,
+ "learning_rate": 1.2755555555555556e-05,
+ "loss": 0.2561,
+ "step": 6741
+ },
+ {
+ "epoch": 38.52571428571429,
+ "grad_norm": 70.9798812866211,
+ "learning_rate": 1.2749206349206349e-05,
+ "loss": 0.2059,
+ "step": 6742
+ },
+ {
+ "epoch": 38.53142857142857,
+ "grad_norm": 19.46929359436035,
+ "learning_rate": 1.2742857142857143e-05,
+ "loss": 0.1481,
+ "step": 6743
+ },
+ {
+ "epoch": 38.537142857142854,
+ "grad_norm": 41.63719177246094,
+ "learning_rate": 1.2736507936507938e-05,
+ "loss": 0.1382,
+ "step": 6744
+ },
+ {
+ "epoch": 38.542857142857144,
+ "grad_norm": 70.39461517333984,
+ "learning_rate": 1.273015873015873e-05,
+ "loss": 0.1886,
+ "step": 6745
+ },
+ {
+ "epoch": 38.54857142857143,
+ "grad_norm": 406.7051696777344,
+ "learning_rate": 1.2723809523809525e-05,
+ "loss": 0.2394,
+ "step": 6746
+ },
+ {
+ "epoch": 38.55428571428571,
+ "grad_norm": 33.21284484863281,
+ "learning_rate": 1.2717460317460317e-05,
+ "loss": 0.1635,
+ "step": 6747
+ },
+ {
+ "epoch": 38.56,
+ "grad_norm": 37.28272247314453,
+ "learning_rate": 1.2711111111111113e-05,
+ "loss": 0.2497,
+ "step": 6748
+ },
+ {
+ "epoch": 38.565714285714286,
+ "grad_norm": 58.15789031982422,
+ "learning_rate": 1.2704761904761906e-05,
+ "loss": 0.1683,
+ "step": 6749
+ },
+ {
+ "epoch": 38.57142857142857,
+ "grad_norm": 55.796722412109375,
+ "learning_rate": 1.2698412698412699e-05,
+ "loss": 0.1919,
+ "step": 6750
+ },
+ {
+ "epoch": 38.57714285714286,
+ "grad_norm": 32.311729431152344,
+ "learning_rate": 1.2692063492063495e-05,
+ "loss": 0.1835,
+ "step": 6751
+ },
+ {
+ "epoch": 38.582857142857144,
+ "grad_norm": 35.519737243652344,
+ "learning_rate": 1.2685714285714287e-05,
+ "loss": 0.193,
+ "step": 6752
+ },
+ {
+ "epoch": 38.58857142857143,
+ "grad_norm": 42.47575378417969,
+ "learning_rate": 1.267936507936508e-05,
+ "loss": 0.2063,
+ "step": 6753
+ },
+ {
+ "epoch": 38.59428571428572,
+ "grad_norm": 52.6748161315918,
+ "learning_rate": 1.2673015873015872e-05,
+ "loss": 0.2368,
+ "step": 6754
+ },
+ {
+ "epoch": 38.6,
+ "grad_norm": 32.23313522338867,
+ "learning_rate": 1.2666666666666668e-05,
+ "loss": 0.2966,
+ "step": 6755
+ },
+ {
+ "epoch": 38.605714285714285,
+ "grad_norm": 71.69877624511719,
+ "learning_rate": 1.2660317460317461e-05,
+ "loss": 0.283,
+ "step": 6756
+ },
+ {
+ "epoch": 38.61142857142857,
+ "grad_norm": 29.855546951293945,
+ "learning_rate": 1.2653968253968254e-05,
+ "loss": 0.1624,
+ "step": 6757
+ },
+ {
+ "epoch": 38.61714285714286,
+ "grad_norm": 28.4865665435791,
+ "learning_rate": 1.2647619047619046e-05,
+ "loss": 0.1689,
+ "step": 6758
+ },
+ {
+ "epoch": 38.62285714285714,
+ "grad_norm": 82.87152099609375,
+ "learning_rate": 1.2641269841269842e-05,
+ "loss": 0.1722,
+ "step": 6759
+ },
+ {
+ "epoch": 38.628571428571426,
+ "grad_norm": 35.51962661743164,
+ "learning_rate": 1.2634920634920635e-05,
+ "loss": 0.2133,
+ "step": 6760
+ },
+ {
+ "epoch": 38.63428571428572,
+ "grad_norm": 53.56692886352539,
+ "learning_rate": 1.2628571428571428e-05,
+ "loss": 0.1715,
+ "step": 6761
+ },
+ {
+ "epoch": 38.64,
+ "grad_norm": 34.60567855834961,
+ "learning_rate": 1.2622222222222224e-05,
+ "loss": 0.1807,
+ "step": 6762
+ },
+ {
+ "epoch": 38.645714285714284,
+ "grad_norm": 77.77987670898438,
+ "learning_rate": 1.2615873015873016e-05,
+ "loss": 0.2579,
+ "step": 6763
+ },
+ {
+ "epoch": 38.651428571428575,
+ "grad_norm": 41.919795989990234,
+ "learning_rate": 1.260952380952381e-05,
+ "loss": 0.195,
+ "step": 6764
+ },
+ {
+ "epoch": 38.65714285714286,
+ "grad_norm": 30.047168731689453,
+ "learning_rate": 1.2603174603174603e-05,
+ "loss": 0.1852,
+ "step": 6765
+ },
+ {
+ "epoch": 38.66285714285714,
+ "grad_norm": 286.2810363769531,
+ "learning_rate": 1.25968253968254e-05,
+ "loss": 0.1347,
+ "step": 6766
+ },
+ {
+ "epoch": 38.668571428571425,
+ "grad_norm": 71.60431671142578,
+ "learning_rate": 1.2590476190476192e-05,
+ "loss": 0.1787,
+ "step": 6767
+ },
+ {
+ "epoch": 38.674285714285716,
+ "grad_norm": 48.86784744262695,
+ "learning_rate": 1.2584126984126985e-05,
+ "loss": 0.1596,
+ "step": 6768
+ },
+ {
+ "epoch": 38.68,
+ "grad_norm": 26.185443878173828,
+ "learning_rate": 1.2577777777777777e-05,
+ "loss": 0.1406,
+ "step": 6769
+ },
+ {
+ "epoch": 38.68571428571428,
+ "grad_norm": 51.83404541015625,
+ "learning_rate": 1.2571428571428573e-05,
+ "loss": 0.1557,
+ "step": 6770
+ },
+ {
+ "epoch": 38.691428571428574,
+ "grad_norm": 52.92089080810547,
+ "learning_rate": 1.2565079365079366e-05,
+ "loss": 0.1805,
+ "step": 6771
+ },
+ {
+ "epoch": 38.69714285714286,
+ "grad_norm": 53.78825759887695,
+ "learning_rate": 1.2558730158730159e-05,
+ "loss": 0.2165,
+ "step": 6772
+ },
+ {
+ "epoch": 38.70285714285714,
+ "grad_norm": 81.38287353515625,
+ "learning_rate": 1.2552380952380955e-05,
+ "loss": 0.2713,
+ "step": 6773
+ },
+ {
+ "epoch": 38.70857142857143,
+ "grad_norm": 43.09062194824219,
+ "learning_rate": 1.2546031746031747e-05,
+ "loss": 0.187,
+ "step": 6774
+ },
+ {
+ "epoch": 38.714285714285715,
+ "grad_norm": 50.66502380371094,
+ "learning_rate": 1.253968253968254e-05,
+ "loss": 0.182,
+ "step": 6775
+ },
+ {
+ "epoch": 38.72,
+ "grad_norm": 25.26409149169922,
+ "learning_rate": 1.2533333333333332e-05,
+ "loss": 0.1729,
+ "step": 6776
+ },
+ {
+ "epoch": 38.72571428571428,
+ "grad_norm": 45.1693115234375,
+ "learning_rate": 1.2526984126984129e-05,
+ "loss": 0.1879,
+ "step": 6777
+ },
+ {
+ "epoch": 38.73142857142857,
+ "grad_norm": 50.87795639038086,
+ "learning_rate": 1.2520634920634921e-05,
+ "loss": 0.1999,
+ "step": 6778
+ },
+ {
+ "epoch": 38.73714285714286,
+ "grad_norm": 50.23199462890625,
+ "learning_rate": 1.2514285714285714e-05,
+ "loss": 0.1491,
+ "step": 6779
+ },
+ {
+ "epoch": 38.74285714285714,
+ "grad_norm": 46.01109313964844,
+ "learning_rate": 1.2507936507936508e-05,
+ "loss": 0.1764,
+ "step": 6780
+ },
+ {
+ "epoch": 38.74857142857143,
+ "grad_norm": 84.88690185546875,
+ "learning_rate": 1.2501587301587302e-05,
+ "loss": 0.2689,
+ "step": 6781
+ },
+ {
+ "epoch": 38.754285714285714,
+ "grad_norm": 59.24235153198242,
+ "learning_rate": 1.2495238095238097e-05,
+ "loss": 0.2036,
+ "step": 6782
+ },
+ {
+ "epoch": 38.76,
+ "grad_norm": 168.68218994140625,
+ "learning_rate": 1.248888888888889e-05,
+ "loss": 0.3532,
+ "step": 6783
+ },
+ {
+ "epoch": 38.76571428571429,
+ "grad_norm": 53.04069900512695,
+ "learning_rate": 1.2482539682539684e-05,
+ "loss": 0.229,
+ "step": 6784
+ },
+ {
+ "epoch": 38.77142857142857,
+ "grad_norm": 20.792329788208008,
+ "learning_rate": 1.2476190476190478e-05,
+ "loss": 0.2167,
+ "step": 6785
+ },
+ {
+ "epoch": 38.777142857142856,
+ "grad_norm": 80.4717025756836,
+ "learning_rate": 1.246984126984127e-05,
+ "loss": 0.2723,
+ "step": 6786
+ },
+ {
+ "epoch": 38.78285714285714,
+ "grad_norm": 67.51244354248047,
+ "learning_rate": 1.2463492063492065e-05,
+ "loss": 0.2109,
+ "step": 6787
+ },
+ {
+ "epoch": 38.78857142857143,
+ "grad_norm": 22.83247947692871,
+ "learning_rate": 1.2457142857142858e-05,
+ "loss": 0.1464,
+ "step": 6788
+ },
+ {
+ "epoch": 38.794285714285714,
+ "grad_norm": 19.295541763305664,
+ "learning_rate": 1.2450793650793652e-05,
+ "loss": 0.1823,
+ "step": 6789
+ },
+ {
+ "epoch": 38.8,
+ "grad_norm": 52.79185485839844,
+ "learning_rate": 1.2444444444444445e-05,
+ "loss": 0.1479,
+ "step": 6790
+ },
+ {
+ "epoch": 38.80571428571429,
+ "grad_norm": 66.22676086425781,
+ "learning_rate": 1.2438095238095239e-05,
+ "loss": 0.1586,
+ "step": 6791
+ },
+ {
+ "epoch": 38.81142857142857,
+ "grad_norm": 331.5667419433594,
+ "learning_rate": 1.2431746031746032e-05,
+ "loss": 0.2117,
+ "step": 6792
+ },
+ {
+ "epoch": 38.817142857142855,
+ "grad_norm": 593.6991577148438,
+ "learning_rate": 1.2425396825396826e-05,
+ "loss": 0.1801,
+ "step": 6793
+ },
+ {
+ "epoch": 38.822857142857146,
+ "grad_norm": 39.879024505615234,
+ "learning_rate": 1.2419047619047619e-05,
+ "loss": 0.2398,
+ "step": 6794
+ },
+ {
+ "epoch": 38.82857142857143,
+ "grad_norm": 584.5859375,
+ "learning_rate": 1.2412698412698413e-05,
+ "loss": 0.1553,
+ "step": 6795
+ },
+ {
+ "epoch": 38.83428571428571,
+ "grad_norm": 34.988075256347656,
+ "learning_rate": 1.2406349206349207e-05,
+ "loss": 0.2081,
+ "step": 6796
+ },
+ {
+ "epoch": 38.84,
+ "grad_norm": 54.2407341003418,
+ "learning_rate": 1.24e-05,
+ "loss": 0.1424,
+ "step": 6797
+ },
+ {
+ "epoch": 38.84571428571429,
+ "grad_norm": 39.45024108886719,
+ "learning_rate": 1.2393650793650794e-05,
+ "loss": 0.2575,
+ "step": 6798
+ },
+ {
+ "epoch": 38.85142857142857,
+ "grad_norm": 32.569522857666016,
+ "learning_rate": 1.2387301587301589e-05,
+ "loss": 0.1605,
+ "step": 6799
+ },
+ {
+ "epoch": 38.857142857142854,
+ "grad_norm": 24.382707595825195,
+ "learning_rate": 1.2380952380952381e-05,
+ "loss": 0.1615,
+ "step": 6800
+ },
+ {
+ "epoch": 38.862857142857145,
+ "grad_norm": 26.837656021118164,
+ "learning_rate": 1.2374603174603175e-05,
+ "loss": 0.1472,
+ "step": 6801
+ },
+ {
+ "epoch": 38.86857142857143,
+ "grad_norm": 377.7287902832031,
+ "learning_rate": 1.236825396825397e-05,
+ "loss": 0.2733,
+ "step": 6802
+ },
+ {
+ "epoch": 38.87428571428571,
+ "grad_norm": 25.140079498291016,
+ "learning_rate": 1.2361904761904762e-05,
+ "loss": 0.1528,
+ "step": 6803
+ },
+ {
+ "epoch": 38.88,
+ "grad_norm": 20.54310417175293,
+ "learning_rate": 1.2355555555555557e-05,
+ "loss": 0.2128,
+ "step": 6804
+ },
+ {
+ "epoch": 38.885714285714286,
+ "grad_norm": 17.899181365966797,
+ "learning_rate": 1.234920634920635e-05,
+ "loss": 0.2293,
+ "step": 6805
+ },
+ {
+ "epoch": 38.89142857142857,
+ "grad_norm": 123.19231414794922,
+ "learning_rate": 1.2342857142857144e-05,
+ "loss": 0.1704,
+ "step": 6806
+ },
+ {
+ "epoch": 38.89714285714286,
+ "grad_norm": 86.83697509765625,
+ "learning_rate": 1.2336507936507938e-05,
+ "loss": 0.1935,
+ "step": 6807
+ },
+ {
+ "epoch": 38.902857142857144,
+ "grad_norm": 35.76729202270508,
+ "learning_rate": 1.233015873015873e-05,
+ "loss": 0.1891,
+ "step": 6808
+ },
+ {
+ "epoch": 38.90857142857143,
+ "grad_norm": 59.41086959838867,
+ "learning_rate": 1.2323809523809525e-05,
+ "loss": 0.2542,
+ "step": 6809
+ },
+ {
+ "epoch": 38.91428571428571,
+ "grad_norm": 29.30520248413086,
+ "learning_rate": 1.2317460317460318e-05,
+ "loss": 0.1732,
+ "step": 6810
+ },
+ {
+ "epoch": 38.92,
+ "grad_norm": 61.0394287109375,
+ "learning_rate": 1.2311111111111112e-05,
+ "loss": 0.1548,
+ "step": 6811
+ },
+ {
+ "epoch": 38.925714285714285,
+ "grad_norm": 147.2695770263672,
+ "learning_rate": 1.2304761904761905e-05,
+ "loss": 0.1875,
+ "step": 6812
+ },
+ {
+ "epoch": 38.93142857142857,
+ "grad_norm": 50.43434143066406,
+ "learning_rate": 1.2298412698412699e-05,
+ "loss": 0.1551,
+ "step": 6813
+ },
+ {
+ "epoch": 38.93714285714286,
+ "grad_norm": 43.08822250366211,
+ "learning_rate": 1.2292063492063492e-05,
+ "loss": 0.1686,
+ "step": 6814
+ },
+ {
+ "epoch": 38.94285714285714,
+ "grad_norm": 59.40639877319336,
+ "learning_rate": 1.2285714285714286e-05,
+ "loss": 0.2309,
+ "step": 6815
+ },
+ {
+ "epoch": 38.94857142857143,
+ "grad_norm": 34.71112060546875,
+ "learning_rate": 1.2279365079365079e-05,
+ "loss": 0.151,
+ "step": 6816
+ },
+ {
+ "epoch": 38.95428571428572,
+ "grad_norm": 67.87384796142578,
+ "learning_rate": 1.2273015873015873e-05,
+ "loss": 0.2314,
+ "step": 6817
+ },
+ {
+ "epoch": 38.96,
+ "grad_norm": 29.375036239624023,
+ "learning_rate": 1.2266666666666667e-05,
+ "loss": 0.2571,
+ "step": 6818
+ },
+ {
+ "epoch": 38.965714285714284,
+ "grad_norm": 54.79815673828125,
+ "learning_rate": 1.2260317460317462e-05,
+ "loss": 0.2185,
+ "step": 6819
+ },
+ {
+ "epoch": 38.97142857142857,
+ "grad_norm": 27.393661499023438,
+ "learning_rate": 1.2253968253968256e-05,
+ "loss": 0.1525,
+ "step": 6820
+ },
+ {
+ "epoch": 38.97714285714286,
+ "grad_norm": 46.51731872558594,
+ "learning_rate": 1.2247619047619049e-05,
+ "loss": 0.1962,
+ "step": 6821
+ },
+ {
+ "epoch": 38.98285714285714,
+ "grad_norm": 48.312679290771484,
+ "learning_rate": 1.2241269841269843e-05,
+ "loss": 0.2483,
+ "step": 6822
+ },
+ {
+ "epoch": 38.988571428571426,
+ "grad_norm": 1494.267578125,
+ "learning_rate": 1.2234920634920636e-05,
+ "loss": 0.2636,
+ "step": 6823
+ },
+ {
+ "epoch": 38.994285714285716,
+ "grad_norm": 70.67123413085938,
+ "learning_rate": 1.222857142857143e-05,
+ "loss": 0.2336,
+ "step": 6824
+ },
+ {
+ "epoch": 39.0,
+ "grad_norm": 52.84934616088867,
+ "learning_rate": 1.2222222222222222e-05,
+ "loss": 0.2018,
+ "step": 6825
+ },
+ {
+ "epoch": 39.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5795198678970337,
+ "eval_map": 0.929,
+ "eval_map_50": 0.9651,
+ "eval_map_75": 0.9564,
+ "eval_map_large": 0.929,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.929,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7895,
+ "eval_mar_10": 0.9724,
+ "eval_mar_100": 0.9733,
+ "eval_mar_100_per_class": 0.9733,
+ "eval_mar_large": 0.9733,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.0539,
+ "eval_samples_per_second": 22.522,
+ "eval_steps_per_second": 2.834,
+ "step": 6825
+ },
+ {
+ "epoch": 39.005714285714284,
+ "grad_norm": 47.03675842285156,
+ "learning_rate": 1.2215873015873017e-05,
+ "loss": 0.1622,
+ "step": 6826
+ },
+ {
+ "epoch": 39.011428571428574,
+ "grad_norm": 36.863685607910156,
+ "learning_rate": 1.220952380952381e-05,
+ "loss": 0.1944,
+ "step": 6827
+ },
+ {
+ "epoch": 39.01714285714286,
+ "grad_norm": 77.5301284790039,
+ "learning_rate": 1.2203174603174604e-05,
+ "loss": 0.2398,
+ "step": 6828
+ },
+ {
+ "epoch": 39.02285714285714,
+ "grad_norm": 25.368093490600586,
+ "learning_rate": 1.2196825396825396e-05,
+ "loss": 0.1982,
+ "step": 6829
+ },
+ {
+ "epoch": 39.02857142857143,
+ "grad_norm": 76.29206085205078,
+ "learning_rate": 1.219047619047619e-05,
+ "loss": 0.2138,
+ "step": 6830
+ },
+ {
+ "epoch": 39.034285714285716,
+ "grad_norm": 19.772838592529297,
+ "learning_rate": 1.2184126984126985e-05,
+ "loss": 0.1461,
+ "step": 6831
+ },
+ {
+ "epoch": 39.04,
+ "grad_norm": 109.93661499023438,
+ "learning_rate": 1.2177777777777778e-05,
+ "loss": 0.1707,
+ "step": 6832
+ },
+ {
+ "epoch": 39.04571428571428,
+ "grad_norm": 69.74849700927734,
+ "learning_rate": 1.2171428571428572e-05,
+ "loss": 0.1404,
+ "step": 6833
+ },
+ {
+ "epoch": 39.05142857142857,
+ "grad_norm": 34.377532958984375,
+ "learning_rate": 1.2165079365079365e-05,
+ "loss": 0.199,
+ "step": 6834
+ },
+ {
+ "epoch": 39.05714285714286,
+ "grad_norm": 1229.432373046875,
+ "learning_rate": 1.2158730158730159e-05,
+ "loss": 0.2692,
+ "step": 6835
+ },
+ {
+ "epoch": 39.06285714285714,
+ "grad_norm": 55.80887985229492,
+ "learning_rate": 1.2152380952380953e-05,
+ "loss": 0.2974,
+ "step": 6836
+ },
+ {
+ "epoch": 39.06857142857143,
+ "grad_norm": 16.48853302001953,
+ "learning_rate": 1.2146031746031748e-05,
+ "loss": 0.223,
+ "step": 6837
+ },
+ {
+ "epoch": 39.074285714285715,
+ "grad_norm": 32.252227783203125,
+ "learning_rate": 1.213968253968254e-05,
+ "loss": 0.2561,
+ "step": 6838
+ },
+ {
+ "epoch": 39.08,
+ "grad_norm": 275.59722900390625,
+ "learning_rate": 1.2133333333333335e-05,
+ "loss": 0.2649,
+ "step": 6839
+ },
+ {
+ "epoch": 39.08571428571429,
+ "grad_norm": 17.12618064880371,
+ "learning_rate": 1.2126984126984127e-05,
+ "loss": 0.1722,
+ "step": 6840
+ },
+ {
+ "epoch": 39.09142857142857,
+ "grad_norm": 52.881526947021484,
+ "learning_rate": 1.2120634920634922e-05,
+ "loss": 0.1779,
+ "step": 6841
+ },
+ {
+ "epoch": 39.097142857142856,
+ "grad_norm": 52.06009292602539,
+ "learning_rate": 1.2114285714285716e-05,
+ "loss": 0.2009,
+ "step": 6842
+ },
+ {
+ "epoch": 39.10285714285714,
+ "grad_norm": 74.94755554199219,
+ "learning_rate": 1.2107936507936509e-05,
+ "loss": 0.1997,
+ "step": 6843
+ },
+ {
+ "epoch": 39.10857142857143,
+ "grad_norm": 45.03340530395508,
+ "learning_rate": 1.2101587301587303e-05,
+ "loss": 0.161,
+ "step": 6844
+ },
+ {
+ "epoch": 39.114285714285714,
+ "grad_norm": 45.907249450683594,
+ "learning_rate": 1.2095238095238096e-05,
+ "loss": 0.1382,
+ "step": 6845
+ },
+ {
+ "epoch": 39.12,
+ "grad_norm": 76.25215148925781,
+ "learning_rate": 1.208888888888889e-05,
+ "loss": 0.2699,
+ "step": 6846
+ },
+ {
+ "epoch": 39.12571428571429,
+ "grad_norm": 46.23180389404297,
+ "learning_rate": 1.2082539682539682e-05,
+ "loss": 0.1699,
+ "step": 6847
+ },
+ {
+ "epoch": 39.13142857142857,
+ "grad_norm": 40.75603485107422,
+ "learning_rate": 1.2076190476190477e-05,
+ "loss": 0.1141,
+ "step": 6848
+ },
+ {
+ "epoch": 39.137142857142855,
+ "grad_norm": 27.2888126373291,
+ "learning_rate": 1.206984126984127e-05,
+ "loss": 0.1497,
+ "step": 6849
+ },
+ {
+ "epoch": 39.142857142857146,
+ "grad_norm": 39.695926666259766,
+ "learning_rate": 1.2063492063492064e-05,
+ "loss": 0.2857,
+ "step": 6850
+ },
+ {
+ "epoch": 39.14857142857143,
+ "grad_norm": 34.217193603515625,
+ "learning_rate": 1.2057142857142856e-05,
+ "loss": 0.2368,
+ "step": 6851
+ },
+ {
+ "epoch": 39.15428571428571,
+ "grad_norm": 38.376861572265625,
+ "learning_rate": 1.205079365079365e-05,
+ "loss": 0.1819,
+ "step": 6852
+ },
+ {
+ "epoch": 39.16,
+ "grad_norm": 30.317337036132812,
+ "learning_rate": 1.2044444444444445e-05,
+ "loss": 0.196,
+ "step": 6853
+ },
+ {
+ "epoch": 39.16571428571429,
+ "grad_norm": 35.691986083984375,
+ "learning_rate": 1.203809523809524e-05,
+ "loss": 0.2613,
+ "step": 6854
+ },
+ {
+ "epoch": 39.17142857142857,
+ "grad_norm": 40.64143371582031,
+ "learning_rate": 1.2031746031746032e-05,
+ "loss": 0.2197,
+ "step": 6855
+ },
+ {
+ "epoch": 39.177142857142854,
+ "grad_norm": 21.734079360961914,
+ "learning_rate": 1.2025396825396826e-05,
+ "loss": 0.1838,
+ "step": 6856
+ },
+ {
+ "epoch": 39.182857142857145,
+ "grad_norm": 54.33609390258789,
+ "learning_rate": 1.201904761904762e-05,
+ "loss": 0.1806,
+ "step": 6857
+ },
+ {
+ "epoch": 39.18857142857143,
+ "grad_norm": 30.630271911621094,
+ "learning_rate": 1.2012698412698413e-05,
+ "loss": 0.1703,
+ "step": 6858
+ },
+ {
+ "epoch": 39.19428571428571,
+ "grad_norm": 87.60661315917969,
+ "learning_rate": 1.2006349206349208e-05,
+ "loss": 0.1838,
+ "step": 6859
+ },
+ {
+ "epoch": 39.2,
+ "grad_norm": 35.05860137939453,
+ "learning_rate": 1.2e-05,
+ "loss": 0.2497,
+ "step": 6860
+ },
+ {
+ "epoch": 39.205714285714286,
+ "grad_norm": 27.76970863342285,
+ "learning_rate": 1.1993650793650795e-05,
+ "loss": 0.3259,
+ "step": 6861
+ },
+ {
+ "epoch": 39.21142857142857,
+ "grad_norm": 51.48834991455078,
+ "learning_rate": 1.1987301587301587e-05,
+ "loss": 0.1175,
+ "step": 6862
+ },
+ {
+ "epoch": 39.21714285714286,
+ "grad_norm": 83.65272521972656,
+ "learning_rate": 1.1980952380952382e-05,
+ "loss": 0.163,
+ "step": 6863
+ },
+ {
+ "epoch": 39.222857142857144,
+ "grad_norm": 59.87739562988281,
+ "learning_rate": 1.1974603174603174e-05,
+ "loss": 0.1557,
+ "step": 6864
+ },
+ {
+ "epoch": 39.22857142857143,
+ "grad_norm": 22.3823299407959,
+ "learning_rate": 1.1968253968253969e-05,
+ "loss": 0.2134,
+ "step": 6865
+ },
+ {
+ "epoch": 39.23428571428571,
+ "grad_norm": 273.3268737792969,
+ "learning_rate": 1.1961904761904763e-05,
+ "loss": 0.1774,
+ "step": 6866
+ },
+ {
+ "epoch": 39.24,
+ "grad_norm": 49.09288024902344,
+ "learning_rate": 1.1955555555555556e-05,
+ "loss": 0.1455,
+ "step": 6867
+ },
+ {
+ "epoch": 39.245714285714286,
+ "grad_norm": 69.58202362060547,
+ "learning_rate": 1.194920634920635e-05,
+ "loss": 0.1459,
+ "step": 6868
+ },
+ {
+ "epoch": 39.25142857142857,
+ "grad_norm": 21.42776107788086,
+ "learning_rate": 1.1942857142857142e-05,
+ "loss": 0.1862,
+ "step": 6869
+ },
+ {
+ "epoch": 39.25714285714286,
+ "grad_norm": 487.3718566894531,
+ "learning_rate": 1.1936507936507937e-05,
+ "loss": 0.184,
+ "step": 6870
+ },
+ {
+ "epoch": 39.26285714285714,
+ "grad_norm": 33.55144119262695,
+ "learning_rate": 1.1930158730158731e-05,
+ "loss": 0.1283,
+ "step": 6871
+ },
+ {
+ "epoch": 39.26857142857143,
+ "grad_norm": 1602.2911376953125,
+ "learning_rate": 1.1923809523809524e-05,
+ "loss": 0.2526,
+ "step": 6872
+ },
+ {
+ "epoch": 39.27428571428572,
+ "grad_norm": 30.238550186157227,
+ "learning_rate": 1.1917460317460318e-05,
+ "loss": 0.1724,
+ "step": 6873
+ },
+ {
+ "epoch": 39.28,
+ "grad_norm": 37.69853210449219,
+ "learning_rate": 1.1911111111111112e-05,
+ "loss": 0.2636,
+ "step": 6874
+ },
+ {
+ "epoch": 39.285714285714285,
+ "grad_norm": 199.5904541015625,
+ "learning_rate": 1.1904761904761905e-05,
+ "loss": 0.1469,
+ "step": 6875
+ },
+ {
+ "epoch": 39.29142857142857,
+ "grad_norm": 884.3235473632812,
+ "learning_rate": 1.18984126984127e-05,
+ "loss": 0.191,
+ "step": 6876
+ },
+ {
+ "epoch": 39.29714285714286,
+ "grad_norm": 33.138519287109375,
+ "learning_rate": 1.1892063492063494e-05,
+ "loss": 0.1767,
+ "step": 6877
+ },
+ {
+ "epoch": 39.30285714285714,
+ "grad_norm": 34.070228576660156,
+ "learning_rate": 1.1885714285714286e-05,
+ "loss": 0.1881,
+ "step": 6878
+ },
+ {
+ "epoch": 39.308571428571426,
+ "grad_norm": 81.01456451416016,
+ "learning_rate": 1.187936507936508e-05,
+ "loss": 0.1829,
+ "step": 6879
+ },
+ {
+ "epoch": 39.31428571428572,
+ "grad_norm": 93.02015686035156,
+ "learning_rate": 1.1873015873015873e-05,
+ "loss": 0.1867,
+ "step": 6880
+ },
+ {
+ "epoch": 39.32,
+ "grad_norm": 369.30487060546875,
+ "learning_rate": 1.1866666666666668e-05,
+ "loss": 0.2575,
+ "step": 6881
+ },
+ {
+ "epoch": 39.325714285714284,
+ "grad_norm": 29.82270622253418,
+ "learning_rate": 1.186031746031746e-05,
+ "loss": 0.1923,
+ "step": 6882
+ },
+ {
+ "epoch": 39.331428571428575,
+ "grad_norm": 48.5175666809082,
+ "learning_rate": 1.1853968253968255e-05,
+ "loss": 0.2272,
+ "step": 6883
+ },
+ {
+ "epoch": 39.33714285714286,
+ "grad_norm": 51.21316146850586,
+ "learning_rate": 1.1847619047619047e-05,
+ "loss": 0.1988,
+ "step": 6884
+ },
+ {
+ "epoch": 39.34285714285714,
+ "grad_norm": 55.662986755371094,
+ "learning_rate": 1.1841269841269842e-05,
+ "loss": 0.1732,
+ "step": 6885
+ },
+ {
+ "epoch": 39.348571428571425,
+ "grad_norm": 30.418460845947266,
+ "learning_rate": 1.1834920634920634e-05,
+ "loss": 0.16,
+ "step": 6886
+ },
+ {
+ "epoch": 39.354285714285716,
+ "grad_norm": 35.29583740234375,
+ "learning_rate": 1.1828571428571429e-05,
+ "loss": 0.1157,
+ "step": 6887
+ },
+ {
+ "epoch": 39.36,
+ "grad_norm": 26.877500534057617,
+ "learning_rate": 1.1822222222222223e-05,
+ "loss": 0.2222,
+ "step": 6888
+ },
+ {
+ "epoch": 39.36571428571428,
+ "grad_norm": 24.766796112060547,
+ "learning_rate": 1.1815873015873016e-05,
+ "loss": 0.1343,
+ "step": 6889
+ },
+ {
+ "epoch": 39.371428571428574,
+ "grad_norm": 35.40179443359375,
+ "learning_rate": 1.180952380952381e-05,
+ "loss": 0.1445,
+ "step": 6890
+ },
+ {
+ "epoch": 39.37714285714286,
+ "grad_norm": 20.078857421875,
+ "learning_rate": 1.1803174603174604e-05,
+ "loss": 0.1617,
+ "step": 6891
+ },
+ {
+ "epoch": 39.38285714285714,
+ "grad_norm": 37.09568405151367,
+ "learning_rate": 1.1796825396825399e-05,
+ "loss": 0.2622,
+ "step": 6892
+ },
+ {
+ "epoch": 39.38857142857143,
+ "grad_norm": 43.9173698425293,
+ "learning_rate": 1.1790476190476191e-05,
+ "loss": 0.2056,
+ "step": 6893
+ },
+ {
+ "epoch": 39.394285714285715,
+ "grad_norm": 60.2230339050293,
+ "learning_rate": 1.1784126984126986e-05,
+ "loss": 0.2513,
+ "step": 6894
+ },
+ {
+ "epoch": 39.4,
+ "grad_norm": 121.24998474121094,
+ "learning_rate": 1.1777777777777778e-05,
+ "loss": 0.1214,
+ "step": 6895
+ },
+ {
+ "epoch": 39.40571428571428,
+ "grad_norm": 66.94969177246094,
+ "learning_rate": 1.1771428571428572e-05,
+ "loss": 0.2068,
+ "step": 6896
+ },
+ {
+ "epoch": 39.41142857142857,
+ "grad_norm": 49.56264877319336,
+ "learning_rate": 1.1765079365079365e-05,
+ "loss": 0.2071,
+ "step": 6897
+ },
+ {
+ "epoch": 39.417142857142856,
+ "grad_norm": 51.70158386230469,
+ "learning_rate": 1.175873015873016e-05,
+ "loss": 0.2056,
+ "step": 6898
+ },
+ {
+ "epoch": 39.42285714285714,
+ "grad_norm": 76.47562408447266,
+ "learning_rate": 1.1752380952380954e-05,
+ "loss": 0.213,
+ "step": 6899
+ },
+ {
+ "epoch": 39.42857142857143,
+ "grad_norm": 58.22590637207031,
+ "learning_rate": 1.1746031746031746e-05,
+ "loss": 0.2217,
+ "step": 6900
+ },
+ {
+ "epoch": 39.434285714285714,
+ "grad_norm": 35.62051010131836,
+ "learning_rate": 1.173968253968254e-05,
+ "loss": 0.1365,
+ "step": 6901
+ },
+ {
+ "epoch": 39.44,
+ "grad_norm": 56.19094467163086,
+ "learning_rate": 1.1733333333333333e-05,
+ "loss": 0.1971,
+ "step": 6902
+ },
+ {
+ "epoch": 39.44571428571429,
+ "grad_norm": 462.1736755371094,
+ "learning_rate": 1.1726984126984128e-05,
+ "loss": 0.2218,
+ "step": 6903
+ },
+ {
+ "epoch": 39.45142857142857,
+ "grad_norm": 25.76043701171875,
+ "learning_rate": 1.172063492063492e-05,
+ "loss": 0.4593,
+ "step": 6904
+ },
+ {
+ "epoch": 39.457142857142856,
+ "grad_norm": 27.850421905517578,
+ "learning_rate": 1.1714285714285715e-05,
+ "loss": 0.1536,
+ "step": 6905
+ },
+ {
+ "epoch": 39.462857142857146,
+ "grad_norm": 320.2368469238281,
+ "learning_rate": 1.1707936507936507e-05,
+ "loss": 0.1716,
+ "step": 6906
+ },
+ {
+ "epoch": 39.46857142857143,
+ "grad_norm": 40.94053649902344,
+ "learning_rate": 1.1701587301587302e-05,
+ "loss": 0.1689,
+ "step": 6907
+ },
+ {
+ "epoch": 39.47428571428571,
+ "grad_norm": 62.56889343261719,
+ "learning_rate": 1.1695238095238096e-05,
+ "loss": 0.1877,
+ "step": 6908
+ },
+ {
+ "epoch": 39.48,
+ "grad_norm": 44.383026123046875,
+ "learning_rate": 1.168888888888889e-05,
+ "loss": 0.157,
+ "step": 6909
+ },
+ {
+ "epoch": 39.48571428571429,
+ "grad_norm": 59.0760498046875,
+ "learning_rate": 1.1682539682539685e-05,
+ "loss": 0.1679,
+ "step": 6910
+ },
+ {
+ "epoch": 39.49142857142857,
+ "grad_norm": 55.42938995361328,
+ "learning_rate": 1.1676190476190477e-05,
+ "loss": 0.2307,
+ "step": 6911
+ },
+ {
+ "epoch": 39.497142857142855,
+ "grad_norm": 65.94779968261719,
+ "learning_rate": 1.1669841269841272e-05,
+ "loss": 0.2264,
+ "step": 6912
+ },
+ {
+ "epoch": 39.502857142857145,
+ "grad_norm": 89.81292724609375,
+ "learning_rate": 1.1663492063492064e-05,
+ "loss": 0.2101,
+ "step": 6913
+ },
+ {
+ "epoch": 39.50857142857143,
+ "grad_norm": 28.558591842651367,
+ "learning_rate": 1.1657142857142859e-05,
+ "loss": 0.1894,
+ "step": 6914
+ },
+ {
+ "epoch": 39.51428571428571,
+ "grad_norm": 46.673946380615234,
+ "learning_rate": 1.1650793650793651e-05,
+ "loss": 0.3003,
+ "step": 6915
+ },
+ {
+ "epoch": 39.52,
+ "grad_norm": 34.23926544189453,
+ "learning_rate": 1.1644444444444446e-05,
+ "loss": 0.2361,
+ "step": 6916
+ },
+ {
+ "epoch": 39.52571428571429,
+ "grad_norm": 71.27877044677734,
+ "learning_rate": 1.1638095238095238e-05,
+ "loss": 0.2106,
+ "step": 6917
+ },
+ {
+ "epoch": 39.53142857142857,
+ "grad_norm": 68.56620025634766,
+ "learning_rate": 1.1631746031746032e-05,
+ "loss": 0.2345,
+ "step": 6918
+ },
+ {
+ "epoch": 39.537142857142854,
+ "grad_norm": 20.94009780883789,
+ "learning_rate": 1.1625396825396825e-05,
+ "loss": 0.1667,
+ "step": 6919
+ },
+ {
+ "epoch": 39.542857142857144,
+ "grad_norm": 37.50666427612305,
+ "learning_rate": 1.161904761904762e-05,
+ "loss": 0.1454,
+ "step": 6920
+ },
+ {
+ "epoch": 39.54857142857143,
+ "grad_norm": 68.03517150878906,
+ "learning_rate": 1.1612698412698412e-05,
+ "loss": 0.1825,
+ "step": 6921
+ },
+ {
+ "epoch": 39.55428571428571,
+ "grad_norm": 23.844459533691406,
+ "learning_rate": 1.1606349206349206e-05,
+ "loss": 0.1927,
+ "step": 6922
+ },
+ {
+ "epoch": 39.56,
+ "grad_norm": 84.80583953857422,
+ "learning_rate": 1.16e-05,
+ "loss": 0.1556,
+ "step": 6923
+ },
+ {
+ "epoch": 39.565714285714286,
+ "grad_norm": 23.745746612548828,
+ "learning_rate": 1.1593650793650793e-05,
+ "loss": 0.1969,
+ "step": 6924
+ },
+ {
+ "epoch": 39.57142857142857,
+ "grad_norm": 94.98046875,
+ "learning_rate": 1.1587301587301588e-05,
+ "loss": 0.139,
+ "step": 6925
+ },
+ {
+ "epoch": 39.57714285714286,
+ "grad_norm": 76.0391845703125,
+ "learning_rate": 1.1580952380952382e-05,
+ "loss": 0.2522,
+ "step": 6926
+ },
+ {
+ "epoch": 39.582857142857144,
+ "grad_norm": 80.9561538696289,
+ "learning_rate": 1.1574603174603175e-05,
+ "loss": 0.1765,
+ "step": 6927
+ },
+ {
+ "epoch": 39.58857142857143,
+ "grad_norm": 76.00508117675781,
+ "learning_rate": 1.1568253968253969e-05,
+ "loss": 0.1807,
+ "step": 6928
+ },
+ {
+ "epoch": 39.59428571428572,
+ "grad_norm": 116.78324890136719,
+ "learning_rate": 1.1561904761904763e-05,
+ "loss": 0.1959,
+ "step": 6929
+ },
+ {
+ "epoch": 39.6,
+ "grad_norm": 53.20307159423828,
+ "learning_rate": 1.1555555555555556e-05,
+ "loss": 0.1495,
+ "step": 6930
+ },
+ {
+ "epoch": 39.605714285714285,
+ "grad_norm": 49.313350677490234,
+ "learning_rate": 1.154920634920635e-05,
+ "loss": 0.1888,
+ "step": 6931
+ },
+ {
+ "epoch": 39.61142857142857,
+ "grad_norm": 974.427734375,
+ "learning_rate": 1.1542857142857143e-05,
+ "loss": 0.244,
+ "step": 6932
+ },
+ {
+ "epoch": 39.61714285714286,
+ "grad_norm": 68.5076675415039,
+ "learning_rate": 1.1536507936507937e-05,
+ "loss": 0.1971,
+ "step": 6933
+ },
+ {
+ "epoch": 39.62285714285714,
+ "grad_norm": 48.14308547973633,
+ "learning_rate": 1.1530158730158732e-05,
+ "loss": 0.1892,
+ "step": 6934
+ },
+ {
+ "epoch": 39.628571428571426,
+ "grad_norm": 493.64166259765625,
+ "learning_rate": 1.1523809523809524e-05,
+ "loss": 0.1761,
+ "step": 6935
+ },
+ {
+ "epoch": 39.63428571428572,
+ "grad_norm": 70.71945190429688,
+ "learning_rate": 1.1517460317460319e-05,
+ "loss": 0.2291,
+ "step": 6936
+ },
+ {
+ "epoch": 39.64,
+ "grad_norm": 21.475210189819336,
+ "learning_rate": 1.1511111111111111e-05,
+ "loss": 0.17,
+ "step": 6937
+ },
+ {
+ "epoch": 39.645714285714284,
+ "grad_norm": 48.795433044433594,
+ "learning_rate": 1.1504761904761906e-05,
+ "loss": 0.2267,
+ "step": 6938
+ },
+ {
+ "epoch": 39.651428571428575,
+ "grad_norm": 42.746124267578125,
+ "learning_rate": 1.1498412698412698e-05,
+ "loss": 0.1506,
+ "step": 6939
+ },
+ {
+ "epoch": 39.65714285714286,
+ "grad_norm": 94.1886215209961,
+ "learning_rate": 1.1492063492063492e-05,
+ "loss": 0.2586,
+ "step": 6940
+ },
+ {
+ "epoch": 39.66285714285714,
+ "grad_norm": 507.9591369628906,
+ "learning_rate": 1.1485714285714285e-05,
+ "loss": 0.1645,
+ "step": 6941
+ },
+ {
+ "epoch": 39.668571428571425,
+ "grad_norm": 41.86248016357422,
+ "learning_rate": 1.147936507936508e-05,
+ "loss": 0.171,
+ "step": 6942
+ },
+ {
+ "epoch": 39.674285714285716,
+ "grad_norm": 51.88438415527344,
+ "learning_rate": 1.1473015873015872e-05,
+ "loss": 0.1331,
+ "step": 6943
+ },
+ {
+ "epoch": 39.68,
+ "grad_norm": 353.7901611328125,
+ "learning_rate": 1.1466666666666666e-05,
+ "loss": 0.251,
+ "step": 6944
+ },
+ {
+ "epoch": 39.68571428571428,
+ "grad_norm": 19.806060791015625,
+ "learning_rate": 1.146031746031746e-05,
+ "loss": 0.1681,
+ "step": 6945
+ },
+ {
+ "epoch": 39.691428571428574,
+ "grad_norm": 46.80093002319336,
+ "learning_rate": 1.1453968253968255e-05,
+ "loss": 0.2254,
+ "step": 6946
+ },
+ {
+ "epoch": 39.69714285714286,
+ "grad_norm": 33.5659294128418,
+ "learning_rate": 1.144761904761905e-05,
+ "loss": 0.1462,
+ "step": 6947
+ },
+ {
+ "epoch": 39.70285714285714,
+ "grad_norm": 323.8052062988281,
+ "learning_rate": 1.1441269841269842e-05,
+ "loss": 0.2597,
+ "step": 6948
+ },
+ {
+ "epoch": 39.70857142857143,
+ "grad_norm": 27.59954071044922,
+ "learning_rate": 1.1434920634920636e-05,
+ "loss": 0.2973,
+ "step": 6949
+ },
+ {
+ "epoch": 39.714285714285715,
+ "grad_norm": 80.96438598632812,
+ "learning_rate": 1.1428571428571429e-05,
+ "loss": 0.2237,
+ "step": 6950
+ },
+ {
+ "epoch": 39.72,
+ "grad_norm": 52.7150764465332,
+ "learning_rate": 1.1422222222222223e-05,
+ "loss": 0.1318,
+ "step": 6951
+ },
+ {
+ "epoch": 39.72571428571428,
+ "grad_norm": 65.87142181396484,
+ "learning_rate": 1.1415873015873016e-05,
+ "loss": 0.1935,
+ "step": 6952
+ },
+ {
+ "epoch": 39.73142857142857,
+ "grad_norm": 34.133445739746094,
+ "learning_rate": 1.140952380952381e-05,
+ "loss": 0.2051,
+ "step": 6953
+ },
+ {
+ "epoch": 39.73714285714286,
+ "grad_norm": 29.996055603027344,
+ "learning_rate": 1.1403174603174603e-05,
+ "loss": 0.1625,
+ "step": 6954
+ },
+ {
+ "epoch": 39.74285714285714,
+ "grad_norm": 35.15446472167969,
+ "learning_rate": 1.1396825396825397e-05,
+ "loss": 0.2354,
+ "step": 6955
+ },
+ {
+ "epoch": 39.74857142857143,
+ "grad_norm": 30.359275817871094,
+ "learning_rate": 1.1390476190476192e-05,
+ "loss": 0.1966,
+ "step": 6956
+ },
+ {
+ "epoch": 39.754285714285714,
+ "grad_norm": 25.765777587890625,
+ "learning_rate": 1.1384126984126984e-05,
+ "loss": 0.2597,
+ "step": 6957
+ },
+ {
+ "epoch": 39.76,
+ "grad_norm": 49.04469299316406,
+ "learning_rate": 1.1377777777777779e-05,
+ "loss": 0.1757,
+ "step": 6958
+ },
+ {
+ "epoch": 39.76571428571429,
+ "grad_norm": 83.82494354248047,
+ "learning_rate": 1.1371428571428571e-05,
+ "loss": 0.1962,
+ "step": 6959
+ },
+ {
+ "epoch": 39.77142857142857,
+ "grad_norm": 115.36933898925781,
+ "learning_rate": 1.1365079365079366e-05,
+ "loss": 0.2437,
+ "step": 6960
+ },
+ {
+ "epoch": 39.777142857142856,
+ "grad_norm": 42.116424560546875,
+ "learning_rate": 1.1358730158730158e-05,
+ "loss": 0.2258,
+ "step": 6961
+ },
+ {
+ "epoch": 39.78285714285714,
+ "grad_norm": 33.92232131958008,
+ "learning_rate": 1.1352380952380953e-05,
+ "loss": 0.2757,
+ "step": 6962
+ },
+ {
+ "epoch": 39.78857142857143,
+ "grad_norm": 27.678110122680664,
+ "learning_rate": 1.1346031746031747e-05,
+ "loss": 0.1723,
+ "step": 6963
+ },
+ {
+ "epoch": 39.794285714285714,
+ "grad_norm": 46.185401916503906,
+ "learning_rate": 1.1339682539682541e-05,
+ "loss": 0.1713,
+ "step": 6964
+ },
+ {
+ "epoch": 39.8,
+ "grad_norm": 34.324920654296875,
+ "learning_rate": 1.1333333333333334e-05,
+ "loss": 0.1703,
+ "step": 6965
+ },
+ {
+ "epoch": 39.80571428571429,
+ "grad_norm": 29.69679832458496,
+ "learning_rate": 1.1326984126984128e-05,
+ "loss": 0.1478,
+ "step": 6966
+ },
+ {
+ "epoch": 39.81142857142857,
+ "grad_norm": 30.579904556274414,
+ "learning_rate": 1.1320634920634922e-05,
+ "loss": 0.1566,
+ "step": 6967
+ },
+ {
+ "epoch": 39.817142857142855,
+ "grad_norm": 40.814144134521484,
+ "learning_rate": 1.1314285714285715e-05,
+ "loss": 0.1769,
+ "step": 6968
+ },
+ {
+ "epoch": 39.822857142857146,
+ "grad_norm": 45.01163101196289,
+ "learning_rate": 1.130793650793651e-05,
+ "loss": 0.1629,
+ "step": 6969
+ },
+ {
+ "epoch": 39.82857142857143,
+ "grad_norm": 77.55461120605469,
+ "learning_rate": 1.1301587301587302e-05,
+ "loss": 0.1999,
+ "step": 6970
+ },
+ {
+ "epoch": 39.83428571428571,
+ "grad_norm": 43.04883575439453,
+ "learning_rate": 1.1295238095238096e-05,
+ "loss": 0.1714,
+ "step": 6971
+ },
+ {
+ "epoch": 39.84,
+ "grad_norm": 45.441444396972656,
+ "learning_rate": 1.1288888888888889e-05,
+ "loss": 0.1509,
+ "step": 6972
+ },
+ {
+ "epoch": 39.84571428571429,
+ "grad_norm": 60.79438018798828,
+ "learning_rate": 1.1282539682539683e-05,
+ "loss": 0.278,
+ "step": 6973
+ },
+ {
+ "epoch": 39.85142857142857,
+ "grad_norm": 35.067474365234375,
+ "learning_rate": 1.1276190476190476e-05,
+ "loss": 0.1926,
+ "step": 6974
+ },
+ {
+ "epoch": 39.857142857142854,
+ "grad_norm": 41.40991973876953,
+ "learning_rate": 1.126984126984127e-05,
+ "loss": 0.2465,
+ "step": 6975
+ },
+ {
+ "epoch": 39.862857142857145,
+ "grad_norm": 42.7786750793457,
+ "learning_rate": 1.1263492063492063e-05,
+ "loss": 0.1523,
+ "step": 6976
+ },
+ {
+ "epoch": 39.86857142857143,
+ "grad_norm": 43.09067916870117,
+ "learning_rate": 1.1257142857142857e-05,
+ "loss": 0.1933,
+ "step": 6977
+ },
+ {
+ "epoch": 39.87428571428571,
+ "grad_norm": 22.420692443847656,
+ "learning_rate": 1.125079365079365e-05,
+ "loss": 0.172,
+ "step": 6978
+ },
+ {
+ "epoch": 39.88,
+ "grad_norm": 18.63651466369629,
+ "learning_rate": 1.1244444444444444e-05,
+ "loss": 0.1813,
+ "step": 6979
+ },
+ {
+ "epoch": 39.885714285714286,
+ "grad_norm": 87.5951919555664,
+ "learning_rate": 1.1238095238095239e-05,
+ "loss": 0.1922,
+ "step": 6980
+ },
+ {
+ "epoch": 39.89142857142857,
+ "grad_norm": 23.170183181762695,
+ "learning_rate": 1.1231746031746033e-05,
+ "loss": 0.1528,
+ "step": 6981
+ },
+ {
+ "epoch": 39.89714285714286,
+ "grad_norm": 35.52743148803711,
+ "learning_rate": 1.1225396825396827e-05,
+ "loss": 0.1756,
+ "step": 6982
+ },
+ {
+ "epoch": 39.902857142857144,
+ "grad_norm": 22.46258544921875,
+ "learning_rate": 1.121904761904762e-05,
+ "loss": 0.1842,
+ "step": 6983
+ },
+ {
+ "epoch": 39.90857142857143,
+ "grad_norm": 38.78075408935547,
+ "learning_rate": 1.1212698412698414e-05,
+ "loss": 0.1482,
+ "step": 6984
+ },
+ {
+ "epoch": 39.91428571428571,
+ "grad_norm": 42.88488006591797,
+ "learning_rate": 1.1206349206349207e-05,
+ "loss": 0.1541,
+ "step": 6985
+ },
+ {
+ "epoch": 39.92,
+ "grad_norm": 86.86996459960938,
+ "learning_rate": 1.1200000000000001e-05,
+ "loss": 0.1831,
+ "step": 6986
+ },
+ {
+ "epoch": 39.925714285714285,
+ "grad_norm": 43.99440002441406,
+ "learning_rate": 1.1193650793650794e-05,
+ "loss": 0.2159,
+ "step": 6987
+ },
+ {
+ "epoch": 39.93142857142857,
+ "grad_norm": 43.604984283447266,
+ "learning_rate": 1.1187301587301588e-05,
+ "loss": 0.2432,
+ "step": 6988
+ },
+ {
+ "epoch": 39.93714285714286,
+ "grad_norm": 22.00592803955078,
+ "learning_rate": 1.118095238095238e-05,
+ "loss": 0.1796,
+ "step": 6989
+ },
+ {
+ "epoch": 39.94285714285714,
+ "grad_norm": 50.48416519165039,
+ "learning_rate": 1.1174603174603175e-05,
+ "loss": 0.2066,
+ "step": 6990
+ },
+ {
+ "epoch": 39.94857142857143,
+ "grad_norm": 35.589725494384766,
+ "learning_rate": 1.116825396825397e-05,
+ "loss": 0.1294,
+ "step": 6991
+ },
+ {
+ "epoch": 39.95428571428572,
+ "grad_norm": 23.348114013671875,
+ "learning_rate": 1.1161904761904762e-05,
+ "loss": 0.2204,
+ "step": 6992
+ },
+ {
+ "epoch": 39.96,
+ "grad_norm": 89.19754791259766,
+ "learning_rate": 1.1155555555555556e-05,
+ "loss": 0.2061,
+ "step": 6993
+ },
+ {
+ "epoch": 39.965714285714284,
+ "grad_norm": 38.78104782104492,
+ "learning_rate": 1.1149206349206349e-05,
+ "loss": 0.1605,
+ "step": 6994
+ },
+ {
+ "epoch": 39.97142857142857,
+ "grad_norm": 41.04331588745117,
+ "learning_rate": 1.1142857142857143e-05,
+ "loss": 0.201,
+ "step": 6995
+ },
+ {
+ "epoch": 39.97714285714286,
+ "grad_norm": 29.142427444458008,
+ "learning_rate": 1.1136507936507936e-05,
+ "loss": 0.1823,
+ "step": 6996
+ },
+ {
+ "epoch": 39.98285714285714,
+ "grad_norm": 42.97498321533203,
+ "learning_rate": 1.113015873015873e-05,
+ "loss": 0.173,
+ "step": 6997
+ },
+ {
+ "epoch": 39.988571428571426,
+ "grad_norm": 19.229862213134766,
+ "learning_rate": 1.1123809523809525e-05,
+ "loss": 0.1316,
+ "step": 6998
+ },
+ {
+ "epoch": 39.994285714285716,
+ "grad_norm": 78.10343933105469,
+ "learning_rate": 1.1117460317460317e-05,
+ "loss": 0.1944,
+ "step": 6999
+ },
+ {
+ "epoch": 40.0,
+ "grad_norm": 29.40753936767578,
+ "learning_rate": 1.1111111111111112e-05,
+ "loss": 0.1638,
+ "step": 7000
+ },
+ {
+ "epoch": 40.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5729948282241821,
+ "eval_map": 0.9403,
+ "eval_map_50": 0.9713,
+ "eval_map_75": 0.9656,
+ "eval_map_large": 0.9404,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9403,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7908,
+ "eval_mar_10": 0.9775,
+ "eval_mar_100": 0.9784,
+ "eval_mar_100_per_class": 0.9784,
+ "eval_mar_large": 0.9784,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.2563,
+ "eval_samples_per_second": 22.178,
+ "eval_steps_per_second": 2.791,
+ "step": 7000
+ },
+ {
+ "epoch": 40.005714285714284,
+ "grad_norm": 30.414432525634766,
+ "learning_rate": 1.1104761904761906e-05,
+ "loss": 0.1652,
+ "step": 7001
+ },
+ {
+ "epoch": 40.011428571428574,
+ "grad_norm": 39.77214813232422,
+ "learning_rate": 1.10984126984127e-05,
+ "loss": 0.4243,
+ "step": 7002
+ },
+ {
+ "epoch": 40.01714285714286,
+ "grad_norm": 33.90891647338867,
+ "learning_rate": 1.1092063492063493e-05,
+ "loss": 0.1739,
+ "step": 7003
+ },
+ {
+ "epoch": 40.02285714285714,
+ "grad_norm": 16.878549575805664,
+ "learning_rate": 1.1085714285714287e-05,
+ "loss": 0.2202,
+ "step": 7004
+ },
+ {
+ "epoch": 40.02857142857143,
+ "grad_norm": 38.1385612487793,
+ "learning_rate": 1.107936507936508e-05,
+ "loss": 0.1297,
+ "step": 7005
+ },
+ {
+ "epoch": 40.034285714285716,
+ "grad_norm": 53.239200592041016,
+ "learning_rate": 1.1073015873015874e-05,
+ "loss": 0.2195,
+ "step": 7006
+ },
+ {
+ "epoch": 40.04,
+ "grad_norm": 36.35694122314453,
+ "learning_rate": 1.1066666666666667e-05,
+ "loss": 0.1495,
+ "step": 7007
+ },
+ {
+ "epoch": 40.04571428571428,
+ "grad_norm": 80.08834075927734,
+ "learning_rate": 1.1060317460317461e-05,
+ "loss": 0.1739,
+ "step": 7008
+ },
+ {
+ "epoch": 40.05142857142857,
+ "grad_norm": 37.675682067871094,
+ "learning_rate": 1.1053968253968254e-05,
+ "loss": 0.1584,
+ "step": 7009
+ },
+ {
+ "epoch": 40.05714285714286,
+ "grad_norm": 42.608245849609375,
+ "learning_rate": 1.1047619047619048e-05,
+ "loss": 0.1318,
+ "step": 7010
+ },
+ {
+ "epoch": 40.06285714285714,
+ "grad_norm": 76.56353759765625,
+ "learning_rate": 1.104126984126984e-05,
+ "loss": 0.1978,
+ "step": 7011
+ },
+ {
+ "epoch": 40.06857142857143,
+ "grad_norm": 29.985271453857422,
+ "learning_rate": 1.1034920634920635e-05,
+ "loss": 0.2171,
+ "step": 7012
+ },
+ {
+ "epoch": 40.074285714285715,
+ "grad_norm": 23.408645629882812,
+ "learning_rate": 1.102857142857143e-05,
+ "loss": 0.1768,
+ "step": 7013
+ },
+ {
+ "epoch": 40.08,
+ "grad_norm": 35.90739440917969,
+ "learning_rate": 1.1022222222222222e-05,
+ "loss": 0.1585,
+ "step": 7014
+ },
+ {
+ "epoch": 40.08571428571429,
+ "grad_norm": 29.93295669555664,
+ "learning_rate": 1.1015873015873016e-05,
+ "loss": 0.1715,
+ "step": 7015
+ },
+ {
+ "epoch": 40.09142857142857,
+ "grad_norm": 174.803955078125,
+ "learning_rate": 1.1009523809523809e-05,
+ "loss": 0.1777,
+ "step": 7016
+ },
+ {
+ "epoch": 40.097142857142856,
+ "grad_norm": 46.307621002197266,
+ "learning_rate": 1.1003174603174603e-05,
+ "loss": 0.1968,
+ "step": 7017
+ },
+ {
+ "epoch": 40.10285714285714,
+ "grad_norm": 30.228389739990234,
+ "learning_rate": 1.0996825396825398e-05,
+ "loss": 0.1642,
+ "step": 7018
+ },
+ {
+ "epoch": 40.10857142857143,
+ "grad_norm": 53.23178482055664,
+ "learning_rate": 1.0990476190476192e-05,
+ "loss": 0.1606,
+ "step": 7019
+ },
+ {
+ "epoch": 40.114285714285714,
+ "grad_norm": 36.800209045410156,
+ "learning_rate": 1.0984126984126985e-05,
+ "loss": 0.1575,
+ "step": 7020
+ },
+ {
+ "epoch": 40.12,
+ "grad_norm": 36.75564193725586,
+ "learning_rate": 1.0977777777777779e-05,
+ "loss": 0.143,
+ "step": 7021
+ },
+ {
+ "epoch": 40.12571428571429,
+ "grad_norm": 67.83435821533203,
+ "learning_rate": 1.0971428571428572e-05,
+ "loss": 0.2065,
+ "step": 7022
+ },
+ {
+ "epoch": 40.13142857142857,
+ "grad_norm": 30.078880310058594,
+ "learning_rate": 1.0965079365079366e-05,
+ "loss": 0.3438,
+ "step": 7023
+ },
+ {
+ "epoch": 40.137142857142855,
+ "grad_norm": 66.90591430664062,
+ "learning_rate": 1.0958730158730159e-05,
+ "loss": 0.1947,
+ "step": 7024
+ },
+ {
+ "epoch": 40.142857142857146,
+ "grad_norm": 33.890262603759766,
+ "learning_rate": 1.0952380952380953e-05,
+ "loss": 0.1603,
+ "step": 7025
+ },
+ {
+ "epoch": 40.14857142857143,
+ "grad_norm": 49.547325134277344,
+ "learning_rate": 1.0946031746031747e-05,
+ "loss": 0.221,
+ "step": 7026
+ },
+ {
+ "epoch": 40.15428571428571,
+ "grad_norm": 62.14547348022461,
+ "learning_rate": 1.093968253968254e-05,
+ "loss": 0.2098,
+ "step": 7027
+ },
+ {
+ "epoch": 40.16,
+ "grad_norm": 26.917131423950195,
+ "learning_rate": 1.0933333333333334e-05,
+ "loss": 0.1699,
+ "step": 7028
+ },
+ {
+ "epoch": 40.16571428571429,
+ "grad_norm": 35.044677734375,
+ "learning_rate": 1.0926984126984127e-05,
+ "loss": 0.1271,
+ "step": 7029
+ },
+ {
+ "epoch": 40.17142857142857,
+ "grad_norm": 93.6030502319336,
+ "learning_rate": 1.0920634920634921e-05,
+ "loss": 0.2828,
+ "step": 7030
+ },
+ {
+ "epoch": 40.177142857142854,
+ "grad_norm": 46.65414810180664,
+ "learning_rate": 1.0914285714285714e-05,
+ "loss": 0.1463,
+ "step": 7031
+ },
+ {
+ "epoch": 40.182857142857145,
+ "grad_norm": 24.331409454345703,
+ "learning_rate": 1.0907936507936508e-05,
+ "loss": 0.2234,
+ "step": 7032
+ },
+ {
+ "epoch": 40.18857142857143,
+ "grad_norm": 42.96149826049805,
+ "learning_rate": 1.09015873015873e-05,
+ "loss": 0.1677,
+ "step": 7033
+ },
+ {
+ "epoch": 40.19428571428571,
+ "grad_norm": 41.679786682128906,
+ "learning_rate": 1.0895238095238095e-05,
+ "loss": 0.1671,
+ "step": 7034
+ },
+ {
+ "epoch": 40.2,
+ "grad_norm": 41.53316879272461,
+ "learning_rate": 1.088888888888889e-05,
+ "loss": 0.145,
+ "step": 7035
+ },
+ {
+ "epoch": 40.205714285714286,
+ "grad_norm": 44.54117202758789,
+ "learning_rate": 1.0882539682539684e-05,
+ "loss": 0.2878,
+ "step": 7036
+ },
+ {
+ "epoch": 40.21142857142857,
+ "grad_norm": 116.3648910522461,
+ "learning_rate": 1.0876190476190478e-05,
+ "loss": 0.1527,
+ "step": 7037
+ },
+ {
+ "epoch": 40.21714285714286,
+ "grad_norm": 75.6655044555664,
+ "learning_rate": 1.086984126984127e-05,
+ "loss": 0.1768,
+ "step": 7038
+ },
+ {
+ "epoch": 40.222857142857144,
+ "grad_norm": 41.37080764770508,
+ "learning_rate": 1.0863492063492065e-05,
+ "loss": 0.2164,
+ "step": 7039
+ },
+ {
+ "epoch": 40.22857142857143,
+ "grad_norm": 36.2270393371582,
+ "learning_rate": 1.0857142857142858e-05,
+ "loss": 0.198,
+ "step": 7040
+ },
+ {
+ "epoch": 40.23428571428571,
+ "grad_norm": 52.032310485839844,
+ "learning_rate": 1.0850793650793652e-05,
+ "loss": 0.1649,
+ "step": 7041
+ },
+ {
+ "epoch": 40.24,
+ "grad_norm": 31.848403930664062,
+ "learning_rate": 1.0844444444444445e-05,
+ "loss": 0.1873,
+ "step": 7042
+ },
+ {
+ "epoch": 40.245714285714286,
+ "grad_norm": 63.652000427246094,
+ "learning_rate": 1.0838095238095239e-05,
+ "loss": 0.1516,
+ "step": 7043
+ },
+ {
+ "epoch": 40.25142857142857,
+ "grad_norm": 55.00597381591797,
+ "learning_rate": 1.0831746031746032e-05,
+ "loss": 0.2101,
+ "step": 7044
+ },
+ {
+ "epoch": 40.25714285714286,
+ "grad_norm": 90.49565887451172,
+ "learning_rate": 1.0825396825396826e-05,
+ "loss": 0.1857,
+ "step": 7045
+ },
+ {
+ "epoch": 40.26285714285714,
+ "grad_norm": 38.97643280029297,
+ "learning_rate": 1.0819047619047619e-05,
+ "loss": 0.1871,
+ "step": 7046
+ },
+ {
+ "epoch": 40.26857142857143,
+ "grad_norm": 60.40447998046875,
+ "learning_rate": 1.0812698412698413e-05,
+ "loss": 0.1452,
+ "step": 7047
+ },
+ {
+ "epoch": 40.27428571428572,
+ "grad_norm": 40.19563293457031,
+ "learning_rate": 1.0806349206349207e-05,
+ "loss": 0.284,
+ "step": 7048
+ },
+ {
+ "epoch": 40.28,
+ "grad_norm": 27.480947494506836,
+ "learning_rate": 1.08e-05,
+ "loss": 0.2156,
+ "step": 7049
+ },
+ {
+ "epoch": 40.285714285714285,
+ "grad_norm": 45.169227600097656,
+ "learning_rate": 1.0793650793650794e-05,
+ "loss": 0.1619,
+ "step": 7050
+ },
+ {
+ "epoch": 40.29142857142857,
+ "grad_norm": 57.9741325378418,
+ "learning_rate": 1.0787301587301587e-05,
+ "loss": 0.2071,
+ "step": 7051
+ },
+ {
+ "epoch": 40.29714285714286,
+ "grad_norm": 48.04543685913086,
+ "learning_rate": 1.0780952380952381e-05,
+ "loss": 0.209,
+ "step": 7052
+ },
+ {
+ "epoch": 40.30285714285714,
+ "grad_norm": 18.890419006347656,
+ "learning_rate": 1.0774603174603176e-05,
+ "loss": 0.1393,
+ "step": 7053
+ },
+ {
+ "epoch": 40.308571428571426,
+ "grad_norm": 21.3095645904541,
+ "learning_rate": 1.0768253968253968e-05,
+ "loss": 0.181,
+ "step": 7054
+ },
+ {
+ "epoch": 40.31428571428572,
+ "grad_norm": 20.197839736938477,
+ "learning_rate": 1.0761904761904763e-05,
+ "loss": 0.1545,
+ "step": 7055
+ },
+ {
+ "epoch": 40.32,
+ "grad_norm": 67.84980773925781,
+ "learning_rate": 1.0755555555555557e-05,
+ "loss": 0.1378,
+ "step": 7056
+ },
+ {
+ "epoch": 40.325714285714284,
+ "grad_norm": 30.961355209350586,
+ "learning_rate": 1.074920634920635e-05,
+ "loss": 0.1575,
+ "step": 7057
+ },
+ {
+ "epoch": 40.331428571428575,
+ "grad_norm": 61.83423614501953,
+ "learning_rate": 1.0742857142857144e-05,
+ "loss": 0.182,
+ "step": 7058
+ },
+ {
+ "epoch": 40.33714285714286,
+ "grad_norm": 80.88799285888672,
+ "learning_rate": 1.0736507936507938e-05,
+ "loss": 0.152,
+ "step": 7059
+ },
+ {
+ "epoch": 40.34285714285714,
+ "grad_norm": 62.28117752075195,
+ "learning_rate": 1.073015873015873e-05,
+ "loss": 0.1789,
+ "step": 7060
+ },
+ {
+ "epoch": 40.348571428571425,
+ "grad_norm": 102.28784942626953,
+ "learning_rate": 1.0723809523809525e-05,
+ "loss": 0.1686,
+ "step": 7061
+ },
+ {
+ "epoch": 40.354285714285716,
+ "grad_norm": 73.03924560546875,
+ "learning_rate": 1.0717460317460318e-05,
+ "loss": 0.1942,
+ "step": 7062
+ },
+ {
+ "epoch": 40.36,
+ "grad_norm": 75.60195922851562,
+ "learning_rate": 1.0711111111111112e-05,
+ "loss": 0.217,
+ "step": 7063
+ },
+ {
+ "epoch": 40.36571428571428,
+ "grad_norm": 65.61262512207031,
+ "learning_rate": 1.0704761904761905e-05,
+ "loss": 0.1986,
+ "step": 7064
+ },
+ {
+ "epoch": 40.371428571428574,
+ "grad_norm": 498.84271240234375,
+ "learning_rate": 1.0698412698412699e-05,
+ "loss": 0.2723,
+ "step": 7065
+ },
+ {
+ "epoch": 40.37714285714286,
+ "grad_norm": 44.167240142822266,
+ "learning_rate": 1.0692063492063492e-05,
+ "loss": 0.2334,
+ "step": 7066
+ },
+ {
+ "epoch": 40.38285714285714,
+ "grad_norm": 67.59822845458984,
+ "learning_rate": 1.0685714285714286e-05,
+ "loss": 0.1676,
+ "step": 7067
+ },
+ {
+ "epoch": 40.38857142857143,
+ "grad_norm": 41.171634674072266,
+ "learning_rate": 1.0679365079365079e-05,
+ "loss": 0.148,
+ "step": 7068
+ },
+ {
+ "epoch": 40.394285714285715,
+ "grad_norm": 36.31204605102539,
+ "learning_rate": 1.0673015873015873e-05,
+ "loss": 0.1669,
+ "step": 7069
+ },
+ {
+ "epoch": 40.4,
+ "grad_norm": 55.22999572753906,
+ "learning_rate": 1.0666666666666667e-05,
+ "loss": 0.1675,
+ "step": 7070
+ },
+ {
+ "epoch": 40.40571428571428,
+ "grad_norm": 109.92345428466797,
+ "learning_rate": 1.066031746031746e-05,
+ "loss": 0.2286,
+ "step": 7071
+ },
+ {
+ "epoch": 40.41142857142857,
+ "grad_norm": 87.8012466430664,
+ "learning_rate": 1.0653968253968254e-05,
+ "loss": 0.1891,
+ "step": 7072
+ },
+ {
+ "epoch": 40.417142857142856,
+ "grad_norm": 233.1424560546875,
+ "learning_rate": 1.0647619047619049e-05,
+ "loss": 0.1949,
+ "step": 7073
+ },
+ {
+ "epoch": 40.42285714285714,
+ "grad_norm": 56.26198196411133,
+ "learning_rate": 1.0641269841269843e-05,
+ "loss": 0.1869,
+ "step": 7074
+ },
+ {
+ "epoch": 40.42857142857143,
+ "grad_norm": 246.6047821044922,
+ "learning_rate": 1.0634920634920636e-05,
+ "loss": 0.1971,
+ "step": 7075
+ },
+ {
+ "epoch": 40.434285714285714,
+ "grad_norm": 36.35358428955078,
+ "learning_rate": 1.062857142857143e-05,
+ "loss": 0.2352,
+ "step": 7076
+ },
+ {
+ "epoch": 40.44,
+ "grad_norm": 17.073652267456055,
+ "learning_rate": 1.0622222222222223e-05,
+ "loss": 0.1667,
+ "step": 7077
+ },
+ {
+ "epoch": 40.44571428571429,
+ "grad_norm": 34.12038040161133,
+ "learning_rate": 1.0615873015873017e-05,
+ "loss": 0.1763,
+ "step": 7078
+ },
+ {
+ "epoch": 40.45142857142857,
+ "grad_norm": 40.721702575683594,
+ "learning_rate": 1.060952380952381e-05,
+ "loss": 0.1756,
+ "step": 7079
+ },
+ {
+ "epoch": 40.457142857142856,
+ "grad_norm": 66.41804504394531,
+ "learning_rate": 1.0603174603174604e-05,
+ "loss": 0.1986,
+ "step": 7080
+ },
+ {
+ "epoch": 40.462857142857146,
+ "grad_norm": 26.351877212524414,
+ "learning_rate": 1.0596825396825396e-05,
+ "loss": 0.1862,
+ "step": 7081
+ },
+ {
+ "epoch": 40.46857142857143,
+ "grad_norm": 37.494659423828125,
+ "learning_rate": 1.059047619047619e-05,
+ "loss": 0.1328,
+ "step": 7082
+ },
+ {
+ "epoch": 40.47428571428571,
+ "grad_norm": 51.77208709716797,
+ "learning_rate": 1.0584126984126985e-05,
+ "loss": 0.1816,
+ "step": 7083
+ },
+ {
+ "epoch": 40.48,
+ "grad_norm": 91.71420288085938,
+ "learning_rate": 1.0577777777777778e-05,
+ "loss": 0.2641,
+ "step": 7084
+ },
+ {
+ "epoch": 40.48571428571429,
+ "grad_norm": 47.01348114013672,
+ "learning_rate": 1.0571428571428572e-05,
+ "loss": 0.201,
+ "step": 7085
+ },
+ {
+ "epoch": 40.49142857142857,
+ "grad_norm": 24.24553108215332,
+ "learning_rate": 1.0565079365079365e-05,
+ "loss": 0.185,
+ "step": 7086
+ },
+ {
+ "epoch": 40.497142857142855,
+ "grad_norm": 24.89585304260254,
+ "learning_rate": 1.0558730158730159e-05,
+ "loss": 0.1371,
+ "step": 7087
+ },
+ {
+ "epoch": 40.502857142857145,
+ "grad_norm": 67.30992126464844,
+ "learning_rate": 1.0552380952380952e-05,
+ "loss": 0.1728,
+ "step": 7088
+ },
+ {
+ "epoch": 40.50857142857143,
+ "grad_norm": 295.7307434082031,
+ "learning_rate": 1.0546031746031746e-05,
+ "loss": 0.1639,
+ "step": 7089
+ },
+ {
+ "epoch": 40.51428571428571,
+ "grad_norm": 20.085918426513672,
+ "learning_rate": 1.053968253968254e-05,
+ "loss": 0.1722,
+ "step": 7090
+ },
+ {
+ "epoch": 40.52,
+ "grad_norm": 47.78133773803711,
+ "learning_rate": 1.0533333333333335e-05,
+ "loss": 0.15,
+ "step": 7091
+ },
+ {
+ "epoch": 40.52571428571429,
+ "grad_norm": 22.585948944091797,
+ "learning_rate": 1.0526984126984127e-05,
+ "loss": 0.1737,
+ "step": 7092
+ },
+ {
+ "epoch": 40.53142857142857,
+ "grad_norm": 20.893796920776367,
+ "learning_rate": 1.0520634920634922e-05,
+ "loss": 0.1234,
+ "step": 7093
+ },
+ {
+ "epoch": 40.537142857142854,
+ "grad_norm": 48.247596740722656,
+ "learning_rate": 1.0514285714285716e-05,
+ "loss": 0.1572,
+ "step": 7094
+ },
+ {
+ "epoch": 40.542857142857144,
+ "grad_norm": 25.091320037841797,
+ "learning_rate": 1.0507936507936509e-05,
+ "loss": 0.1978,
+ "step": 7095
+ },
+ {
+ "epoch": 40.54857142857143,
+ "grad_norm": 51.04119110107422,
+ "learning_rate": 1.0501587301587303e-05,
+ "loss": 0.1781,
+ "step": 7096
+ },
+ {
+ "epoch": 40.55428571428571,
+ "grad_norm": 42.68234634399414,
+ "learning_rate": 1.0495238095238096e-05,
+ "loss": 0.2108,
+ "step": 7097
+ },
+ {
+ "epoch": 40.56,
+ "grad_norm": 118.75373077392578,
+ "learning_rate": 1.048888888888889e-05,
+ "loss": 0.1533,
+ "step": 7098
+ },
+ {
+ "epoch": 40.565714285714286,
+ "grad_norm": 68.9887924194336,
+ "learning_rate": 1.0482539682539683e-05,
+ "loss": 0.2354,
+ "step": 7099
+ },
+ {
+ "epoch": 40.57142857142857,
+ "grad_norm": 30.95083236694336,
+ "learning_rate": 1.0476190476190477e-05,
+ "loss": 0.239,
+ "step": 7100
+ },
+ {
+ "epoch": 40.57714285714286,
+ "grad_norm": 48.973960876464844,
+ "learning_rate": 1.046984126984127e-05,
+ "loss": 0.2646,
+ "step": 7101
+ },
+ {
+ "epoch": 40.582857142857144,
+ "grad_norm": 216.03866577148438,
+ "learning_rate": 1.0463492063492064e-05,
+ "loss": 0.2001,
+ "step": 7102
+ },
+ {
+ "epoch": 40.58857142857143,
+ "grad_norm": 51.790870666503906,
+ "learning_rate": 1.0457142857142856e-05,
+ "loss": 0.2352,
+ "step": 7103
+ },
+ {
+ "epoch": 40.59428571428572,
+ "grad_norm": 26.630159378051758,
+ "learning_rate": 1.045079365079365e-05,
+ "loss": 0.1987,
+ "step": 7104
+ },
+ {
+ "epoch": 40.6,
+ "grad_norm": 51.316497802734375,
+ "learning_rate": 1.0444444444444445e-05,
+ "loss": 0.1359,
+ "step": 7105
+ },
+ {
+ "epoch": 40.605714285714285,
+ "grad_norm": 53.45886993408203,
+ "learning_rate": 1.0438095238095238e-05,
+ "loss": 0.1916,
+ "step": 7106
+ },
+ {
+ "epoch": 40.61142857142857,
+ "grad_norm": 28.972932815551758,
+ "learning_rate": 1.0431746031746032e-05,
+ "loss": 0.2482,
+ "step": 7107
+ },
+ {
+ "epoch": 40.61714285714286,
+ "grad_norm": 112.40228271484375,
+ "learning_rate": 1.0425396825396826e-05,
+ "loss": 0.2062,
+ "step": 7108
+ },
+ {
+ "epoch": 40.62285714285714,
+ "grad_norm": 35.6661376953125,
+ "learning_rate": 1.041904761904762e-05,
+ "loss": 0.1567,
+ "step": 7109
+ },
+ {
+ "epoch": 40.628571428571426,
+ "grad_norm": 47.076473236083984,
+ "learning_rate": 1.0412698412698413e-05,
+ "loss": 0.1987,
+ "step": 7110
+ },
+ {
+ "epoch": 40.63428571428572,
+ "grad_norm": 27.69927978515625,
+ "learning_rate": 1.0406349206349208e-05,
+ "loss": 0.1421,
+ "step": 7111
+ },
+ {
+ "epoch": 40.64,
+ "grad_norm": 41.67701721191406,
+ "learning_rate": 1.04e-05,
+ "loss": 0.1456,
+ "step": 7112
+ },
+ {
+ "epoch": 40.645714285714284,
+ "grad_norm": 168.35186767578125,
+ "learning_rate": 1.0393650793650795e-05,
+ "loss": 0.2464,
+ "step": 7113
+ },
+ {
+ "epoch": 40.651428571428575,
+ "grad_norm": 60.49980545043945,
+ "learning_rate": 1.0387301587301587e-05,
+ "loss": 0.1441,
+ "step": 7114
+ },
+ {
+ "epoch": 40.65714285714286,
+ "grad_norm": 59.31900405883789,
+ "learning_rate": 1.0380952380952382e-05,
+ "loss": 0.1682,
+ "step": 7115
+ },
+ {
+ "epoch": 40.66285714285714,
+ "grad_norm": 41.835601806640625,
+ "learning_rate": 1.0374603174603176e-05,
+ "loss": 0.1564,
+ "step": 7116
+ },
+ {
+ "epoch": 40.668571428571425,
+ "grad_norm": 51.27973937988281,
+ "learning_rate": 1.0368253968253969e-05,
+ "loss": 0.1359,
+ "step": 7117
+ },
+ {
+ "epoch": 40.674285714285716,
+ "grad_norm": 39.31769943237305,
+ "learning_rate": 1.0361904761904763e-05,
+ "loss": 0.168,
+ "step": 7118
+ },
+ {
+ "epoch": 40.68,
+ "grad_norm": 56.311710357666016,
+ "learning_rate": 1.0355555555555556e-05,
+ "loss": 0.1511,
+ "step": 7119
+ },
+ {
+ "epoch": 40.68571428571428,
+ "grad_norm": 49.366722106933594,
+ "learning_rate": 1.034920634920635e-05,
+ "loss": 0.1828,
+ "step": 7120
+ },
+ {
+ "epoch": 40.691428571428574,
+ "grad_norm": 32.207176208496094,
+ "learning_rate": 1.0342857142857143e-05,
+ "loss": 0.1443,
+ "step": 7121
+ },
+ {
+ "epoch": 40.69714285714286,
+ "grad_norm": 57.174530029296875,
+ "learning_rate": 1.0336507936507937e-05,
+ "loss": 0.2462,
+ "step": 7122
+ },
+ {
+ "epoch": 40.70285714285714,
+ "grad_norm": 33.00163650512695,
+ "learning_rate": 1.033015873015873e-05,
+ "loss": 0.1486,
+ "step": 7123
+ },
+ {
+ "epoch": 40.70857142857143,
+ "grad_norm": 54.939388275146484,
+ "learning_rate": 1.0323809523809524e-05,
+ "loss": 0.1501,
+ "step": 7124
+ },
+ {
+ "epoch": 40.714285714285715,
+ "grad_norm": 47.95927429199219,
+ "learning_rate": 1.0317460317460318e-05,
+ "loss": 0.1341,
+ "step": 7125
+ },
+ {
+ "epoch": 40.72,
+ "grad_norm": 42.815513610839844,
+ "learning_rate": 1.031111111111111e-05,
+ "loss": 0.133,
+ "step": 7126
+ },
+ {
+ "epoch": 40.72571428571428,
+ "grad_norm": 41.889278411865234,
+ "learning_rate": 1.0304761904761905e-05,
+ "loss": 0.2395,
+ "step": 7127
+ },
+ {
+ "epoch": 40.73142857142857,
+ "grad_norm": 27.716449737548828,
+ "learning_rate": 1.02984126984127e-05,
+ "loss": 0.1674,
+ "step": 7128
+ },
+ {
+ "epoch": 40.73714285714286,
+ "grad_norm": 26.745899200439453,
+ "learning_rate": 1.0292063492063494e-05,
+ "loss": 0.151,
+ "step": 7129
+ },
+ {
+ "epoch": 40.74285714285714,
+ "grad_norm": 55.21826171875,
+ "learning_rate": 1.0285714285714286e-05,
+ "loss": 0.1403,
+ "step": 7130
+ },
+ {
+ "epoch": 40.74857142857143,
+ "grad_norm": 23.59458351135254,
+ "learning_rate": 1.027936507936508e-05,
+ "loss": 0.1576,
+ "step": 7131
+ },
+ {
+ "epoch": 40.754285714285714,
+ "grad_norm": 33.98942947387695,
+ "learning_rate": 1.0273015873015873e-05,
+ "loss": 0.2031,
+ "step": 7132
+ },
+ {
+ "epoch": 40.76,
+ "grad_norm": 57.35104751586914,
+ "learning_rate": 1.0266666666666668e-05,
+ "loss": 0.1822,
+ "step": 7133
+ },
+ {
+ "epoch": 40.76571428571429,
+ "grad_norm": 34.38922882080078,
+ "learning_rate": 1.026031746031746e-05,
+ "loss": 0.1983,
+ "step": 7134
+ },
+ {
+ "epoch": 40.77142857142857,
+ "grad_norm": 65.87013244628906,
+ "learning_rate": 1.0253968253968255e-05,
+ "loss": 0.2299,
+ "step": 7135
+ },
+ {
+ "epoch": 40.777142857142856,
+ "grad_norm": 25.383821487426758,
+ "learning_rate": 1.0247619047619047e-05,
+ "loss": 0.148,
+ "step": 7136
+ },
+ {
+ "epoch": 40.78285714285714,
+ "grad_norm": 181.95584106445312,
+ "learning_rate": 1.0241269841269842e-05,
+ "loss": 0.1647,
+ "step": 7137
+ },
+ {
+ "epoch": 40.78857142857143,
+ "grad_norm": 83.68648529052734,
+ "learning_rate": 1.0234920634920634e-05,
+ "loss": 0.3434,
+ "step": 7138
+ },
+ {
+ "epoch": 40.794285714285714,
+ "grad_norm": 37.009952545166016,
+ "learning_rate": 1.0228571428571429e-05,
+ "loss": 0.2471,
+ "step": 7139
+ },
+ {
+ "epoch": 40.8,
+ "grad_norm": 25.347917556762695,
+ "learning_rate": 1.0222222222222223e-05,
+ "loss": 0.1191,
+ "step": 7140
+ },
+ {
+ "epoch": 40.80571428571429,
+ "grad_norm": 29.95460319519043,
+ "learning_rate": 1.0215873015873016e-05,
+ "loss": 0.1561,
+ "step": 7141
+ },
+ {
+ "epoch": 40.81142857142857,
+ "grad_norm": 62.166805267333984,
+ "learning_rate": 1.020952380952381e-05,
+ "loss": 0.1845,
+ "step": 7142
+ },
+ {
+ "epoch": 40.817142857142855,
+ "grad_norm": 29.537752151489258,
+ "learning_rate": 1.0203174603174603e-05,
+ "loss": 0.1622,
+ "step": 7143
+ },
+ {
+ "epoch": 40.822857142857146,
+ "grad_norm": 45.15113067626953,
+ "learning_rate": 1.0196825396825397e-05,
+ "loss": 0.1796,
+ "step": 7144
+ },
+ {
+ "epoch": 40.82857142857143,
+ "grad_norm": 58.65167999267578,
+ "learning_rate": 1.0190476190476191e-05,
+ "loss": 0.2089,
+ "step": 7145
+ },
+ {
+ "epoch": 40.83428571428571,
+ "grad_norm": 17.80611228942871,
+ "learning_rate": 1.0184126984126986e-05,
+ "loss": 0.1626,
+ "step": 7146
+ },
+ {
+ "epoch": 40.84,
+ "grad_norm": 37.232444763183594,
+ "learning_rate": 1.0177777777777778e-05,
+ "loss": 0.1372,
+ "step": 7147
+ },
+ {
+ "epoch": 40.84571428571429,
+ "grad_norm": 30.31142234802246,
+ "learning_rate": 1.0171428571428573e-05,
+ "loss": 0.1886,
+ "step": 7148
+ },
+ {
+ "epoch": 40.85142857142857,
+ "grad_norm": 47.255409240722656,
+ "learning_rate": 1.0165079365079365e-05,
+ "loss": 0.1589,
+ "step": 7149
+ },
+ {
+ "epoch": 40.857142857142854,
+ "grad_norm": 63.91340255737305,
+ "learning_rate": 1.015873015873016e-05,
+ "loss": 0.2037,
+ "step": 7150
+ },
+ {
+ "epoch": 40.862857142857145,
+ "grad_norm": 17.828096389770508,
+ "learning_rate": 1.0152380952380954e-05,
+ "loss": 0.1569,
+ "step": 7151
+ },
+ {
+ "epoch": 40.86857142857143,
+ "grad_norm": 27.19562530517578,
+ "learning_rate": 1.0146031746031746e-05,
+ "loss": 0.2451,
+ "step": 7152
+ },
+ {
+ "epoch": 40.87428571428571,
+ "grad_norm": 43.03912353515625,
+ "learning_rate": 1.013968253968254e-05,
+ "loss": 0.1407,
+ "step": 7153
+ },
+ {
+ "epoch": 40.88,
+ "grad_norm": 33.057586669921875,
+ "learning_rate": 1.0133333333333333e-05,
+ "loss": 0.1286,
+ "step": 7154
+ },
+ {
+ "epoch": 40.885714285714286,
+ "grad_norm": 183.0701904296875,
+ "learning_rate": 1.0126984126984128e-05,
+ "loss": 0.1672,
+ "step": 7155
+ },
+ {
+ "epoch": 40.89142857142857,
+ "grad_norm": 42.93004608154297,
+ "learning_rate": 1.012063492063492e-05,
+ "loss": 0.1786,
+ "step": 7156
+ },
+ {
+ "epoch": 40.89714285714286,
+ "grad_norm": 991.3026123046875,
+ "learning_rate": 1.0114285714285715e-05,
+ "loss": 0.1998,
+ "step": 7157
+ },
+ {
+ "epoch": 40.902857142857144,
+ "grad_norm": 42.53242492675781,
+ "learning_rate": 1.0107936507936507e-05,
+ "loss": 0.1536,
+ "step": 7158
+ },
+ {
+ "epoch": 40.90857142857143,
+ "grad_norm": 32.176368713378906,
+ "learning_rate": 1.0101587301587302e-05,
+ "loss": 0.1624,
+ "step": 7159
+ },
+ {
+ "epoch": 40.91428571428571,
+ "grad_norm": 99.18122100830078,
+ "learning_rate": 1.0095238095238094e-05,
+ "loss": 0.1218,
+ "step": 7160
+ },
+ {
+ "epoch": 40.92,
+ "grad_norm": 44.048099517822266,
+ "learning_rate": 1.0088888888888889e-05,
+ "loss": 0.1807,
+ "step": 7161
+ },
+ {
+ "epoch": 40.925714285714285,
+ "grad_norm": 31.111604690551758,
+ "learning_rate": 1.0082539682539683e-05,
+ "loss": 0.1413,
+ "step": 7162
+ },
+ {
+ "epoch": 40.93142857142857,
+ "grad_norm": 63.4465217590332,
+ "learning_rate": 1.0076190476190477e-05,
+ "loss": 0.1993,
+ "step": 7163
+ },
+ {
+ "epoch": 40.93714285714286,
+ "grad_norm": 42.592750549316406,
+ "learning_rate": 1.0069841269841272e-05,
+ "loss": 0.2017,
+ "step": 7164
+ },
+ {
+ "epoch": 40.94285714285714,
+ "grad_norm": 24.540151596069336,
+ "learning_rate": 1.0063492063492064e-05,
+ "loss": 0.1304,
+ "step": 7165
+ },
+ {
+ "epoch": 40.94857142857143,
+ "grad_norm": 72.01651000976562,
+ "learning_rate": 1.0057142857142859e-05,
+ "loss": 0.3004,
+ "step": 7166
+ },
+ {
+ "epoch": 40.95428571428572,
+ "grad_norm": 35.35070037841797,
+ "learning_rate": 1.0050793650793651e-05,
+ "loss": 0.1546,
+ "step": 7167
+ },
+ {
+ "epoch": 40.96,
+ "grad_norm": 44.21571350097656,
+ "learning_rate": 1.0044444444444446e-05,
+ "loss": 0.3235,
+ "step": 7168
+ },
+ {
+ "epoch": 40.965714285714284,
+ "grad_norm": 32.9054069519043,
+ "learning_rate": 1.0038095238095238e-05,
+ "loss": 0.153,
+ "step": 7169
+ },
+ {
+ "epoch": 40.97142857142857,
+ "grad_norm": 26.192230224609375,
+ "learning_rate": 1.0031746031746033e-05,
+ "loss": 0.1832,
+ "step": 7170
+ },
+ {
+ "epoch": 40.97714285714286,
+ "grad_norm": 46.06916427612305,
+ "learning_rate": 1.0025396825396825e-05,
+ "loss": 0.1509,
+ "step": 7171
+ },
+ {
+ "epoch": 40.98285714285714,
+ "grad_norm": 40.123329162597656,
+ "learning_rate": 1.001904761904762e-05,
+ "loss": 0.1606,
+ "step": 7172
+ },
+ {
+ "epoch": 40.988571428571426,
+ "grad_norm": 37.24240493774414,
+ "learning_rate": 1.0012698412698414e-05,
+ "loss": 0.1792,
+ "step": 7173
+ },
+ {
+ "epoch": 40.994285714285716,
+ "grad_norm": 50.49718475341797,
+ "learning_rate": 1.0006349206349206e-05,
+ "loss": 0.15,
+ "step": 7174
+ },
+ {
+ "epoch": 41.0,
+ "grad_norm": 25.841310501098633,
+ "learning_rate": 1e-05,
+ "loss": 0.1727,
+ "step": 7175
+ },
+ {
+ "epoch": 41.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5730687379837036,
+ "eval_map": 0.9366,
+ "eval_map_50": 0.9716,
+ "eval_map_75": 0.9644,
+ "eval_map_large": 0.9366,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9366,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7889,
+ "eval_mar_10": 0.9762,
+ "eval_mar_100": 0.9775,
+ "eval_mar_100_per_class": 0.9775,
+ "eval_mar_large": 0.9775,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.554,
+ "eval_samples_per_second": 21.691,
+ "eval_steps_per_second": 2.73,
+ "step": 7175
+ },
+ {
+ "epoch": 41.005714285714284,
+ "grad_norm": 38.025718688964844,
+ "learning_rate": 9.993650793650793e-06,
+ "loss": 0.1689,
+ "step": 7176
+ },
+ {
+ "epoch": 41.011428571428574,
+ "grad_norm": 110.06847381591797,
+ "learning_rate": 9.987301587301588e-06,
+ "loss": 0.2411,
+ "step": 7177
+ },
+ {
+ "epoch": 41.01714285714286,
+ "grad_norm": 68.49540710449219,
+ "learning_rate": 9.98095238095238e-06,
+ "loss": 0.1803,
+ "step": 7178
+ },
+ {
+ "epoch": 41.02285714285714,
+ "grad_norm": 22.572601318359375,
+ "learning_rate": 9.974603174603175e-06,
+ "loss": 0.151,
+ "step": 7179
+ },
+ {
+ "epoch": 41.02857142857143,
+ "grad_norm": 35.90503692626953,
+ "learning_rate": 9.968253968253969e-06,
+ "loss": 0.1583,
+ "step": 7180
+ },
+ {
+ "epoch": 41.034285714285716,
+ "grad_norm": 54.24342346191406,
+ "learning_rate": 9.961904761904763e-06,
+ "loss": 0.1196,
+ "step": 7181
+ },
+ {
+ "epoch": 41.04,
+ "grad_norm": 26.808170318603516,
+ "learning_rate": 9.955555555555556e-06,
+ "loss": 0.1653,
+ "step": 7182
+ },
+ {
+ "epoch": 41.04571428571428,
+ "grad_norm": 26.27927017211914,
+ "learning_rate": 9.94920634920635e-06,
+ "loss": 0.205,
+ "step": 7183
+ },
+ {
+ "epoch": 41.05142857142857,
+ "grad_norm": 31.863327026367188,
+ "learning_rate": 9.942857142857143e-06,
+ "loss": 0.215,
+ "step": 7184
+ },
+ {
+ "epoch": 41.05714285714286,
+ "grad_norm": 33.81623840332031,
+ "learning_rate": 9.936507936507937e-06,
+ "loss": 0.1212,
+ "step": 7185
+ },
+ {
+ "epoch": 41.06285714285714,
+ "grad_norm": 22.21336555480957,
+ "learning_rate": 9.930158730158732e-06,
+ "loss": 0.2077,
+ "step": 7186
+ },
+ {
+ "epoch": 41.06857142857143,
+ "grad_norm": 39.59670639038086,
+ "learning_rate": 9.923809523809524e-06,
+ "loss": 0.1624,
+ "step": 7187
+ },
+ {
+ "epoch": 41.074285714285715,
+ "grad_norm": 28.05008888244629,
+ "learning_rate": 9.917460317460319e-06,
+ "loss": 0.1848,
+ "step": 7188
+ },
+ {
+ "epoch": 41.08,
+ "grad_norm": 22.544891357421875,
+ "learning_rate": 9.911111111111111e-06,
+ "loss": 0.2286,
+ "step": 7189
+ },
+ {
+ "epoch": 41.08571428571429,
+ "grad_norm": 33.423702239990234,
+ "learning_rate": 9.904761904761906e-06,
+ "loss": 0.1854,
+ "step": 7190
+ },
+ {
+ "epoch": 41.09142857142857,
+ "grad_norm": 45.96302795410156,
+ "learning_rate": 9.898412698412698e-06,
+ "loss": 0.1611,
+ "step": 7191
+ },
+ {
+ "epoch": 41.097142857142856,
+ "grad_norm": 21.249528884887695,
+ "learning_rate": 9.892063492063493e-06,
+ "loss": 0.1757,
+ "step": 7192
+ },
+ {
+ "epoch": 41.10285714285714,
+ "grad_norm": 55.92581558227539,
+ "learning_rate": 9.885714285714285e-06,
+ "loss": 0.4331,
+ "step": 7193
+ },
+ {
+ "epoch": 41.10857142857143,
+ "grad_norm": 158.06480407714844,
+ "learning_rate": 9.87936507936508e-06,
+ "loss": 0.1864,
+ "step": 7194
+ },
+ {
+ "epoch": 41.114285714285714,
+ "grad_norm": 73.18998718261719,
+ "learning_rate": 9.873015873015872e-06,
+ "loss": 0.1886,
+ "step": 7195
+ },
+ {
+ "epoch": 41.12,
+ "grad_norm": 39.14434051513672,
+ "learning_rate": 9.866666666666667e-06,
+ "loss": 0.1682,
+ "step": 7196
+ },
+ {
+ "epoch": 41.12571428571429,
+ "grad_norm": 169.47976684570312,
+ "learning_rate": 9.86031746031746e-06,
+ "loss": 0.1514,
+ "step": 7197
+ },
+ {
+ "epoch": 41.13142857142857,
+ "grad_norm": 55.46110153198242,
+ "learning_rate": 9.853968253968253e-06,
+ "loss": 0.1567,
+ "step": 7198
+ },
+ {
+ "epoch": 41.137142857142855,
+ "grad_norm": 23.1102294921875,
+ "learning_rate": 9.847619047619048e-06,
+ "loss": 0.1469,
+ "step": 7199
+ },
+ {
+ "epoch": 41.142857142857146,
+ "grad_norm": 65.3264389038086,
+ "learning_rate": 9.841269841269842e-06,
+ "loss": 0.2098,
+ "step": 7200
+ },
+ {
+ "epoch": 41.14857142857143,
+ "grad_norm": 18.760353088378906,
+ "learning_rate": 9.834920634920636e-06,
+ "loss": 0.1653,
+ "step": 7201
+ },
+ {
+ "epoch": 41.15428571428571,
+ "grad_norm": 55.80414581298828,
+ "learning_rate": 9.828571428571429e-06,
+ "loss": 0.1389,
+ "step": 7202
+ },
+ {
+ "epoch": 41.16,
+ "grad_norm": 39.084163665771484,
+ "learning_rate": 9.822222222222223e-06,
+ "loss": 0.1083,
+ "step": 7203
+ },
+ {
+ "epoch": 41.16571428571429,
+ "grad_norm": 29.897830963134766,
+ "learning_rate": 9.815873015873016e-06,
+ "loss": 0.183,
+ "step": 7204
+ },
+ {
+ "epoch": 41.17142857142857,
+ "grad_norm": 44.33466339111328,
+ "learning_rate": 9.80952380952381e-06,
+ "loss": 0.0939,
+ "step": 7205
+ },
+ {
+ "epoch": 41.177142857142854,
+ "grad_norm": 93.39254760742188,
+ "learning_rate": 9.803174603174603e-06,
+ "loss": 0.13,
+ "step": 7206
+ },
+ {
+ "epoch": 41.182857142857145,
+ "grad_norm": 133.07708740234375,
+ "learning_rate": 9.796825396825397e-06,
+ "loss": 0.2016,
+ "step": 7207
+ },
+ {
+ "epoch": 41.18857142857143,
+ "grad_norm": 25.82640838623047,
+ "learning_rate": 9.790476190476192e-06,
+ "loss": 0.1519,
+ "step": 7208
+ },
+ {
+ "epoch": 41.19428571428571,
+ "grad_norm": 32.13935089111328,
+ "learning_rate": 9.784126984126984e-06,
+ "loss": 0.1569,
+ "step": 7209
+ },
+ {
+ "epoch": 41.2,
+ "grad_norm": 28.959239959716797,
+ "learning_rate": 9.777777777777779e-06,
+ "loss": 0.151,
+ "step": 7210
+ },
+ {
+ "epoch": 41.205714285714286,
+ "grad_norm": 45.810340881347656,
+ "learning_rate": 9.771428571428571e-06,
+ "loss": 0.1436,
+ "step": 7211
+ },
+ {
+ "epoch": 41.21142857142857,
+ "grad_norm": 50.805850982666016,
+ "learning_rate": 9.765079365079366e-06,
+ "loss": 0.1672,
+ "step": 7212
+ },
+ {
+ "epoch": 41.21714285714286,
+ "grad_norm": 47.62202835083008,
+ "learning_rate": 9.758730158730158e-06,
+ "loss": 0.1782,
+ "step": 7213
+ },
+ {
+ "epoch": 41.222857142857144,
+ "grad_norm": 506.37432861328125,
+ "learning_rate": 9.752380952380953e-06,
+ "loss": 0.2537,
+ "step": 7214
+ },
+ {
+ "epoch": 41.22857142857143,
+ "grad_norm": 13.947271347045898,
+ "learning_rate": 9.746031746031745e-06,
+ "loss": 0.1388,
+ "step": 7215
+ },
+ {
+ "epoch": 41.23428571428571,
+ "grad_norm": 60.3114013671875,
+ "learning_rate": 9.73968253968254e-06,
+ "loss": 0.1152,
+ "step": 7216
+ },
+ {
+ "epoch": 41.24,
+ "grad_norm": 24.50984001159668,
+ "learning_rate": 9.733333333333334e-06,
+ "loss": 0.1979,
+ "step": 7217
+ },
+ {
+ "epoch": 41.245714285714286,
+ "grad_norm": 27.732975006103516,
+ "learning_rate": 9.726984126984128e-06,
+ "loss": 0.1992,
+ "step": 7218
+ },
+ {
+ "epoch": 41.25142857142857,
+ "grad_norm": 43.578125,
+ "learning_rate": 9.720634920634923e-06,
+ "loss": 0.2019,
+ "step": 7219
+ },
+ {
+ "epoch": 41.25714285714286,
+ "grad_norm": 23.549551010131836,
+ "learning_rate": 9.714285714285715e-06,
+ "loss": 0.1809,
+ "step": 7220
+ },
+ {
+ "epoch": 41.26285714285714,
+ "grad_norm": 29.41553497314453,
+ "learning_rate": 9.70793650793651e-06,
+ "loss": 0.1406,
+ "step": 7221
+ },
+ {
+ "epoch": 41.26857142857143,
+ "grad_norm": 33.58580017089844,
+ "learning_rate": 9.701587301587302e-06,
+ "loss": 0.1632,
+ "step": 7222
+ },
+ {
+ "epoch": 41.27428571428572,
+ "grad_norm": 35.227149963378906,
+ "learning_rate": 9.695238095238096e-06,
+ "loss": 0.1733,
+ "step": 7223
+ },
+ {
+ "epoch": 41.28,
+ "grad_norm": 53.736473083496094,
+ "learning_rate": 9.688888888888889e-06,
+ "loss": 0.22,
+ "step": 7224
+ },
+ {
+ "epoch": 41.285714285714285,
+ "grad_norm": 110.43754577636719,
+ "learning_rate": 9.682539682539683e-06,
+ "loss": 0.1869,
+ "step": 7225
+ },
+ {
+ "epoch": 41.29142857142857,
+ "grad_norm": 33.39349365234375,
+ "learning_rate": 9.676190476190476e-06,
+ "loss": 0.2195,
+ "step": 7226
+ },
+ {
+ "epoch": 41.29714285714286,
+ "grad_norm": 50.573699951171875,
+ "learning_rate": 9.66984126984127e-06,
+ "loss": 0.1543,
+ "step": 7227
+ },
+ {
+ "epoch": 41.30285714285714,
+ "grad_norm": 87.84054565429688,
+ "learning_rate": 9.663492063492063e-06,
+ "loss": 0.2164,
+ "step": 7228
+ },
+ {
+ "epoch": 41.308571428571426,
+ "grad_norm": 2033.5458984375,
+ "learning_rate": 9.657142857142857e-06,
+ "loss": 0.1763,
+ "step": 7229
+ },
+ {
+ "epoch": 41.31428571428572,
+ "grad_norm": 49.382286071777344,
+ "learning_rate": 9.650793650793652e-06,
+ "loss": 0.1636,
+ "step": 7230
+ },
+ {
+ "epoch": 41.32,
+ "grad_norm": 32.18121337890625,
+ "learning_rate": 9.644444444444444e-06,
+ "loss": 0.168,
+ "step": 7231
+ },
+ {
+ "epoch": 41.325714285714284,
+ "grad_norm": 63.839229583740234,
+ "learning_rate": 9.638095238095239e-06,
+ "loss": 0.1657,
+ "step": 7232
+ },
+ {
+ "epoch": 41.331428571428575,
+ "grad_norm": 36.92330551147461,
+ "learning_rate": 9.631746031746031e-06,
+ "loss": 0.1305,
+ "step": 7233
+ },
+ {
+ "epoch": 41.33714285714286,
+ "grad_norm": 57.05158996582031,
+ "learning_rate": 9.625396825396826e-06,
+ "loss": 0.1297,
+ "step": 7234
+ },
+ {
+ "epoch": 41.34285714285714,
+ "grad_norm": 39.037010192871094,
+ "learning_rate": 9.61904761904762e-06,
+ "loss": 0.1542,
+ "step": 7235
+ },
+ {
+ "epoch": 41.348571428571425,
+ "grad_norm": 33.03523635864258,
+ "learning_rate": 9.612698412698414e-06,
+ "loss": 0.1577,
+ "step": 7236
+ },
+ {
+ "epoch": 41.354285714285716,
+ "grad_norm": 21.612871170043945,
+ "learning_rate": 9.606349206349207e-06,
+ "loss": 0.156,
+ "step": 7237
+ },
+ {
+ "epoch": 41.36,
+ "grad_norm": 37.1363639831543,
+ "learning_rate": 9.600000000000001e-06,
+ "loss": 0.1448,
+ "step": 7238
+ },
+ {
+ "epoch": 41.36571428571428,
+ "grad_norm": 72.37059783935547,
+ "learning_rate": 9.593650793650794e-06,
+ "loss": 0.1823,
+ "step": 7239
+ },
+ {
+ "epoch": 41.371428571428574,
+ "grad_norm": 33.95829391479492,
+ "learning_rate": 9.587301587301588e-06,
+ "loss": 0.1539,
+ "step": 7240
+ },
+ {
+ "epoch": 41.37714285714286,
+ "grad_norm": 32.29264450073242,
+ "learning_rate": 9.580952380952381e-06,
+ "loss": 0.223,
+ "step": 7241
+ },
+ {
+ "epoch": 41.38285714285714,
+ "grad_norm": 36.21721267700195,
+ "learning_rate": 9.574603174603175e-06,
+ "loss": 0.2386,
+ "step": 7242
+ },
+ {
+ "epoch": 41.38857142857143,
+ "grad_norm": 46.11216354370117,
+ "learning_rate": 9.56825396825397e-06,
+ "loss": 0.1555,
+ "step": 7243
+ },
+ {
+ "epoch": 41.394285714285715,
+ "grad_norm": 19.189931869506836,
+ "learning_rate": 9.561904761904762e-06,
+ "loss": 0.1856,
+ "step": 7244
+ },
+ {
+ "epoch": 41.4,
+ "grad_norm": 66.90583038330078,
+ "learning_rate": 9.555555555555556e-06,
+ "loss": 0.122,
+ "step": 7245
+ },
+ {
+ "epoch": 41.40571428571428,
+ "grad_norm": 32.81113815307617,
+ "learning_rate": 9.549206349206349e-06,
+ "loss": 0.3572,
+ "step": 7246
+ },
+ {
+ "epoch": 41.41142857142857,
+ "grad_norm": 35.498680114746094,
+ "learning_rate": 9.542857142857143e-06,
+ "loss": 0.2171,
+ "step": 7247
+ },
+ {
+ "epoch": 41.417142857142856,
+ "grad_norm": 76.73021697998047,
+ "learning_rate": 9.536507936507936e-06,
+ "loss": 0.1933,
+ "step": 7248
+ },
+ {
+ "epoch": 41.42285714285714,
+ "grad_norm": 47.33732223510742,
+ "learning_rate": 9.53015873015873e-06,
+ "loss": 0.1082,
+ "step": 7249
+ },
+ {
+ "epoch": 41.42857142857143,
+ "grad_norm": 61.91738510131836,
+ "learning_rate": 9.523809523809523e-06,
+ "loss": 0.1747,
+ "step": 7250
+ },
+ {
+ "epoch": 41.434285714285714,
+ "grad_norm": 35.25819778442383,
+ "learning_rate": 9.517460317460317e-06,
+ "loss": 0.1635,
+ "step": 7251
+ },
+ {
+ "epoch": 41.44,
+ "grad_norm": 26.504154205322266,
+ "learning_rate": 9.511111111111112e-06,
+ "loss": 0.1682,
+ "step": 7252
+ },
+ {
+ "epoch": 41.44571428571429,
+ "grad_norm": 64.45111846923828,
+ "learning_rate": 9.504761904761906e-06,
+ "loss": 0.2093,
+ "step": 7253
+ },
+ {
+ "epoch": 41.45142857142857,
+ "grad_norm": 35.32815933227539,
+ "learning_rate": 9.498412698412699e-06,
+ "loss": 0.1466,
+ "step": 7254
+ },
+ {
+ "epoch": 41.457142857142856,
+ "grad_norm": 29.58865737915039,
+ "learning_rate": 9.492063492063493e-06,
+ "loss": 0.163,
+ "step": 7255
+ },
+ {
+ "epoch": 41.462857142857146,
+ "grad_norm": 23.193740844726562,
+ "learning_rate": 9.485714285714287e-06,
+ "loss": 0.2038,
+ "step": 7256
+ },
+ {
+ "epoch": 41.46857142857143,
+ "grad_norm": 37.13410949707031,
+ "learning_rate": 9.47936507936508e-06,
+ "loss": 0.1304,
+ "step": 7257
+ },
+ {
+ "epoch": 41.47428571428571,
+ "grad_norm": 44.46181106567383,
+ "learning_rate": 9.473015873015874e-06,
+ "loss": 0.172,
+ "step": 7258
+ },
+ {
+ "epoch": 41.48,
+ "grad_norm": 29.92389678955078,
+ "learning_rate": 9.466666666666667e-06,
+ "loss": 0.1343,
+ "step": 7259
+ },
+ {
+ "epoch": 41.48571428571429,
+ "grad_norm": 64.4034194946289,
+ "learning_rate": 9.460317460317461e-06,
+ "loss": 0.2648,
+ "step": 7260
+ },
+ {
+ "epoch": 41.49142857142857,
+ "grad_norm": 26.941253662109375,
+ "learning_rate": 9.453968253968254e-06,
+ "loss": 0.1634,
+ "step": 7261
+ },
+ {
+ "epoch": 41.497142857142855,
+ "grad_norm": 49.134525299072266,
+ "learning_rate": 9.447619047619048e-06,
+ "loss": 0.1707,
+ "step": 7262
+ },
+ {
+ "epoch": 41.502857142857145,
+ "grad_norm": 341.13677978515625,
+ "learning_rate": 9.441269841269841e-06,
+ "loss": 0.1977,
+ "step": 7263
+ },
+ {
+ "epoch": 41.50857142857143,
+ "grad_norm": 53.64311981201172,
+ "learning_rate": 9.434920634920635e-06,
+ "loss": 0.1562,
+ "step": 7264
+ },
+ {
+ "epoch": 41.51428571428571,
+ "grad_norm": 18.503982543945312,
+ "learning_rate": 9.42857142857143e-06,
+ "loss": 0.1383,
+ "step": 7265
+ },
+ {
+ "epoch": 41.52,
+ "grad_norm": 39.2716178894043,
+ "learning_rate": 9.422222222222222e-06,
+ "loss": 0.1532,
+ "step": 7266
+ },
+ {
+ "epoch": 41.52571428571429,
+ "grad_norm": 26.437915802001953,
+ "learning_rate": 9.415873015873017e-06,
+ "loss": 0.2173,
+ "step": 7267
+ },
+ {
+ "epoch": 41.53142857142857,
+ "grad_norm": 22.738313674926758,
+ "learning_rate": 9.409523809523809e-06,
+ "loss": 0.1626,
+ "step": 7268
+ },
+ {
+ "epoch": 41.537142857142854,
+ "grad_norm": 81.11518859863281,
+ "learning_rate": 9.403174603174603e-06,
+ "loss": 0.1764,
+ "step": 7269
+ },
+ {
+ "epoch": 41.542857142857144,
+ "grad_norm": 41.88934326171875,
+ "learning_rate": 9.396825396825396e-06,
+ "loss": 0.1465,
+ "step": 7270
+ },
+ {
+ "epoch": 41.54857142857143,
+ "grad_norm": 53.15003204345703,
+ "learning_rate": 9.39047619047619e-06,
+ "loss": 0.1899,
+ "step": 7271
+ },
+ {
+ "epoch": 41.55428571428571,
+ "grad_norm": 253.06153869628906,
+ "learning_rate": 9.384126984126985e-06,
+ "loss": 0.208,
+ "step": 7272
+ },
+ {
+ "epoch": 41.56,
+ "grad_norm": 23.035747528076172,
+ "learning_rate": 9.377777777777779e-06,
+ "loss": 0.1603,
+ "step": 7273
+ },
+ {
+ "epoch": 41.565714285714286,
+ "grad_norm": 63.308746337890625,
+ "learning_rate": 9.371428571428572e-06,
+ "loss": 0.1591,
+ "step": 7274
+ },
+ {
+ "epoch": 41.57142857142857,
+ "grad_norm": 91.1810302734375,
+ "learning_rate": 9.365079365079366e-06,
+ "loss": 0.144,
+ "step": 7275
+ },
+ {
+ "epoch": 41.57714285714286,
+ "grad_norm": 40.969486236572266,
+ "learning_rate": 9.35873015873016e-06,
+ "loss": 0.1754,
+ "step": 7276
+ },
+ {
+ "epoch": 41.582857142857144,
+ "grad_norm": 19.191484451293945,
+ "learning_rate": 9.352380952380953e-06,
+ "loss": 0.1415,
+ "step": 7277
+ },
+ {
+ "epoch": 41.58857142857143,
+ "grad_norm": 53.4935188293457,
+ "learning_rate": 9.346031746031747e-06,
+ "loss": 0.2566,
+ "step": 7278
+ },
+ {
+ "epoch": 41.59428571428572,
+ "grad_norm": 59.00881576538086,
+ "learning_rate": 9.33968253968254e-06,
+ "loss": 0.2594,
+ "step": 7279
+ },
+ {
+ "epoch": 41.6,
+ "grad_norm": 25.847272872924805,
+ "learning_rate": 9.333333333333334e-06,
+ "loss": 0.1878,
+ "step": 7280
+ },
+ {
+ "epoch": 41.605714285714285,
+ "grad_norm": 21.484376907348633,
+ "learning_rate": 9.326984126984127e-06,
+ "loss": 0.1395,
+ "step": 7281
+ },
+ {
+ "epoch": 41.61142857142857,
+ "grad_norm": 50.11928176879883,
+ "learning_rate": 9.320634920634921e-06,
+ "loss": 0.2086,
+ "step": 7282
+ },
+ {
+ "epoch": 41.61714285714286,
+ "grad_norm": 69.43877410888672,
+ "learning_rate": 9.314285714285714e-06,
+ "loss": 0.1617,
+ "step": 7283
+ },
+ {
+ "epoch": 41.62285714285714,
+ "grad_norm": 61.6375732421875,
+ "learning_rate": 9.307936507936508e-06,
+ "loss": 0.1607,
+ "step": 7284
+ },
+ {
+ "epoch": 41.628571428571426,
+ "grad_norm": 43.103084564208984,
+ "learning_rate": 9.301587301587301e-06,
+ "loss": 0.134,
+ "step": 7285
+ },
+ {
+ "epoch": 41.63428571428572,
+ "grad_norm": 46.29750061035156,
+ "learning_rate": 9.295238095238095e-06,
+ "loss": 0.2005,
+ "step": 7286
+ },
+ {
+ "epoch": 41.64,
+ "grad_norm": 33.02797317504883,
+ "learning_rate": 9.288888888888888e-06,
+ "loss": 0.1527,
+ "step": 7287
+ },
+ {
+ "epoch": 41.645714285714284,
+ "grad_norm": 73.48651123046875,
+ "learning_rate": 9.282539682539682e-06,
+ "loss": 0.1214,
+ "step": 7288
+ },
+ {
+ "epoch": 41.651428571428575,
+ "grad_norm": 46.53531265258789,
+ "learning_rate": 9.276190476190477e-06,
+ "loss": 0.1913,
+ "step": 7289
+ },
+ {
+ "epoch": 41.65714285714286,
+ "grad_norm": 55.551116943359375,
+ "learning_rate": 9.26984126984127e-06,
+ "loss": 0.1345,
+ "step": 7290
+ },
+ {
+ "epoch": 41.66285714285714,
+ "grad_norm": 72.10269927978516,
+ "learning_rate": 9.263492063492065e-06,
+ "loss": 0.1673,
+ "step": 7291
+ },
+ {
+ "epoch": 41.668571428571425,
+ "grad_norm": 47.2354850769043,
+ "learning_rate": 9.257142857142858e-06,
+ "loss": 0.2478,
+ "step": 7292
+ },
+ {
+ "epoch": 41.674285714285716,
+ "grad_norm": 27.38324546813965,
+ "learning_rate": 9.250793650793652e-06,
+ "loss": 0.1887,
+ "step": 7293
+ },
+ {
+ "epoch": 41.68,
+ "grad_norm": 70.26866912841797,
+ "learning_rate": 9.244444444444445e-06,
+ "loss": 0.2682,
+ "step": 7294
+ },
+ {
+ "epoch": 41.68571428571428,
+ "grad_norm": 54.18074417114258,
+ "learning_rate": 9.238095238095239e-06,
+ "loss": 0.3105,
+ "step": 7295
+ },
+ {
+ "epoch": 41.691428571428574,
+ "grad_norm": 162.82015991210938,
+ "learning_rate": 9.231746031746032e-06,
+ "loss": 0.1445,
+ "step": 7296
+ },
+ {
+ "epoch": 41.69714285714286,
+ "grad_norm": 63.81920623779297,
+ "learning_rate": 9.225396825396826e-06,
+ "loss": 0.189,
+ "step": 7297
+ },
+ {
+ "epoch": 41.70285714285714,
+ "grad_norm": 32.952274322509766,
+ "learning_rate": 9.219047619047619e-06,
+ "loss": 0.172,
+ "step": 7298
+ },
+ {
+ "epoch": 41.70857142857143,
+ "grad_norm": 37.01651382446289,
+ "learning_rate": 9.212698412698413e-06,
+ "loss": 0.1575,
+ "step": 7299
+ },
+ {
+ "epoch": 41.714285714285715,
+ "grad_norm": 66.97454833984375,
+ "learning_rate": 9.206349206349207e-06,
+ "loss": 0.1425,
+ "step": 7300
+ },
+ {
+ "epoch": 41.72,
+ "grad_norm": 47.040809631347656,
+ "learning_rate": 9.2e-06,
+ "loss": 0.1384,
+ "step": 7301
+ },
+ {
+ "epoch": 41.72571428571428,
+ "grad_norm": 82.4026870727539,
+ "learning_rate": 9.193650793650794e-06,
+ "loss": 0.301,
+ "step": 7302
+ },
+ {
+ "epoch": 41.73142857142857,
+ "grad_norm": 39.067626953125,
+ "learning_rate": 9.187301587301587e-06,
+ "loss": 0.2164,
+ "step": 7303
+ },
+ {
+ "epoch": 41.73714285714286,
+ "grad_norm": 306.2405090332031,
+ "learning_rate": 9.180952380952381e-06,
+ "loss": 0.1345,
+ "step": 7304
+ },
+ {
+ "epoch": 41.74285714285714,
+ "grad_norm": 29.726070404052734,
+ "learning_rate": 9.174603174603174e-06,
+ "loss": 0.1596,
+ "step": 7305
+ },
+ {
+ "epoch": 41.74857142857143,
+ "grad_norm": 62.11240768432617,
+ "learning_rate": 9.168253968253968e-06,
+ "loss": 0.1669,
+ "step": 7306
+ },
+ {
+ "epoch": 41.754285714285714,
+ "grad_norm": 372.9456787109375,
+ "learning_rate": 9.161904761904763e-06,
+ "loss": 0.1711,
+ "step": 7307
+ },
+ {
+ "epoch": 41.76,
+ "grad_norm": 378.95269775390625,
+ "learning_rate": 9.155555555555557e-06,
+ "loss": 0.1879,
+ "step": 7308
+ },
+ {
+ "epoch": 41.76571428571429,
+ "grad_norm": 33.18974685668945,
+ "learning_rate": 9.14920634920635e-06,
+ "loss": 0.1627,
+ "step": 7309
+ },
+ {
+ "epoch": 41.77142857142857,
+ "grad_norm": 72.96440124511719,
+ "learning_rate": 9.142857142857144e-06,
+ "loss": 0.128,
+ "step": 7310
+ },
+ {
+ "epoch": 41.777142857142856,
+ "grad_norm": 56.02112579345703,
+ "learning_rate": 9.136507936507938e-06,
+ "loss": 0.1993,
+ "step": 7311
+ },
+ {
+ "epoch": 41.78285714285714,
+ "grad_norm": 47.467979431152344,
+ "learning_rate": 9.130158730158731e-06,
+ "loss": 0.1318,
+ "step": 7312
+ },
+ {
+ "epoch": 41.78857142857143,
+ "grad_norm": 83.64862823486328,
+ "learning_rate": 9.123809523809525e-06,
+ "loss": 0.2166,
+ "step": 7313
+ },
+ {
+ "epoch": 41.794285714285714,
+ "grad_norm": 97.51394653320312,
+ "learning_rate": 9.117460317460318e-06,
+ "loss": 0.1289,
+ "step": 7314
+ },
+ {
+ "epoch": 41.8,
+ "grad_norm": 37.590824127197266,
+ "learning_rate": 9.111111111111112e-06,
+ "loss": 0.2469,
+ "step": 7315
+ },
+ {
+ "epoch": 41.80571428571429,
+ "grad_norm": 56.855812072753906,
+ "learning_rate": 9.104761904761905e-06,
+ "loss": 0.1568,
+ "step": 7316
+ },
+ {
+ "epoch": 41.81142857142857,
+ "grad_norm": 60.23370361328125,
+ "learning_rate": 9.098412698412699e-06,
+ "loss": 0.1729,
+ "step": 7317
+ },
+ {
+ "epoch": 41.817142857142855,
+ "grad_norm": 49.858306884765625,
+ "learning_rate": 9.092063492063492e-06,
+ "loss": 0.1847,
+ "step": 7318
+ },
+ {
+ "epoch": 41.822857142857146,
+ "grad_norm": 54.69880294799805,
+ "learning_rate": 9.085714285714286e-06,
+ "loss": 0.1988,
+ "step": 7319
+ },
+ {
+ "epoch": 41.82857142857143,
+ "grad_norm": 59.97076416015625,
+ "learning_rate": 9.079365079365079e-06,
+ "loss": 0.1602,
+ "step": 7320
+ },
+ {
+ "epoch": 41.83428571428571,
+ "grad_norm": 47.91924285888672,
+ "learning_rate": 9.073015873015873e-06,
+ "loss": 0.1752,
+ "step": 7321
+ },
+ {
+ "epoch": 41.84,
+ "grad_norm": 59.64024353027344,
+ "learning_rate": 9.066666666666667e-06,
+ "loss": 0.1819,
+ "step": 7322
+ },
+ {
+ "epoch": 41.84571428571429,
+ "grad_norm": 101.1203384399414,
+ "learning_rate": 9.06031746031746e-06,
+ "loss": 0.2232,
+ "step": 7323
+ },
+ {
+ "epoch": 41.85142857142857,
+ "grad_norm": 30.48805046081543,
+ "learning_rate": 9.053968253968254e-06,
+ "loss": 0.1565,
+ "step": 7324
+ },
+ {
+ "epoch": 41.857142857142854,
+ "grad_norm": 74.9332504272461,
+ "learning_rate": 9.047619047619047e-06,
+ "loss": 0.149,
+ "step": 7325
+ },
+ {
+ "epoch": 41.862857142857145,
+ "grad_norm": 82.43065643310547,
+ "learning_rate": 9.041269841269841e-06,
+ "loss": 0.1471,
+ "step": 7326
+ },
+ {
+ "epoch": 41.86857142857143,
+ "grad_norm": 80.38092041015625,
+ "learning_rate": 9.034920634920636e-06,
+ "loss": 0.1401,
+ "step": 7327
+ },
+ {
+ "epoch": 41.87428571428571,
+ "grad_norm": 16.437274932861328,
+ "learning_rate": 9.02857142857143e-06,
+ "loss": 0.1996,
+ "step": 7328
+ },
+ {
+ "epoch": 41.88,
+ "grad_norm": 36.55046081542969,
+ "learning_rate": 9.022222222222223e-06,
+ "loss": 0.1489,
+ "step": 7329
+ },
+ {
+ "epoch": 41.885714285714286,
+ "grad_norm": 52.14812088012695,
+ "learning_rate": 9.015873015873017e-06,
+ "loss": 0.227,
+ "step": 7330
+ },
+ {
+ "epoch": 41.89142857142857,
+ "grad_norm": 28.186607360839844,
+ "learning_rate": 9.00952380952381e-06,
+ "loss": 0.1211,
+ "step": 7331
+ },
+ {
+ "epoch": 41.89714285714286,
+ "grad_norm": 51.75035095214844,
+ "learning_rate": 9.003174603174604e-06,
+ "loss": 0.2142,
+ "step": 7332
+ },
+ {
+ "epoch": 41.902857142857144,
+ "grad_norm": 67.82548522949219,
+ "learning_rate": 8.996825396825398e-06,
+ "loss": 0.1853,
+ "step": 7333
+ },
+ {
+ "epoch": 41.90857142857143,
+ "grad_norm": 45.53987503051758,
+ "learning_rate": 8.990476190476191e-06,
+ "loss": 0.1488,
+ "step": 7334
+ },
+ {
+ "epoch": 41.91428571428571,
+ "grad_norm": 29.2596435546875,
+ "learning_rate": 8.984126984126985e-06,
+ "loss": 0.1507,
+ "step": 7335
+ },
+ {
+ "epoch": 41.92,
+ "grad_norm": 56.77320861816406,
+ "learning_rate": 8.977777777777778e-06,
+ "loss": 0.1727,
+ "step": 7336
+ },
+ {
+ "epoch": 41.925714285714285,
+ "grad_norm": 41.84785461425781,
+ "learning_rate": 8.971428571428572e-06,
+ "loss": 0.1439,
+ "step": 7337
+ },
+ {
+ "epoch": 41.93142857142857,
+ "grad_norm": 63.32027053833008,
+ "learning_rate": 8.965079365079365e-06,
+ "loss": 0.1856,
+ "step": 7338
+ },
+ {
+ "epoch": 41.93714285714286,
+ "grad_norm": 639.3530883789062,
+ "learning_rate": 8.958730158730159e-06,
+ "loss": 0.1719,
+ "step": 7339
+ },
+ {
+ "epoch": 41.94285714285714,
+ "grad_norm": 55.293338775634766,
+ "learning_rate": 8.952380952380952e-06,
+ "loss": 0.1421,
+ "step": 7340
+ },
+ {
+ "epoch": 41.94857142857143,
+ "grad_norm": 31.818618774414062,
+ "learning_rate": 8.946031746031746e-06,
+ "loss": 0.176,
+ "step": 7341
+ },
+ {
+ "epoch": 41.95428571428572,
+ "grad_norm": 87.29776763916016,
+ "learning_rate": 8.939682539682539e-06,
+ "loss": 0.144,
+ "step": 7342
+ },
+ {
+ "epoch": 41.96,
+ "grad_norm": 16.483863830566406,
+ "learning_rate": 8.933333333333333e-06,
+ "loss": 0.1344,
+ "step": 7343
+ },
+ {
+ "epoch": 41.965714285714284,
+ "grad_norm": 48.55203628540039,
+ "learning_rate": 8.926984126984127e-06,
+ "loss": 0.1686,
+ "step": 7344
+ },
+ {
+ "epoch": 41.97142857142857,
+ "grad_norm": 33.186134338378906,
+ "learning_rate": 8.920634920634922e-06,
+ "loss": 0.1719,
+ "step": 7345
+ },
+ {
+ "epoch": 41.97714285714286,
+ "grad_norm": 66.01484680175781,
+ "learning_rate": 8.914285714285716e-06,
+ "loss": 0.1366,
+ "step": 7346
+ },
+ {
+ "epoch": 41.98285714285714,
+ "grad_norm": 48.887664794921875,
+ "learning_rate": 8.907936507936509e-06,
+ "loss": 0.1151,
+ "step": 7347
+ },
+ {
+ "epoch": 41.988571428571426,
+ "grad_norm": 68.40586853027344,
+ "learning_rate": 8.901587301587303e-06,
+ "loss": 0.15,
+ "step": 7348
+ },
+ {
+ "epoch": 41.994285714285716,
+ "grad_norm": 23.55936622619629,
+ "learning_rate": 8.895238095238096e-06,
+ "loss": 0.172,
+ "step": 7349
+ },
+ {
+ "epoch": 42.0,
+ "grad_norm": 72.13063049316406,
+ "learning_rate": 8.88888888888889e-06,
+ "loss": 0.1595,
+ "step": 7350
+ },
+ {
+ "epoch": 42.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5726149082183838,
+ "eval_map": 0.9362,
+ "eval_map_50": 0.969,
+ "eval_map_75": 0.9629,
+ "eval_map_large": 0.9362,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9362,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.787,
+ "eval_mar_10": 0.9743,
+ "eval_mar_100": 0.9749,
+ "eval_mar_100_per_class": 0.9749,
+ "eval_mar_large": 0.9749,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.2795,
+ "eval_samples_per_second": 22.139,
+ "eval_steps_per_second": 2.786,
+ "step": 7350
+ },
+ {
+ "epoch": 42.005714285714284,
+ "grad_norm": 58.79615020751953,
+ "learning_rate": 8.882539682539683e-06,
+ "loss": 0.1559,
+ "step": 7351
+ },
+ {
+ "epoch": 42.011428571428574,
+ "grad_norm": 56.658626556396484,
+ "learning_rate": 8.876190476190477e-06,
+ "loss": 0.1876,
+ "step": 7352
+ },
+ {
+ "epoch": 42.01714285714286,
+ "grad_norm": 24.444313049316406,
+ "learning_rate": 8.86984126984127e-06,
+ "loss": 0.1505,
+ "step": 7353
+ },
+ {
+ "epoch": 42.02285714285714,
+ "grad_norm": 83.33272552490234,
+ "learning_rate": 8.863492063492064e-06,
+ "loss": 0.1383,
+ "step": 7354
+ },
+ {
+ "epoch": 42.02857142857143,
+ "grad_norm": 38.414390563964844,
+ "learning_rate": 8.857142857142857e-06,
+ "loss": 0.1874,
+ "step": 7355
+ },
+ {
+ "epoch": 42.034285714285716,
+ "grad_norm": 108.53782653808594,
+ "learning_rate": 8.850793650793651e-06,
+ "loss": 0.2398,
+ "step": 7356
+ },
+ {
+ "epoch": 42.04,
+ "grad_norm": 49.074947357177734,
+ "learning_rate": 8.844444444444445e-06,
+ "loss": 0.1487,
+ "step": 7357
+ },
+ {
+ "epoch": 42.04571428571428,
+ "grad_norm": 29.031755447387695,
+ "learning_rate": 8.838095238095238e-06,
+ "loss": 0.1396,
+ "step": 7358
+ },
+ {
+ "epoch": 42.05142857142857,
+ "grad_norm": 42.39862823486328,
+ "learning_rate": 8.831746031746032e-06,
+ "loss": 0.2134,
+ "step": 7359
+ },
+ {
+ "epoch": 42.05714285714286,
+ "grad_norm": 14.854192733764648,
+ "learning_rate": 8.825396825396825e-06,
+ "loss": 0.0998,
+ "step": 7360
+ },
+ {
+ "epoch": 42.06285714285714,
+ "grad_norm": 25.197086334228516,
+ "learning_rate": 8.819047619047619e-06,
+ "loss": 0.233,
+ "step": 7361
+ },
+ {
+ "epoch": 42.06857142857143,
+ "grad_norm": 218.95265197753906,
+ "learning_rate": 8.812698412698413e-06,
+ "loss": 0.1632,
+ "step": 7362
+ },
+ {
+ "epoch": 42.074285714285715,
+ "grad_norm": 36.32129669189453,
+ "learning_rate": 8.806349206349208e-06,
+ "loss": 0.1277,
+ "step": 7363
+ },
+ {
+ "epoch": 42.08,
+ "grad_norm": 43.652706146240234,
+ "learning_rate": 8.8e-06,
+ "loss": 0.1239,
+ "step": 7364
+ },
+ {
+ "epoch": 42.08571428571429,
+ "grad_norm": 33.010154724121094,
+ "learning_rate": 8.793650793650795e-06,
+ "loss": 0.1407,
+ "step": 7365
+ },
+ {
+ "epoch": 42.09142857142857,
+ "grad_norm": 51.78741455078125,
+ "learning_rate": 8.787301587301587e-06,
+ "loss": 0.2169,
+ "step": 7366
+ },
+ {
+ "epoch": 42.097142857142856,
+ "grad_norm": 20.24148178100586,
+ "learning_rate": 8.780952380952382e-06,
+ "loss": 0.2252,
+ "step": 7367
+ },
+ {
+ "epoch": 42.10285714285714,
+ "grad_norm": 40.71898651123047,
+ "learning_rate": 8.774603174603176e-06,
+ "loss": 0.1663,
+ "step": 7368
+ },
+ {
+ "epoch": 42.10857142857143,
+ "grad_norm": 49.57981491088867,
+ "learning_rate": 8.768253968253969e-06,
+ "loss": 0.157,
+ "step": 7369
+ },
+ {
+ "epoch": 42.114285714285714,
+ "grad_norm": 39.95606231689453,
+ "learning_rate": 8.761904761904763e-06,
+ "loss": 0.1447,
+ "step": 7370
+ },
+ {
+ "epoch": 42.12,
+ "grad_norm": 21.526668548583984,
+ "learning_rate": 8.755555555555556e-06,
+ "loss": 0.1345,
+ "step": 7371
+ },
+ {
+ "epoch": 42.12571428571429,
+ "grad_norm": 17.036447525024414,
+ "learning_rate": 8.74920634920635e-06,
+ "loss": 0.1782,
+ "step": 7372
+ },
+ {
+ "epoch": 42.13142857142857,
+ "grad_norm": 33.06338882446289,
+ "learning_rate": 8.742857142857143e-06,
+ "loss": 0.1922,
+ "step": 7373
+ },
+ {
+ "epoch": 42.137142857142855,
+ "grad_norm": 32.230064392089844,
+ "learning_rate": 8.736507936507937e-06,
+ "loss": 0.1713,
+ "step": 7374
+ },
+ {
+ "epoch": 42.142857142857146,
+ "grad_norm": 49.66074752807617,
+ "learning_rate": 8.73015873015873e-06,
+ "loss": 0.2553,
+ "step": 7375
+ },
+ {
+ "epoch": 42.14857142857143,
+ "grad_norm": 63.30400466918945,
+ "learning_rate": 8.723809523809524e-06,
+ "loss": 0.2243,
+ "step": 7376
+ },
+ {
+ "epoch": 42.15428571428571,
+ "grad_norm": 17.84560203552246,
+ "learning_rate": 8.717460317460317e-06,
+ "loss": 0.1485,
+ "step": 7377
+ },
+ {
+ "epoch": 42.16,
+ "grad_norm": 45.32656478881836,
+ "learning_rate": 8.711111111111111e-06,
+ "loss": 0.1368,
+ "step": 7378
+ },
+ {
+ "epoch": 42.16571428571429,
+ "grad_norm": 65.4549331665039,
+ "learning_rate": 8.704761904761905e-06,
+ "loss": 0.1309,
+ "step": 7379
+ },
+ {
+ "epoch": 42.17142857142857,
+ "grad_norm": 58.42035675048828,
+ "learning_rate": 8.6984126984127e-06,
+ "loss": 0.1501,
+ "step": 7380
+ },
+ {
+ "epoch": 42.177142857142854,
+ "grad_norm": 63.2991943359375,
+ "learning_rate": 8.692063492063492e-06,
+ "loss": 0.1632,
+ "step": 7381
+ },
+ {
+ "epoch": 42.182857142857145,
+ "grad_norm": 24.08550453186035,
+ "learning_rate": 8.685714285714287e-06,
+ "loss": 0.1295,
+ "step": 7382
+ },
+ {
+ "epoch": 42.18857142857143,
+ "grad_norm": 404.84649658203125,
+ "learning_rate": 8.679365079365081e-06,
+ "loss": 0.1835,
+ "step": 7383
+ },
+ {
+ "epoch": 42.19428571428571,
+ "grad_norm": 51.992088317871094,
+ "learning_rate": 8.673015873015873e-06,
+ "loss": 0.1745,
+ "step": 7384
+ },
+ {
+ "epoch": 42.2,
+ "grad_norm": 29.288646697998047,
+ "learning_rate": 8.666666666666668e-06,
+ "loss": 0.1602,
+ "step": 7385
+ },
+ {
+ "epoch": 42.205714285714286,
+ "grad_norm": 29.5358943939209,
+ "learning_rate": 8.66031746031746e-06,
+ "loss": 0.1556,
+ "step": 7386
+ },
+ {
+ "epoch": 42.21142857142857,
+ "grad_norm": 51.27584457397461,
+ "learning_rate": 8.653968253968255e-06,
+ "loss": 0.1505,
+ "step": 7387
+ },
+ {
+ "epoch": 42.21714285714286,
+ "grad_norm": 45.10905456542969,
+ "learning_rate": 8.647619047619047e-06,
+ "loss": 0.1207,
+ "step": 7388
+ },
+ {
+ "epoch": 42.222857142857144,
+ "grad_norm": 32.930137634277344,
+ "learning_rate": 8.641269841269842e-06,
+ "loss": 0.1865,
+ "step": 7389
+ },
+ {
+ "epoch": 42.22857142857143,
+ "grad_norm": 20.60719871520996,
+ "learning_rate": 8.634920634920636e-06,
+ "loss": 0.1643,
+ "step": 7390
+ },
+ {
+ "epoch": 42.23428571428571,
+ "grad_norm": 28.81972885131836,
+ "learning_rate": 8.628571428571429e-06,
+ "loss": 0.163,
+ "step": 7391
+ },
+ {
+ "epoch": 42.24,
+ "grad_norm": 65.38088989257812,
+ "learning_rate": 8.622222222222223e-06,
+ "loss": 0.1798,
+ "step": 7392
+ },
+ {
+ "epoch": 42.245714285714286,
+ "grad_norm": 90.9728775024414,
+ "learning_rate": 8.615873015873016e-06,
+ "loss": 0.1716,
+ "step": 7393
+ },
+ {
+ "epoch": 42.25142857142857,
+ "grad_norm": 1374.4593505859375,
+ "learning_rate": 8.60952380952381e-06,
+ "loss": 0.1409,
+ "step": 7394
+ },
+ {
+ "epoch": 42.25714285714286,
+ "grad_norm": 74.26192474365234,
+ "learning_rate": 8.603174603174603e-06,
+ "loss": 0.151,
+ "step": 7395
+ },
+ {
+ "epoch": 42.26285714285714,
+ "grad_norm": 34.685264587402344,
+ "learning_rate": 8.596825396825397e-06,
+ "loss": 0.1559,
+ "step": 7396
+ },
+ {
+ "epoch": 42.26857142857143,
+ "grad_norm": 54.60033416748047,
+ "learning_rate": 8.59047619047619e-06,
+ "loss": 0.1207,
+ "step": 7397
+ },
+ {
+ "epoch": 42.27428571428572,
+ "grad_norm": 33.01666259765625,
+ "learning_rate": 8.584126984126984e-06,
+ "loss": 0.2563,
+ "step": 7398
+ },
+ {
+ "epoch": 42.28,
+ "grad_norm": 78.32510375976562,
+ "learning_rate": 8.577777777777778e-06,
+ "loss": 0.1415,
+ "step": 7399
+ },
+ {
+ "epoch": 42.285714285714285,
+ "grad_norm": 29.386106491088867,
+ "learning_rate": 8.571428571428573e-06,
+ "loss": 0.1619,
+ "step": 7400
+ },
+ {
+ "epoch": 42.29142857142857,
+ "grad_norm": 19.554582595825195,
+ "learning_rate": 8.565079365079365e-06,
+ "loss": 0.1221,
+ "step": 7401
+ },
+ {
+ "epoch": 42.29714285714286,
+ "grad_norm": 61.935787200927734,
+ "learning_rate": 8.55873015873016e-06,
+ "loss": 0.2028,
+ "step": 7402
+ },
+ {
+ "epoch": 42.30285714285714,
+ "grad_norm": 33.08884048461914,
+ "learning_rate": 8.552380952380954e-06,
+ "loss": 0.0932,
+ "step": 7403
+ },
+ {
+ "epoch": 42.308571428571426,
+ "grad_norm": 19.032835006713867,
+ "learning_rate": 8.546031746031747e-06,
+ "loss": 0.13,
+ "step": 7404
+ },
+ {
+ "epoch": 42.31428571428572,
+ "grad_norm": 28.662944793701172,
+ "learning_rate": 8.539682539682541e-06,
+ "loss": 0.1759,
+ "step": 7405
+ },
+ {
+ "epoch": 42.32,
+ "grad_norm": 30.545503616333008,
+ "learning_rate": 8.533333333333334e-06,
+ "loss": 0.1711,
+ "step": 7406
+ },
+ {
+ "epoch": 42.325714285714284,
+ "grad_norm": 97.189697265625,
+ "learning_rate": 8.526984126984128e-06,
+ "loss": 0.1324,
+ "step": 7407
+ },
+ {
+ "epoch": 42.331428571428575,
+ "grad_norm": 59.2839469909668,
+ "learning_rate": 8.52063492063492e-06,
+ "loss": 0.2409,
+ "step": 7408
+ },
+ {
+ "epoch": 42.33714285714286,
+ "grad_norm": 50.607032775878906,
+ "learning_rate": 8.514285714285715e-06,
+ "loss": 0.14,
+ "step": 7409
+ },
+ {
+ "epoch": 42.34285714285714,
+ "grad_norm": 49.94506072998047,
+ "learning_rate": 8.507936507936507e-06,
+ "loss": 0.1492,
+ "step": 7410
+ },
+ {
+ "epoch": 42.348571428571425,
+ "grad_norm": 24.435863494873047,
+ "learning_rate": 8.501587301587302e-06,
+ "loss": 0.4463,
+ "step": 7411
+ },
+ {
+ "epoch": 42.354285714285716,
+ "grad_norm": 46.557308197021484,
+ "learning_rate": 8.495238095238094e-06,
+ "loss": 0.1528,
+ "step": 7412
+ },
+ {
+ "epoch": 42.36,
+ "grad_norm": 57.091331481933594,
+ "learning_rate": 8.488888888888889e-06,
+ "loss": 0.1227,
+ "step": 7413
+ },
+ {
+ "epoch": 42.36571428571428,
+ "grad_norm": 38.18864059448242,
+ "learning_rate": 8.482539682539683e-06,
+ "loss": 0.2634,
+ "step": 7414
+ },
+ {
+ "epoch": 42.371428571428574,
+ "grad_norm": 25.98653793334961,
+ "learning_rate": 8.476190476190476e-06,
+ "loss": 0.2947,
+ "step": 7415
+ },
+ {
+ "epoch": 42.37714285714286,
+ "grad_norm": 23.898725509643555,
+ "learning_rate": 8.46984126984127e-06,
+ "loss": 0.1523,
+ "step": 7416
+ },
+ {
+ "epoch": 42.38285714285714,
+ "grad_norm": 19.86840057373047,
+ "learning_rate": 8.463492063492064e-06,
+ "loss": 0.1333,
+ "step": 7417
+ },
+ {
+ "epoch": 42.38857142857143,
+ "grad_norm": 33.621646881103516,
+ "learning_rate": 8.457142857142859e-06,
+ "loss": 0.243,
+ "step": 7418
+ },
+ {
+ "epoch": 42.394285714285715,
+ "grad_norm": 39.622520446777344,
+ "learning_rate": 8.450793650793651e-06,
+ "loss": 0.2453,
+ "step": 7419
+ },
+ {
+ "epoch": 42.4,
+ "grad_norm": 43.996707916259766,
+ "learning_rate": 8.444444444444446e-06,
+ "loss": 0.1304,
+ "step": 7420
+ },
+ {
+ "epoch": 42.40571428571428,
+ "grad_norm": 93.22525024414062,
+ "learning_rate": 8.438095238095238e-06,
+ "loss": 0.1889,
+ "step": 7421
+ },
+ {
+ "epoch": 42.41142857142857,
+ "grad_norm": 27.656633377075195,
+ "learning_rate": 8.431746031746033e-06,
+ "loss": 0.134,
+ "step": 7422
+ },
+ {
+ "epoch": 42.417142857142856,
+ "grad_norm": 118.73726654052734,
+ "learning_rate": 8.425396825396825e-06,
+ "loss": 0.1986,
+ "step": 7423
+ },
+ {
+ "epoch": 42.42285714285714,
+ "grad_norm": 47.353248596191406,
+ "learning_rate": 8.41904761904762e-06,
+ "loss": 0.1321,
+ "step": 7424
+ },
+ {
+ "epoch": 42.42857142857143,
+ "grad_norm": 26.87725830078125,
+ "learning_rate": 8.412698412698414e-06,
+ "loss": 0.1406,
+ "step": 7425
+ },
+ {
+ "epoch": 42.434285714285714,
+ "grad_norm": 124.5577392578125,
+ "learning_rate": 8.406349206349207e-06,
+ "loss": 0.2331,
+ "step": 7426
+ },
+ {
+ "epoch": 42.44,
+ "grad_norm": 20.424171447753906,
+ "learning_rate": 8.400000000000001e-06,
+ "loss": 0.1607,
+ "step": 7427
+ },
+ {
+ "epoch": 42.44571428571429,
+ "grad_norm": 82.92603302001953,
+ "learning_rate": 8.393650793650794e-06,
+ "loss": 0.1664,
+ "step": 7428
+ },
+ {
+ "epoch": 42.45142857142857,
+ "grad_norm": 53.30680847167969,
+ "learning_rate": 8.387301587301588e-06,
+ "loss": 0.1705,
+ "step": 7429
+ },
+ {
+ "epoch": 42.457142857142856,
+ "grad_norm": 22.10956573486328,
+ "learning_rate": 8.38095238095238e-06,
+ "loss": 0.2412,
+ "step": 7430
+ },
+ {
+ "epoch": 42.462857142857146,
+ "grad_norm": 27.898704528808594,
+ "learning_rate": 8.374603174603175e-06,
+ "loss": 0.1431,
+ "step": 7431
+ },
+ {
+ "epoch": 42.46857142857143,
+ "grad_norm": 61.94043731689453,
+ "learning_rate": 8.368253968253967e-06,
+ "loss": 0.1794,
+ "step": 7432
+ },
+ {
+ "epoch": 42.47428571428571,
+ "grad_norm": 32.78137969970703,
+ "learning_rate": 8.361904761904762e-06,
+ "loss": 0.1466,
+ "step": 7433
+ },
+ {
+ "epoch": 42.48,
+ "grad_norm": 28.5189266204834,
+ "learning_rate": 8.355555555555556e-06,
+ "loss": 0.2008,
+ "step": 7434
+ },
+ {
+ "epoch": 42.48571428571429,
+ "grad_norm": 55.5509147644043,
+ "learning_rate": 8.34920634920635e-06,
+ "loss": 0.1297,
+ "step": 7435
+ },
+ {
+ "epoch": 42.49142857142857,
+ "grad_norm": 56.201786041259766,
+ "learning_rate": 8.342857142857143e-06,
+ "loss": 0.2611,
+ "step": 7436
+ },
+ {
+ "epoch": 42.497142857142855,
+ "grad_norm": 1000.422607421875,
+ "learning_rate": 8.336507936507937e-06,
+ "loss": 0.1611,
+ "step": 7437
+ },
+ {
+ "epoch": 42.502857142857145,
+ "grad_norm": 19.964914321899414,
+ "learning_rate": 8.330158730158732e-06,
+ "loss": 0.2158,
+ "step": 7438
+ },
+ {
+ "epoch": 42.50857142857143,
+ "grad_norm": 60.65313720703125,
+ "learning_rate": 8.323809523809524e-06,
+ "loss": 0.1371,
+ "step": 7439
+ },
+ {
+ "epoch": 42.51428571428571,
+ "grad_norm": 63.652687072753906,
+ "learning_rate": 8.317460317460319e-06,
+ "loss": 0.1351,
+ "step": 7440
+ },
+ {
+ "epoch": 42.52,
+ "grad_norm": 54.9022331237793,
+ "learning_rate": 8.311111111111111e-06,
+ "loss": 0.0954,
+ "step": 7441
+ },
+ {
+ "epoch": 42.52571428571429,
+ "grad_norm": 55.63421630859375,
+ "learning_rate": 8.304761904761906e-06,
+ "loss": 0.1193,
+ "step": 7442
+ },
+ {
+ "epoch": 42.53142857142857,
+ "grad_norm": 29.859519958496094,
+ "learning_rate": 8.298412698412698e-06,
+ "loss": 0.1315,
+ "step": 7443
+ },
+ {
+ "epoch": 42.537142857142854,
+ "grad_norm": 68.5718765258789,
+ "learning_rate": 8.292063492063493e-06,
+ "loss": 0.1668,
+ "step": 7444
+ },
+ {
+ "epoch": 42.542857142857144,
+ "grad_norm": 40.8301887512207,
+ "learning_rate": 8.285714285714285e-06,
+ "loss": 0.1709,
+ "step": 7445
+ },
+ {
+ "epoch": 42.54857142857143,
+ "grad_norm": 28.545330047607422,
+ "learning_rate": 8.27936507936508e-06,
+ "loss": 0.1146,
+ "step": 7446
+ },
+ {
+ "epoch": 42.55428571428571,
+ "grad_norm": 1060.134033203125,
+ "learning_rate": 8.273015873015872e-06,
+ "loss": 0.1951,
+ "step": 7447
+ },
+ {
+ "epoch": 42.56,
+ "grad_norm": 53.850128173828125,
+ "learning_rate": 8.266666666666667e-06,
+ "loss": 0.1573,
+ "step": 7448
+ },
+ {
+ "epoch": 42.565714285714286,
+ "grad_norm": 17.7730655670166,
+ "learning_rate": 8.260317460317461e-06,
+ "loss": 0.2601,
+ "step": 7449
+ },
+ {
+ "epoch": 42.57142857142857,
+ "grad_norm": 25.573711395263672,
+ "learning_rate": 8.253968253968254e-06,
+ "loss": 0.1189,
+ "step": 7450
+ },
+ {
+ "epoch": 42.57714285714286,
+ "grad_norm": 42.47023391723633,
+ "learning_rate": 8.247619047619048e-06,
+ "loss": 0.1348,
+ "step": 7451
+ },
+ {
+ "epoch": 42.582857142857144,
+ "grad_norm": 27.079898834228516,
+ "learning_rate": 8.241269841269842e-06,
+ "loss": 0.1767,
+ "step": 7452
+ },
+ {
+ "epoch": 42.58857142857143,
+ "grad_norm": 53.77452087402344,
+ "learning_rate": 8.234920634920635e-06,
+ "loss": 0.1532,
+ "step": 7453
+ },
+ {
+ "epoch": 42.59428571428572,
+ "grad_norm": 55.31523132324219,
+ "learning_rate": 8.22857142857143e-06,
+ "loss": 0.1667,
+ "step": 7454
+ },
+ {
+ "epoch": 42.6,
+ "grad_norm": 43.21916580200195,
+ "learning_rate": 8.222222222222223e-06,
+ "loss": 0.164,
+ "step": 7455
+ },
+ {
+ "epoch": 42.605714285714285,
+ "grad_norm": 51.73290252685547,
+ "learning_rate": 8.215873015873016e-06,
+ "loss": 0.1625,
+ "step": 7456
+ },
+ {
+ "epoch": 42.61142857142857,
+ "grad_norm": 50.38142776489258,
+ "learning_rate": 8.20952380952381e-06,
+ "loss": 0.1448,
+ "step": 7457
+ },
+ {
+ "epoch": 42.61714285714286,
+ "grad_norm": 27.53108024597168,
+ "learning_rate": 8.203174603174603e-06,
+ "loss": 0.1938,
+ "step": 7458
+ },
+ {
+ "epoch": 42.62285714285714,
+ "grad_norm": 48.98680877685547,
+ "learning_rate": 8.196825396825397e-06,
+ "loss": 0.1132,
+ "step": 7459
+ },
+ {
+ "epoch": 42.628571428571426,
+ "grad_norm": 25.339763641357422,
+ "learning_rate": 8.190476190476192e-06,
+ "loss": 0.12,
+ "step": 7460
+ },
+ {
+ "epoch": 42.63428571428572,
+ "grad_norm": 97.11475372314453,
+ "learning_rate": 8.184126984126984e-06,
+ "loss": 0.1563,
+ "step": 7461
+ },
+ {
+ "epoch": 42.64,
+ "grad_norm": 34.36635208129883,
+ "learning_rate": 8.177777777777779e-06,
+ "loss": 0.1327,
+ "step": 7462
+ },
+ {
+ "epoch": 42.645714285714284,
+ "grad_norm": 47.332855224609375,
+ "learning_rate": 8.171428571428571e-06,
+ "loss": 0.158,
+ "step": 7463
+ },
+ {
+ "epoch": 42.651428571428575,
+ "grad_norm": 22.13446617126465,
+ "learning_rate": 8.165079365079366e-06,
+ "loss": 0.1633,
+ "step": 7464
+ },
+ {
+ "epoch": 42.65714285714286,
+ "grad_norm": 81.29454040527344,
+ "learning_rate": 8.158730158730158e-06,
+ "loss": 0.1518,
+ "step": 7465
+ },
+ {
+ "epoch": 42.66285714285714,
+ "grad_norm": 19.024784088134766,
+ "learning_rate": 8.152380952380953e-06,
+ "loss": 0.1706,
+ "step": 7466
+ },
+ {
+ "epoch": 42.668571428571425,
+ "grad_norm": 93.32360076904297,
+ "learning_rate": 8.146031746031745e-06,
+ "loss": 0.172,
+ "step": 7467
+ },
+ {
+ "epoch": 42.674285714285716,
+ "grad_norm": 44.49494934082031,
+ "learning_rate": 8.13968253968254e-06,
+ "loss": 0.1559,
+ "step": 7468
+ },
+ {
+ "epoch": 42.68,
+ "grad_norm": 57.658836364746094,
+ "learning_rate": 8.133333333333332e-06,
+ "loss": 0.1283,
+ "step": 7469
+ },
+ {
+ "epoch": 42.68571428571428,
+ "grad_norm": 22.79932403564453,
+ "learning_rate": 8.126984126984127e-06,
+ "loss": 0.1191,
+ "step": 7470
+ },
+ {
+ "epoch": 42.691428571428574,
+ "grad_norm": 67.99079895019531,
+ "learning_rate": 8.120634920634921e-06,
+ "loss": 0.19,
+ "step": 7471
+ },
+ {
+ "epoch": 42.69714285714286,
+ "grad_norm": 45.546077728271484,
+ "learning_rate": 8.114285714285715e-06,
+ "loss": 0.1761,
+ "step": 7472
+ },
+ {
+ "epoch": 42.70285714285714,
+ "grad_norm": 26.195148468017578,
+ "learning_rate": 8.10793650793651e-06,
+ "loss": 0.1524,
+ "step": 7473
+ },
+ {
+ "epoch": 42.70857142857143,
+ "grad_norm": 52.820526123046875,
+ "learning_rate": 8.101587301587302e-06,
+ "loss": 0.1547,
+ "step": 7474
+ },
+ {
+ "epoch": 42.714285714285715,
+ "grad_norm": 33.6549072265625,
+ "learning_rate": 8.095238095238097e-06,
+ "loss": 0.1609,
+ "step": 7475
+ },
+ {
+ "epoch": 42.72,
+ "grad_norm": 29.644054412841797,
+ "learning_rate": 8.08888888888889e-06,
+ "loss": 0.1765,
+ "step": 7476
+ },
+ {
+ "epoch": 42.72571428571428,
+ "grad_norm": 29.143583297729492,
+ "learning_rate": 8.082539682539684e-06,
+ "loss": 0.1616,
+ "step": 7477
+ },
+ {
+ "epoch": 42.73142857142857,
+ "grad_norm": 54.07554626464844,
+ "learning_rate": 8.076190476190476e-06,
+ "loss": 0.1606,
+ "step": 7478
+ },
+ {
+ "epoch": 42.73714285714286,
+ "grad_norm": 59.4071159362793,
+ "learning_rate": 8.06984126984127e-06,
+ "loss": 0.2025,
+ "step": 7479
+ },
+ {
+ "epoch": 42.74285714285714,
+ "grad_norm": 44.09516143798828,
+ "learning_rate": 8.063492063492063e-06,
+ "loss": 0.1097,
+ "step": 7480
+ },
+ {
+ "epoch": 42.74857142857143,
+ "grad_norm": 32.09178161621094,
+ "learning_rate": 8.057142857142857e-06,
+ "loss": 0.1291,
+ "step": 7481
+ },
+ {
+ "epoch": 42.754285714285714,
+ "grad_norm": 28.902109146118164,
+ "learning_rate": 8.050793650793652e-06,
+ "loss": 0.2254,
+ "step": 7482
+ },
+ {
+ "epoch": 42.76,
+ "grad_norm": 27.518037796020508,
+ "learning_rate": 8.044444444444444e-06,
+ "loss": 0.124,
+ "step": 7483
+ },
+ {
+ "epoch": 42.76571428571429,
+ "grad_norm": 457.9650573730469,
+ "learning_rate": 8.038095238095239e-06,
+ "loss": 0.2841,
+ "step": 7484
+ },
+ {
+ "epoch": 42.77142857142857,
+ "grad_norm": 46.90461730957031,
+ "learning_rate": 8.031746031746031e-06,
+ "loss": 0.1329,
+ "step": 7485
+ },
+ {
+ "epoch": 42.777142857142856,
+ "grad_norm": 31.557842254638672,
+ "learning_rate": 8.025396825396826e-06,
+ "loss": 0.2024,
+ "step": 7486
+ },
+ {
+ "epoch": 42.78285714285714,
+ "grad_norm": 40.40676498413086,
+ "learning_rate": 8.019047619047618e-06,
+ "loss": 0.1522,
+ "step": 7487
+ },
+ {
+ "epoch": 42.78857142857143,
+ "grad_norm": 52.519371032714844,
+ "learning_rate": 8.012698412698413e-06,
+ "loss": 0.1644,
+ "step": 7488
+ },
+ {
+ "epoch": 42.794285714285714,
+ "grad_norm": 22.291147232055664,
+ "learning_rate": 8.006349206349207e-06,
+ "loss": 0.118,
+ "step": 7489
+ },
+ {
+ "epoch": 42.8,
+ "grad_norm": 45.24946594238281,
+ "learning_rate": 8.000000000000001e-06,
+ "loss": 0.1665,
+ "step": 7490
+ },
+ {
+ "epoch": 42.80571428571429,
+ "grad_norm": 34.55263900756836,
+ "learning_rate": 7.993650793650794e-06,
+ "loss": 0.2683,
+ "step": 7491
+ },
+ {
+ "epoch": 42.81142857142857,
+ "grad_norm": 31.581363677978516,
+ "learning_rate": 7.987301587301588e-06,
+ "loss": 0.1601,
+ "step": 7492
+ },
+ {
+ "epoch": 42.817142857142855,
+ "grad_norm": 34.04935836791992,
+ "learning_rate": 7.980952380952383e-06,
+ "loss": 0.178,
+ "step": 7493
+ },
+ {
+ "epoch": 42.822857142857146,
+ "grad_norm": 84.48345947265625,
+ "learning_rate": 7.974603174603175e-06,
+ "loss": 0.1736,
+ "step": 7494
+ },
+ {
+ "epoch": 42.82857142857143,
+ "grad_norm": 22.20957374572754,
+ "learning_rate": 7.96825396825397e-06,
+ "loss": 0.1343,
+ "step": 7495
+ },
+ {
+ "epoch": 42.83428571428571,
+ "grad_norm": 31.793655395507812,
+ "learning_rate": 7.961904761904762e-06,
+ "loss": 0.1058,
+ "step": 7496
+ },
+ {
+ "epoch": 42.84,
+ "grad_norm": 20.395389556884766,
+ "learning_rate": 7.955555555555557e-06,
+ "loss": 0.2209,
+ "step": 7497
+ },
+ {
+ "epoch": 42.84571428571429,
+ "grad_norm": 39.11921691894531,
+ "learning_rate": 7.94920634920635e-06,
+ "loss": 0.1347,
+ "step": 7498
+ },
+ {
+ "epoch": 42.85142857142857,
+ "grad_norm": 70.40147399902344,
+ "learning_rate": 7.942857142857144e-06,
+ "loss": 0.1641,
+ "step": 7499
+ },
+ {
+ "epoch": 42.857142857142854,
+ "grad_norm": 35.69590377807617,
+ "learning_rate": 7.936507936507936e-06,
+ "loss": 0.201,
+ "step": 7500
+ },
+ {
+ "epoch": 42.862857142857145,
+ "grad_norm": 48.583946228027344,
+ "learning_rate": 7.93015873015873e-06,
+ "loss": 0.1983,
+ "step": 7501
+ },
+ {
+ "epoch": 42.86857142857143,
+ "grad_norm": 39.1660270690918,
+ "learning_rate": 7.923809523809523e-06,
+ "loss": 0.1265,
+ "step": 7502
+ },
+ {
+ "epoch": 42.87428571428571,
+ "grad_norm": 49.16383361816406,
+ "learning_rate": 7.917460317460317e-06,
+ "loss": 0.1961,
+ "step": 7503
+ },
+ {
+ "epoch": 42.88,
+ "grad_norm": 28.290082931518555,
+ "learning_rate": 7.91111111111111e-06,
+ "loss": 0.1965,
+ "step": 7504
+ },
+ {
+ "epoch": 42.885714285714286,
+ "grad_norm": 80.19752502441406,
+ "learning_rate": 7.904761904761904e-06,
+ "loss": 0.2386,
+ "step": 7505
+ },
+ {
+ "epoch": 42.89142857142857,
+ "grad_norm": 25.667085647583008,
+ "learning_rate": 7.898412698412699e-06,
+ "loss": 0.1357,
+ "step": 7506
+ },
+ {
+ "epoch": 42.89714285714286,
+ "grad_norm": 79.53691101074219,
+ "learning_rate": 7.892063492063493e-06,
+ "loss": 0.2207,
+ "step": 7507
+ },
+ {
+ "epoch": 42.902857142857144,
+ "grad_norm": 59.32994842529297,
+ "learning_rate": 7.885714285714286e-06,
+ "loss": 0.1417,
+ "step": 7508
+ },
+ {
+ "epoch": 42.90857142857143,
+ "grad_norm": 21.014270782470703,
+ "learning_rate": 7.87936507936508e-06,
+ "loss": 0.1568,
+ "step": 7509
+ },
+ {
+ "epoch": 42.91428571428571,
+ "grad_norm": 60.28929901123047,
+ "learning_rate": 7.873015873015874e-06,
+ "loss": 0.1168,
+ "step": 7510
+ },
+ {
+ "epoch": 42.92,
+ "grad_norm": 32.30584716796875,
+ "learning_rate": 7.866666666666667e-06,
+ "loss": 0.2005,
+ "step": 7511
+ },
+ {
+ "epoch": 42.925714285714285,
+ "grad_norm": 24.222583770751953,
+ "learning_rate": 7.860317460317461e-06,
+ "loss": 0.1403,
+ "step": 7512
+ },
+ {
+ "epoch": 42.93142857142857,
+ "grad_norm": 42.4453239440918,
+ "learning_rate": 7.853968253968254e-06,
+ "loss": 0.1229,
+ "step": 7513
+ },
+ {
+ "epoch": 42.93714285714286,
+ "grad_norm": 37.953250885009766,
+ "learning_rate": 7.847619047619048e-06,
+ "loss": 0.1573,
+ "step": 7514
+ },
+ {
+ "epoch": 42.94285714285714,
+ "grad_norm": 856.2482299804688,
+ "learning_rate": 7.841269841269841e-06,
+ "loss": 0.1855,
+ "step": 7515
+ },
+ {
+ "epoch": 42.94857142857143,
+ "grad_norm": 55.68648910522461,
+ "learning_rate": 7.834920634920635e-06,
+ "loss": 0.1351,
+ "step": 7516
+ },
+ {
+ "epoch": 42.95428571428572,
+ "grad_norm": 41.787540435791016,
+ "learning_rate": 7.82857142857143e-06,
+ "loss": 0.1239,
+ "step": 7517
+ },
+ {
+ "epoch": 42.96,
+ "grad_norm": 53.466854095458984,
+ "learning_rate": 7.822222222222222e-06,
+ "loss": 0.2125,
+ "step": 7518
+ },
+ {
+ "epoch": 42.965714285714284,
+ "grad_norm": 203.714111328125,
+ "learning_rate": 7.815873015873017e-06,
+ "loss": 0.1814,
+ "step": 7519
+ },
+ {
+ "epoch": 42.97142857142857,
+ "grad_norm": 31.365480422973633,
+ "learning_rate": 7.80952380952381e-06,
+ "loss": 0.2021,
+ "step": 7520
+ },
+ {
+ "epoch": 42.97714285714286,
+ "grad_norm": 197.8916015625,
+ "learning_rate": 7.803174603174604e-06,
+ "loss": 0.214,
+ "step": 7521
+ },
+ {
+ "epoch": 42.98285714285714,
+ "grad_norm": 29.759140014648438,
+ "learning_rate": 7.796825396825396e-06,
+ "loss": 0.1622,
+ "step": 7522
+ },
+ {
+ "epoch": 42.988571428571426,
+ "grad_norm": 61.037593841552734,
+ "learning_rate": 7.79047619047619e-06,
+ "loss": 0.1531,
+ "step": 7523
+ },
+ {
+ "epoch": 42.994285714285716,
+ "grad_norm": 39.895286560058594,
+ "learning_rate": 7.784126984126985e-06,
+ "loss": 0.1649,
+ "step": 7524
+ },
+ {
+ "epoch": 43.0,
+ "grad_norm": 46.61077880859375,
+ "learning_rate": 7.777777777777777e-06,
+ "loss": 0.1352,
+ "step": 7525
+ },
+ {
+ "epoch": 43.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5749897956848145,
+ "eval_map": 0.9381,
+ "eval_map_50": 0.9699,
+ "eval_map_75": 0.9637,
+ "eval_map_large": 0.9382,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9381,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7892,
+ "eval_mar_10": 0.9759,
+ "eval_mar_100": 0.9765,
+ "eval_mar_100_per_class": 0.9765,
+ "eval_mar_large": 0.9765,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.3945,
+ "eval_samples_per_second": 21.949,
+ "eval_steps_per_second": 2.762,
+ "step": 7525
+ },
+ {
+ "epoch": 43.005714285714284,
+ "grad_norm": 82.03201293945312,
+ "learning_rate": 7.771428571428572e-06,
+ "loss": 0.1836,
+ "step": 7526
+ },
+ {
+ "epoch": 43.011428571428574,
+ "grad_norm": 435.36126708984375,
+ "learning_rate": 7.765079365079366e-06,
+ "loss": 0.2198,
+ "step": 7527
+ },
+ {
+ "epoch": 43.01714285714286,
+ "grad_norm": 42.7081413269043,
+ "learning_rate": 7.75873015873016e-06,
+ "loss": 0.1261,
+ "step": 7528
+ },
+ {
+ "epoch": 43.02285714285714,
+ "grad_norm": 46.990474700927734,
+ "learning_rate": 7.752380952380953e-06,
+ "loss": 0.1833,
+ "step": 7529
+ },
+ {
+ "epoch": 43.02857142857143,
+ "grad_norm": 39.83193588256836,
+ "learning_rate": 7.746031746031747e-06,
+ "loss": 0.123,
+ "step": 7530
+ },
+ {
+ "epoch": 43.034285714285716,
+ "grad_norm": 16.36966896057129,
+ "learning_rate": 7.73968253968254e-06,
+ "loss": 0.1705,
+ "step": 7531
+ },
+ {
+ "epoch": 43.04,
+ "grad_norm": 108.55115509033203,
+ "learning_rate": 7.733333333333334e-06,
+ "loss": 0.1844,
+ "step": 7532
+ },
+ {
+ "epoch": 43.04571428571428,
+ "grad_norm": 54.612186431884766,
+ "learning_rate": 7.726984126984127e-06,
+ "loss": 0.1496,
+ "step": 7533
+ },
+ {
+ "epoch": 43.05142857142857,
+ "grad_norm": 33.79128646850586,
+ "learning_rate": 7.720634920634921e-06,
+ "loss": 0.1736,
+ "step": 7534
+ },
+ {
+ "epoch": 43.05714285714286,
+ "grad_norm": 374.0523986816406,
+ "learning_rate": 7.714285714285714e-06,
+ "loss": 0.1488,
+ "step": 7535
+ },
+ {
+ "epoch": 43.06285714285714,
+ "grad_norm": 83.07660675048828,
+ "learning_rate": 7.707936507936508e-06,
+ "loss": 0.1235,
+ "step": 7536
+ },
+ {
+ "epoch": 43.06857142857143,
+ "grad_norm": 62.87297821044922,
+ "learning_rate": 7.701587301587301e-06,
+ "loss": 0.135,
+ "step": 7537
+ },
+ {
+ "epoch": 43.074285714285715,
+ "grad_norm": 29.516284942626953,
+ "learning_rate": 7.695238095238095e-06,
+ "loss": 0.1292,
+ "step": 7538
+ },
+ {
+ "epoch": 43.08,
+ "grad_norm": 35.36422348022461,
+ "learning_rate": 7.68888888888889e-06,
+ "loss": 0.1709,
+ "step": 7539
+ },
+ {
+ "epoch": 43.08571428571429,
+ "grad_norm": 31.686317443847656,
+ "learning_rate": 7.682539682539682e-06,
+ "loss": 0.1376,
+ "step": 7540
+ },
+ {
+ "epoch": 43.09142857142857,
+ "grad_norm": 66.57743835449219,
+ "learning_rate": 7.676190476190477e-06,
+ "loss": 0.1701,
+ "step": 7541
+ },
+ {
+ "epoch": 43.097142857142856,
+ "grad_norm": 45.713653564453125,
+ "learning_rate": 7.66984126984127e-06,
+ "loss": 0.1109,
+ "step": 7542
+ },
+ {
+ "epoch": 43.10285714285714,
+ "grad_norm": 29.52507972717285,
+ "learning_rate": 7.663492063492064e-06,
+ "loss": 0.1376,
+ "step": 7543
+ },
+ {
+ "epoch": 43.10857142857143,
+ "grad_norm": 59.936851501464844,
+ "learning_rate": 7.657142857142858e-06,
+ "loss": 0.1464,
+ "step": 7544
+ },
+ {
+ "epoch": 43.114285714285714,
+ "grad_norm": 402.4036865234375,
+ "learning_rate": 7.650793650793652e-06,
+ "loss": 0.1702,
+ "step": 7545
+ },
+ {
+ "epoch": 43.12,
+ "grad_norm": 160.90231323242188,
+ "learning_rate": 7.644444444444445e-06,
+ "loss": 0.115,
+ "step": 7546
+ },
+ {
+ "epoch": 43.12571428571429,
+ "grad_norm": 40.402618408203125,
+ "learning_rate": 7.63809523809524e-06,
+ "loss": 0.1222,
+ "step": 7547
+ },
+ {
+ "epoch": 43.13142857142857,
+ "grad_norm": 20.92841148376465,
+ "learning_rate": 7.631746031746032e-06,
+ "loss": 0.1456,
+ "step": 7548
+ },
+ {
+ "epoch": 43.137142857142855,
+ "grad_norm": 105.12482452392578,
+ "learning_rate": 7.625396825396826e-06,
+ "loss": 0.1148,
+ "step": 7549
+ },
+ {
+ "epoch": 43.142857142857146,
+ "grad_norm": 18.593313217163086,
+ "learning_rate": 7.6190476190476205e-06,
+ "loss": 0.1563,
+ "step": 7550
+ },
+ {
+ "epoch": 43.14857142857143,
+ "grad_norm": 42.624813079833984,
+ "learning_rate": 7.612698412698413e-06,
+ "loss": 0.1964,
+ "step": 7551
+ },
+ {
+ "epoch": 43.15428571428571,
+ "grad_norm": 31.275169372558594,
+ "learning_rate": 7.6063492063492074e-06,
+ "loss": 0.1141,
+ "step": 7552
+ },
+ {
+ "epoch": 43.16,
+ "grad_norm": 34.69234085083008,
+ "learning_rate": 7.6e-06,
+ "loss": 0.1878,
+ "step": 7553
+ },
+ {
+ "epoch": 43.16571428571429,
+ "grad_norm": 75.30792236328125,
+ "learning_rate": 7.593650793650794e-06,
+ "loss": 0.1781,
+ "step": 7554
+ },
+ {
+ "epoch": 43.17142857142857,
+ "grad_norm": 29.43678092956543,
+ "learning_rate": 7.587301587301587e-06,
+ "loss": 0.1592,
+ "step": 7555
+ },
+ {
+ "epoch": 43.177142857142854,
+ "grad_norm": 35.68794631958008,
+ "learning_rate": 7.580952380952381e-06,
+ "loss": 0.2717,
+ "step": 7556
+ },
+ {
+ "epoch": 43.182857142857145,
+ "grad_norm": 56.68461608886719,
+ "learning_rate": 7.574603174603175e-06,
+ "loss": 0.137,
+ "step": 7557
+ },
+ {
+ "epoch": 43.18857142857143,
+ "grad_norm": 134.8107147216797,
+ "learning_rate": 7.568253968253969e-06,
+ "loss": 0.1402,
+ "step": 7558
+ },
+ {
+ "epoch": 43.19428571428571,
+ "grad_norm": 91.69994354248047,
+ "learning_rate": 7.561904761904762e-06,
+ "loss": 0.2529,
+ "step": 7559
+ },
+ {
+ "epoch": 43.2,
+ "grad_norm": 47.072265625,
+ "learning_rate": 7.555555555555556e-06,
+ "loss": 0.1387,
+ "step": 7560
+ },
+ {
+ "epoch": 43.205714285714286,
+ "grad_norm": 25.122535705566406,
+ "learning_rate": 7.549206349206349e-06,
+ "loss": 0.1738,
+ "step": 7561
+ },
+ {
+ "epoch": 43.21142857142857,
+ "grad_norm": 26.446069717407227,
+ "learning_rate": 7.542857142857143e-06,
+ "loss": 0.144,
+ "step": 7562
+ },
+ {
+ "epoch": 43.21714285714286,
+ "grad_norm": 135.1419219970703,
+ "learning_rate": 7.5365079365079375e-06,
+ "loss": 0.2356,
+ "step": 7563
+ },
+ {
+ "epoch": 43.222857142857144,
+ "grad_norm": 19.344514846801758,
+ "learning_rate": 7.53015873015873e-06,
+ "loss": 0.1643,
+ "step": 7564
+ },
+ {
+ "epoch": 43.22857142857143,
+ "grad_norm": 20.15989875793457,
+ "learning_rate": 7.523809523809524e-06,
+ "loss": 0.1282,
+ "step": 7565
+ },
+ {
+ "epoch": 43.23428571428571,
+ "grad_norm": 22.729511260986328,
+ "learning_rate": 7.517460317460318e-06,
+ "loss": 0.1119,
+ "step": 7566
+ },
+ {
+ "epoch": 43.24,
+ "grad_norm": 50.45492172241211,
+ "learning_rate": 7.511111111111112e-06,
+ "loss": 0.1361,
+ "step": 7567
+ },
+ {
+ "epoch": 43.245714285714286,
+ "grad_norm": 28.559432983398438,
+ "learning_rate": 7.504761904761905e-06,
+ "loss": 0.1731,
+ "step": 7568
+ },
+ {
+ "epoch": 43.25142857142857,
+ "grad_norm": 286.8123474121094,
+ "learning_rate": 7.498412698412699e-06,
+ "loss": 0.1314,
+ "step": 7569
+ },
+ {
+ "epoch": 43.25714285714286,
+ "grad_norm": 33.98759841918945,
+ "learning_rate": 7.492063492063492e-06,
+ "loss": 0.1734,
+ "step": 7570
+ },
+ {
+ "epoch": 43.26285714285714,
+ "grad_norm": 21.450578689575195,
+ "learning_rate": 7.485714285714286e-06,
+ "loss": 0.1865,
+ "step": 7571
+ },
+ {
+ "epoch": 43.26857142857143,
+ "grad_norm": 11.982288360595703,
+ "learning_rate": 7.479365079365079e-06,
+ "loss": 0.2033,
+ "step": 7572
+ },
+ {
+ "epoch": 43.27428571428572,
+ "grad_norm": 38.925289154052734,
+ "learning_rate": 7.473015873015873e-06,
+ "loss": 0.1453,
+ "step": 7573
+ },
+ {
+ "epoch": 43.28,
+ "grad_norm": 22.259342193603516,
+ "learning_rate": 7.4666666666666675e-06,
+ "loss": 0.1331,
+ "step": 7574
+ },
+ {
+ "epoch": 43.285714285714285,
+ "grad_norm": 45.1170539855957,
+ "learning_rate": 7.460317460317461e-06,
+ "loss": 0.469,
+ "step": 7575
+ },
+ {
+ "epoch": 43.29142857142857,
+ "grad_norm": 90.38233947753906,
+ "learning_rate": 7.4539682539682544e-06,
+ "loss": 0.148,
+ "step": 7576
+ },
+ {
+ "epoch": 43.29714285714286,
+ "grad_norm": 31.033479690551758,
+ "learning_rate": 7.447619047619048e-06,
+ "loss": 0.1868,
+ "step": 7577
+ },
+ {
+ "epoch": 43.30285714285714,
+ "grad_norm": 89.41876983642578,
+ "learning_rate": 7.441269841269842e-06,
+ "loss": 0.1399,
+ "step": 7578
+ },
+ {
+ "epoch": 43.308571428571426,
+ "grad_norm": 66.96041107177734,
+ "learning_rate": 7.434920634920635e-06,
+ "loss": 0.1686,
+ "step": 7579
+ },
+ {
+ "epoch": 43.31428571428572,
+ "grad_norm": 70.08899688720703,
+ "learning_rate": 7.428571428571429e-06,
+ "loss": 0.1374,
+ "step": 7580
+ },
+ {
+ "epoch": 43.32,
+ "grad_norm": 18.745100021362305,
+ "learning_rate": 7.422222222222222e-06,
+ "loss": 0.1762,
+ "step": 7581
+ },
+ {
+ "epoch": 43.325714285714284,
+ "grad_norm": 54.25947570800781,
+ "learning_rate": 7.415873015873016e-06,
+ "loss": 0.139,
+ "step": 7582
+ },
+ {
+ "epoch": 43.331428571428575,
+ "grad_norm": 79.05216979980469,
+ "learning_rate": 7.40952380952381e-06,
+ "loss": 0.1897,
+ "step": 7583
+ },
+ {
+ "epoch": 43.33714285714286,
+ "grad_norm": 31.019147872924805,
+ "learning_rate": 7.403174603174603e-06,
+ "loss": 0.1648,
+ "step": 7584
+ },
+ {
+ "epoch": 43.34285714285714,
+ "grad_norm": 16.248050689697266,
+ "learning_rate": 7.3968253968253975e-06,
+ "loss": 0.1553,
+ "step": 7585
+ },
+ {
+ "epoch": 43.348571428571425,
+ "grad_norm": 37.0955924987793,
+ "learning_rate": 7.390476190476191e-06,
+ "loss": 0.1316,
+ "step": 7586
+ },
+ {
+ "epoch": 43.354285714285716,
+ "grad_norm": 22.13149070739746,
+ "learning_rate": 7.384126984126985e-06,
+ "loss": 0.2298,
+ "step": 7587
+ },
+ {
+ "epoch": 43.36,
+ "grad_norm": 34.06486892700195,
+ "learning_rate": 7.377777777777778e-06,
+ "loss": 0.1287,
+ "step": 7588
+ },
+ {
+ "epoch": 43.36571428571428,
+ "grad_norm": 77.93970489501953,
+ "learning_rate": 7.371428571428572e-06,
+ "loss": 0.1197,
+ "step": 7589
+ },
+ {
+ "epoch": 43.371428571428574,
+ "grad_norm": 62.190792083740234,
+ "learning_rate": 7.365079365079365e-06,
+ "loss": 0.1972,
+ "step": 7590
+ },
+ {
+ "epoch": 43.37714285714286,
+ "grad_norm": 42.25458526611328,
+ "learning_rate": 7.358730158730159e-06,
+ "loss": 0.1302,
+ "step": 7591
+ },
+ {
+ "epoch": 43.38285714285714,
+ "grad_norm": 27.4211483001709,
+ "learning_rate": 7.352380952380952e-06,
+ "loss": 0.1605,
+ "step": 7592
+ },
+ {
+ "epoch": 43.38857142857143,
+ "grad_norm": 35.7080078125,
+ "learning_rate": 7.346031746031746e-06,
+ "loss": 0.1723,
+ "step": 7593
+ },
+ {
+ "epoch": 43.394285714285715,
+ "grad_norm": 19.61418914794922,
+ "learning_rate": 7.33968253968254e-06,
+ "loss": 0.332,
+ "step": 7594
+ },
+ {
+ "epoch": 43.4,
+ "grad_norm": 44.07203674316406,
+ "learning_rate": 7.333333333333334e-06,
+ "loss": 0.1577,
+ "step": 7595
+ },
+ {
+ "epoch": 43.40571428571428,
+ "grad_norm": 24.050046920776367,
+ "learning_rate": 7.326984126984128e-06,
+ "loss": 0.1353,
+ "step": 7596
+ },
+ {
+ "epoch": 43.41142857142857,
+ "grad_norm": 36.659420013427734,
+ "learning_rate": 7.320634920634921e-06,
+ "loss": 0.1791,
+ "step": 7597
+ },
+ {
+ "epoch": 43.417142857142856,
+ "grad_norm": 30.74590492248535,
+ "learning_rate": 7.314285714285715e-06,
+ "loss": 0.1289,
+ "step": 7598
+ },
+ {
+ "epoch": 43.42285714285714,
+ "grad_norm": 54.270450592041016,
+ "learning_rate": 7.307936507936508e-06,
+ "loss": 0.1525,
+ "step": 7599
+ },
+ {
+ "epoch": 43.42857142857143,
+ "grad_norm": 13.909317970275879,
+ "learning_rate": 7.301587301587302e-06,
+ "loss": 0.1565,
+ "step": 7600
+ },
+ {
+ "epoch": 43.434285714285714,
+ "grad_norm": 39.99986267089844,
+ "learning_rate": 7.295238095238095e-06,
+ "loss": 0.1583,
+ "step": 7601
+ },
+ {
+ "epoch": 43.44,
+ "grad_norm": 76.60659790039062,
+ "learning_rate": 7.288888888888889e-06,
+ "loss": 0.1394,
+ "step": 7602
+ },
+ {
+ "epoch": 43.44571428571429,
+ "grad_norm": 68.71869659423828,
+ "learning_rate": 7.282539682539683e-06,
+ "loss": 0.1694,
+ "step": 7603
+ },
+ {
+ "epoch": 43.45142857142857,
+ "grad_norm": 240.68544006347656,
+ "learning_rate": 7.276190476190477e-06,
+ "loss": 0.1812,
+ "step": 7604
+ },
+ {
+ "epoch": 43.457142857142856,
+ "grad_norm": 27.34787940979004,
+ "learning_rate": 7.26984126984127e-06,
+ "loss": 0.1393,
+ "step": 7605
+ },
+ {
+ "epoch": 43.462857142857146,
+ "grad_norm": 37.935115814208984,
+ "learning_rate": 7.263492063492064e-06,
+ "loss": 0.1654,
+ "step": 7606
+ },
+ {
+ "epoch": 43.46857142857143,
+ "grad_norm": 44.87675857543945,
+ "learning_rate": 7.257142857142857e-06,
+ "loss": 0.1219,
+ "step": 7607
+ },
+ {
+ "epoch": 43.47428571428571,
+ "grad_norm": 23.068836212158203,
+ "learning_rate": 7.250793650793651e-06,
+ "loss": 0.1369,
+ "step": 7608
+ },
+ {
+ "epoch": 43.48,
+ "grad_norm": 65.95645141601562,
+ "learning_rate": 7.244444444444445e-06,
+ "loss": 0.1619,
+ "step": 7609
+ },
+ {
+ "epoch": 43.48571428571429,
+ "grad_norm": 66.43006134033203,
+ "learning_rate": 7.238095238095238e-06,
+ "loss": 0.1209,
+ "step": 7610
+ },
+ {
+ "epoch": 43.49142857142857,
+ "grad_norm": 47.76883316040039,
+ "learning_rate": 7.231746031746032e-06,
+ "loss": 0.1227,
+ "step": 7611
+ },
+ {
+ "epoch": 43.497142857142855,
+ "grad_norm": 47.75768280029297,
+ "learning_rate": 7.225396825396826e-06,
+ "loss": 0.1494,
+ "step": 7612
+ },
+ {
+ "epoch": 43.502857142857145,
+ "grad_norm": 26.707481384277344,
+ "learning_rate": 7.21904761904762e-06,
+ "loss": 0.1576,
+ "step": 7613
+ },
+ {
+ "epoch": 43.50857142857143,
+ "grad_norm": 49.4616813659668,
+ "learning_rate": 7.212698412698413e-06,
+ "loss": 0.173,
+ "step": 7614
+ },
+ {
+ "epoch": 43.51428571428571,
+ "grad_norm": 39.75261688232422,
+ "learning_rate": 7.206349206349207e-06,
+ "loss": 0.17,
+ "step": 7615
+ },
+ {
+ "epoch": 43.52,
+ "grad_norm": 19.144189834594727,
+ "learning_rate": 7.2e-06,
+ "loss": 0.1685,
+ "step": 7616
+ },
+ {
+ "epoch": 43.52571428571429,
+ "grad_norm": 61.75029754638672,
+ "learning_rate": 7.193650793650794e-06,
+ "loss": 0.1447,
+ "step": 7617
+ },
+ {
+ "epoch": 43.53142857142857,
+ "grad_norm": 16.760744094848633,
+ "learning_rate": 7.187301587301587e-06,
+ "loss": 0.1048,
+ "step": 7618
+ },
+ {
+ "epoch": 43.537142857142854,
+ "grad_norm": 18.882497787475586,
+ "learning_rate": 7.180952380952381e-06,
+ "loss": 0.197,
+ "step": 7619
+ },
+ {
+ "epoch": 43.542857142857144,
+ "grad_norm": 38.44529724121094,
+ "learning_rate": 7.174603174603175e-06,
+ "loss": 0.1801,
+ "step": 7620
+ },
+ {
+ "epoch": 43.54857142857143,
+ "grad_norm": 114.03311157226562,
+ "learning_rate": 7.168253968253969e-06,
+ "loss": 0.146,
+ "step": 7621
+ },
+ {
+ "epoch": 43.55428571428571,
+ "grad_norm": 90.95785522460938,
+ "learning_rate": 7.161904761904763e-06,
+ "loss": 0.1437,
+ "step": 7622
+ },
+ {
+ "epoch": 43.56,
+ "grad_norm": 30.148263931274414,
+ "learning_rate": 7.155555555555556e-06,
+ "loss": 0.1549,
+ "step": 7623
+ },
+ {
+ "epoch": 43.565714285714286,
+ "grad_norm": 56.53114318847656,
+ "learning_rate": 7.14920634920635e-06,
+ "loss": 0.2126,
+ "step": 7624
+ },
+ {
+ "epoch": 43.57142857142857,
+ "grad_norm": 86.12628173828125,
+ "learning_rate": 7.142857142857143e-06,
+ "loss": 0.1507,
+ "step": 7625
+ },
+ {
+ "epoch": 43.57714285714286,
+ "grad_norm": 26.091625213623047,
+ "learning_rate": 7.136507936507937e-06,
+ "loss": 0.1571,
+ "step": 7626
+ },
+ {
+ "epoch": 43.582857142857144,
+ "grad_norm": 30.037315368652344,
+ "learning_rate": 7.13015873015873e-06,
+ "loss": 0.1186,
+ "step": 7627
+ },
+ {
+ "epoch": 43.58857142857143,
+ "grad_norm": 33.91983413696289,
+ "learning_rate": 7.123809523809524e-06,
+ "loss": 0.1662,
+ "step": 7628
+ },
+ {
+ "epoch": 43.59428571428572,
+ "grad_norm": 58.2686653137207,
+ "learning_rate": 7.1174603174603175e-06,
+ "loss": 0.1438,
+ "step": 7629
+ },
+ {
+ "epoch": 43.6,
+ "grad_norm": 24.259056091308594,
+ "learning_rate": 7.111111111111112e-06,
+ "loss": 0.1747,
+ "step": 7630
+ },
+ {
+ "epoch": 43.605714285714285,
+ "grad_norm": 38.988319396972656,
+ "learning_rate": 7.104761904761905e-06,
+ "loss": 0.3159,
+ "step": 7631
+ },
+ {
+ "epoch": 43.61142857142857,
+ "grad_norm": 39.77925491333008,
+ "learning_rate": 7.098412698412699e-06,
+ "loss": 0.1578,
+ "step": 7632
+ },
+ {
+ "epoch": 43.61714285714286,
+ "grad_norm": 29.86747932434082,
+ "learning_rate": 7.092063492063493e-06,
+ "loss": 0.1524,
+ "step": 7633
+ },
+ {
+ "epoch": 43.62285714285714,
+ "grad_norm": 34.30352020263672,
+ "learning_rate": 7.085714285714286e-06,
+ "loss": 0.1373,
+ "step": 7634
+ },
+ {
+ "epoch": 43.628571428571426,
+ "grad_norm": 21.58470916748047,
+ "learning_rate": 7.07936507936508e-06,
+ "loss": 0.1218,
+ "step": 7635
+ },
+ {
+ "epoch": 43.63428571428572,
+ "grad_norm": 50.943058013916016,
+ "learning_rate": 7.073015873015873e-06,
+ "loss": 0.1302,
+ "step": 7636
+ },
+ {
+ "epoch": 43.64,
+ "grad_norm": 21.63199234008789,
+ "learning_rate": 7.066666666666667e-06,
+ "loss": 0.1617,
+ "step": 7637
+ },
+ {
+ "epoch": 43.645714285714284,
+ "grad_norm": 20.79852867126465,
+ "learning_rate": 7.0603174603174605e-06,
+ "loss": 0.1723,
+ "step": 7638
+ },
+ {
+ "epoch": 43.651428571428575,
+ "grad_norm": 30.47551155090332,
+ "learning_rate": 7.053968253968255e-06,
+ "loss": 0.144,
+ "step": 7639
+ },
+ {
+ "epoch": 43.65714285714286,
+ "grad_norm": 16.609371185302734,
+ "learning_rate": 7.0476190476190475e-06,
+ "loss": 0.2066,
+ "step": 7640
+ },
+ {
+ "epoch": 43.66285714285714,
+ "grad_norm": 47.40229415893555,
+ "learning_rate": 7.041269841269842e-06,
+ "loss": 0.1788,
+ "step": 7641
+ },
+ {
+ "epoch": 43.668571428571425,
+ "grad_norm": 32.97322463989258,
+ "learning_rate": 7.034920634920636e-06,
+ "loss": 0.1435,
+ "step": 7642
+ },
+ {
+ "epoch": 43.674285714285716,
+ "grad_norm": 36.80225372314453,
+ "learning_rate": 7.028571428571429e-06,
+ "loss": 0.145,
+ "step": 7643
+ },
+ {
+ "epoch": 43.68,
+ "grad_norm": 27.588375091552734,
+ "learning_rate": 7.022222222222223e-06,
+ "loss": 0.1489,
+ "step": 7644
+ },
+ {
+ "epoch": 43.68571428571428,
+ "grad_norm": 49.63401794433594,
+ "learning_rate": 7.015873015873016e-06,
+ "loss": 0.1575,
+ "step": 7645
+ },
+ {
+ "epoch": 43.691428571428574,
+ "grad_norm": 49.40351104736328,
+ "learning_rate": 7.00952380952381e-06,
+ "loss": 0.1875,
+ "step": 7646
+ },
+ {
+ "epoch": 43.69714285714286,
+ "grad_norm": 43.086910247802734,
+ "learning_rate": 7.003174603174604e-06,
+ "loss": 0.1205,
+ "step": 7647
+ },
+ {
+ "epoch": 43.70285714285714,
+ "grad_norm": 29.192461013793945,
+ "learning_rate": 6.996825396825397e-06,
+ "loss": 0.1581,
+ "step": 7648
+ },
+ {
+ "epoch": 43.70857142857143,
+ "grad_norm": 42.49656295776367,
+ "learning_rate": 6.9904761904761905e-06,
+ "loss": 0.1362,
+ "step": 7649
+ },
+ {
+ "epoch": 43.714285714285715,
+ "grad_norm": 46.579830169677734,
+ "learning_rate": 6.984126984126985e-06,
+ "loss": 0.1742,
+ "step": 7650
+ },
+ {
+ "epoch": 43.72,
+ "grad_norm": 960.8983154296875,
+ "learning_rate": 6.9777777777777775e-06,
+ "loss": 0.1848,
+ "step": 7651
+ },
+ {
+ "epoch": 43.72571428571428,
+ "grad_norm": 70.21526336669922,
+ "learning_rate": 6.971428571428572e-06,
+ "loss": 0.1467,
+ "step": 7652
+ },
+ {
+ "epoch": 43.73142857142857,
+ "grad_norm": 13.356405258178711,
+ "learning_rate": 6.965079365079366e-06,
+ "loss": 0.2087,
+ "step": 7653
+ },
+ {
+ "epoch": 43.73714285714286,
+ "grad_norm": 18.8299503326416,
+ "learning_rate": 6.958730158730159e-06,
+ "loss": 0.1509,
+ "step": 7654
+ },
+ {
+ "epoch": 43.74285714285714,
+ "grad_norm": 48.8301887512207,
+ "learning_rate": 6.952380952380953e-06,
+ "loss": 0.1093,
+ "step": 7655
+ },
+ {
+ "epoch": 43.74857142857143,
+ "grad_norm": 33.69208908081055,
+ "learning_rate": 6.946031746031746e-06,
+ "loss": 0.1604,
+ "step": 7656
+ },
+ {
+ "epoch": 43.754285714285714,
+ "grad_norm": 39.09320068359375,
+ "learning_rate": 6.93968253968254e-06,
+ "loss": 0.1591,
+ "step": 7657
+ },
+ {
+ "epoch": 43.76,
+ "grad_norm": 54.30573654174805,
+ "learning_rate": 6.933333333333334e-06,
+ "loss": 0.1459,
+ "step": 7658
+ },
+ {
+ "epoch": 43.76571428571429,
+ "grad_norm": 43.04143142700195,
+ "learning_rate": 6.926984126984128e-06,
+ "loss": 0.1916,
+ "step": 7659
+ },
+ {
+ "epoch": 43.77142857142857,
+ "grad_norm": 69.95057678222656,
+ "learning_rate": 6.9206349206349206e-06,
+ "loss": 0.1872,
+ "step": 7660
+ },
+ {
+ "epoch": 43.777142857142856,
+ "grad_norm": 32.16206741333008,
+ "learning_rate": 6.914285714285715e-06,
+ "loss": 0.1827,
+ "step": 7661
+ },
+ {
+ "epoch": 43.78285714285714,
+ "grad_norm": 94.184814453125,
+ "learning_rate": 6.9079365079365075e-06,
+ "loss": 0.1363,
+ "step": 7662
+ },
+ {
+ "epoch": 43.78857142857143,
+ "grad_norm": 28.984830856323242,
+ "learning_rate": 6.901587301587302e-06,
+ "loss": 0.1286,
+ "step": 7663
+ },
+ {
+ "epoch": 43.794285714285714,
+ "grad_norm": 59.8381462097168,
+ "learning_rate": 6.8952380952380945e-06,
+ "loss": 0.1945,
+ "step": 7664
+ },
+ {
+ "epoch": 43.8,
+ "grad_norm": 54.81769943237305,
+ "learning_rate": 6.888888888888889e-06,
+ "loss": 0.1399,
+ "step": 7665
+ },
+ {
+ "epoch": 43.80571428571429,
+ "grad_norm": 25.788875579833984,
+ "learning_rate": 6.882539682539683e-06,
+ "loss": 0.2116,
+ "step": 7666
+ },
+ {
+ "epoch": 43.81142857142857,
+ "grad_norm": 51.8724365234375,
+ "learning_rate": 6.876190476190477e-06,
+ "loss": 0.1393,
+ "step": 7667
+ },
+ {
+ "epoch": 43.817142857142855,
+ "grad_norm": 21.43828582763672,
+ "learning_rate": 6.869841269841271e-06,
+ "loss": 0.155,
+ "step": 7668
+ },
+ {
+ "epoch": 43.822857142857146,
+ "grad_norm": 41.8471794128418,
+ "learning_rate": 6.863492063492064e-06,
+ "loss": 0.1177,
+ "step": 7669
+ },
+ {
+ "epoch": 43.82857142857143,
+ "grad_norm": 51.57056427001953,
+ "learning_rate": 6.857142857142858e-06,
+ "loss": 0.1241,
+ "step": 7670
+ },
+ {
+ "epoch": 43.83428571428571,
+ "grad_norm": 46.511741638183594,
+ "learning_rate": 6.8507936507936506e-06,
+ "loss": 0.1043,
+ "step": 7671
+ },
+ {
+ "epoch": 43.84,
+ "grad_norm": 88.49575805664062,
+ "learning_rate": 6.844444444444445e-06,
+ "loss": 0.1316,
+ "step": 7672
+ },
+ {
+ "epoch": 43.84571428571429,
+ "grad_norm": 36.16876220703125,
+ "learning_rate": 6.8380952380952375e-06,
+ "loss": 0.1327,
+ "step": 7673
+ },
+ {
+ "epoch": 43.85142857142857,
+ "grad_norm": 50.89331817626953,
+ "learning_rate": 6.831746031746032e-06,
+ "loss": 0.2681,
+ "step": 7674
+ },
+ {
+ "epoch": 43.857142857142854,
+ "grad_norm": 44.32616424560547,
+ "learning_rate": 6.825396825396825e-06,
+ "loss": 0.1589,
+ "step": 7675
+ },
+ {
+ "epoch": 43.862857142857145,
+ "grad_norm": 45.69709396362305,
+ "learning_rate": 6.81904761904762e-06,
+ "loss": 0.1618,
+ "step": 7676
+ },
+ {
+ "epoch": 43.86857142857143,
+ "grad_norm": 831.8772583007812,
+ "learning_rate": 6.812698412698414e-06,
+ "loss": 0.2403,
+ "step": 7677
+ },
+ {
+ "epoch": 43.87428571428571,
+ "grad_norm": 50.53889465332031,
+ "learning_rate": 6.806349206349207e-06,
+ "loss": 0.1664,
+ "step": 7678
+ },
+ {
+ "epoch": 43.88,
+ "grad_norm": 33.22833251953125,
+ "learning_rate": 6.800000000000001e-06,
+ "loss": 0.126,
+ "step": 7679
+ },
+ {
+ "epoch": 43.885714285714286,
+ "grad_norm": 50.8762321472168,
+ "learning_rate": 6.793650793650794e-06,
+ "loss": 0.1478,
+ "step": 7680
+ },
+ {
+ "epoch": 43.89142857142857,
+ "grad_norm": 57.65444564819336,
+ "learning_rate": 6.787301587301588e-06,
+ "loss": 0.1661,
+ "step": 7681
+ },
+ {
+ "epoch": 43.89714285714286,
+ "grad_norm": 64.80712890625,
+ "learning_rate": 6.7809523809523806e-06,
+ "loss": 0.1689,
+ "step": 7682
+ },
+ {
+ "epoch": 43.902857142857144,
+ "grad_norm": 43.664512634277344,
+ "learning_rate": 6.774603174603175e-06,
+ "loss": 0.2021,
+ "step": 7683
+ },
+ {
+ "epoch": 43.90857142857143,
+ "grad_norm": 65.74862670898438,
+ "learning_rate": 6.768253968253968e-06,
+ "loss": 0.1362,
+ "step": 7684
+ },
+ {
+ "epoch": 43.91428571428571,
+ "grad_norm": 34.35681915283203,
+ "learning_rate": 6.761904761904763e-06,
+ "loss": 0.1945,
+ "step": 7685
+ },
+ {
+ "epoch": 43.92,
+ "grad_norm": 34.14244079589844,
+ "learning_rate": 6.755555555555555e-06,
+ "loss": 0.0913,
+ "step": 7686
+ },
+ {
+ "epoch": 43.925714285714285,
+ "grad_norm": 42.42035675048828,
+ "learning_rate": 6.74920634920635e-06,
+ "loss": 0.1462,
+ "step": 7687
+ },
+ {
+ "epoch": 43.93142857142857,
+ "grad_norm": 74.27864837646484,
+ "learning_rate": 6.742857142857144e-06,
+ "loss": 0.2258,
+ "step": 7688
+ },
+ {
+ "epoch": 43.93714285714286,
+ "grad_norm": 64.61466217041016,
+ "learning_rate": 6.736507936507937e-06,
+ "loss": 0.1289,
+ "step": 7689
+ },
+ {
+ "epoch": 43.94285714285714,
+ "grad_norm": 50.40987014770508,
+ "learning_rate": 6.730158730158731e-06,
+ "loss": 0.1687,
+ "step": 7690
+ },
+ {
+ "epoch": 43.94857142857143,
+ "grad_norm": 110.01002502441406,
+ "learning_rate": 6.723809523809524e-06,
+ "loss": 0.1591,
+ "step": 7691
+ },
+ {
+ "epoch": 43.95428571428572,
+ "grad_norm": 58.86008071899414,
+ "learning_rate": 6.717460317460318e-06,
+ "loss": 0.1716,
+ "step": 7692
+ },
+ {
+ "epoch": 43.96,
+ "grad_norm": 54.54946517944336,
+ "learning_rate": 6.711111111111111e-06,
+ "loss": 0.1859,
+ "step": 7693
+ },
+ {
+ "epoch": 43.965714285714284,
+ "grad_norm": 209.70298767089844,
+ "learning_rate": 6.704761904761906e-06,
+ "loss": 0.1432,
+ "step": 7694
+ },
+ {
+ "epoch": 43.97142857142857,
+ "grad_norm": 33.95160675048828,
+ "learning_rate": 6.698412698412698e-06,
+ "loss": 0.1285,
+ "step": 7695
+ },
+ {
+ "epoch": 43.97714285714286,
+ "grad_norm": 32.52368927001953,
+ "learning_rate": 6.692063492063493e-06,
+ "loss": 0.155,
+ "step": 7696
+ },
+ {
+ "epoch": 43.98285714285714,
+ "grad_norm": 53.18463134765625,
+ "learning_rate": 6.685714285714285e-06,
+ "loss": 0.2814,
+ "step": 7697
+ },
+ {
+ "epoch": 43.988571428571426,
+ "grad_norm": 494.98291015625,
+ "learning_rate": 6.67936507936508e-06,
+ "loss": 0.1706,
+ "step": 7698
+ },
+ {
+ "epoch": 43.994285714285716,
+ "grad_norm": 43.096839904785156,
+ "learning_rate": 6.673015873015874e-06,
+ "loss": 0.1665,
+ "step": 7699
+ },
+ {
+ "epoch": 44.0,
+ "grad_norm": 48.978614807128906,
+ "learning_rate": 6.666666666666667e-06,
+ "loss": 0.1927,
+ "step": 7700
+ },
+ {
+ "epoch": 44.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5757384300231934,
+ "eval_map": 0.9387,
+ "eval_map_50": 0.9684,
+ "eval_map_75": 0.9649,
+ "eval_map_large": 0.9387,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9387,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7902,
+ "eval_mar_10": 0.9749,
+ "eval_mar_100": 0.9749,
+ "eval_mar_100_per_class": 0.9749,
+ "eval_mar_large": 0.9749,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.311,
+ "eval_samples_per_second": 22.087,
+ "eval_steps_per_second": 2.78,
+ "step": 7700
+ },
+ {
+ "epoch": 44.005714285714284,
+ "grad_norm": 59.333335876464844,
+ "learning_rate": 6.660317460317461e-06,
+ "loss": 0.1075,
+ "step": 7701
+ },
+ {
+ "epoch": 44.011428571428574,
+ "grad_norm": 34.29730987548828,
+ "learning_rate": 6.6539682539682545e-06,
+ "loss": 0.1525,
+ "step": 7702
+ },
+ {
+ "epoch": 44.01714285714286,
+ "grad_norm": 286.7693786621094,
+ "learning_rate": 6.647619047619048e-06,
+ "loss": 0.1714,
+ "step": 7703
+ },
+ {
+ "epoch": 44.02285714285714,
+ "grad_norm": 65.73849487304688,
+ "learning_rate": 6.6412698412698414e-06,
+ "loss": 0.1158,
+ "step": 7704
+ },
+ {
+ "epoch": 44.02857142857143,
+ "grad_norm": 36.001380920410156,
+ "learning_rate": 6.634920634920636e-06,
+ "loss": 0.2047,
+ "step": 7705
+ },
+ {
+ "epoch": 44.034285714285716,
+ "grad_norm": 41.00141143798828,
+ "learning_rate": 6.628571428571428e-06,
+ "loss": 0.1346,
+ "step": 7706
+ },
+ {
+ "epoch": 44.04,
+ "grad_norm": 39.80067825317383,
+ "learning_rate": 6.622222222222223e-06,
+ "loss": 0.1475,
+ "step": 7707
+ },
+ {
+ "epoch": 44.04571428571428,
+ "grad_norm": 31.52075958251953,
+ "learning_rate": 6.615873015873015e-06,
+ "loss": 0.1168,
+ "step": 7708
+ },
+ {
+ "epoch": 44.05142857142857,
+ "grad_norm": 45.78147506713867,
+ "learning_rate": 6.60952380952381e-06,
+ "loss": 0.1976,
+ "step": 7709
+ },
+ {
+ "epoch": 44.05714285714286,
+ "grad_norm": 38.48584747314453,
+ "learning_rate": 6.603174603174604e-06,
+ "loss": 0.2292,
+ "step": 7710
+ },
+ {
+ "epoch": 44.06285714285714,
+ "grad_norm": 48.30115509033203,
+ "learning_rate": 6.596825396825397e-06,
+ "loss": 0.1574,
+ "step": 7711
+ },
+ {
+ "epoch": 44.06857142857143,
+ "grad_norm": 34.71587371826172,
+ "learning_rate": 6.590476190476191e-06,
+ "loss": 0.1413,
+ "step": 7712
+ },
+ {
+ "epoch": 44.074285714285715,
+ "grad_norm": 26.006423950195312,
+ "learning_rate": 6.5841269841269845e-06,
+ "loss": 0.1238,
+ "step": 7713
+ },
+ {
+ "epoch": 44.08,
+ "grad_norm": 58.09585952758789,
+ "learning_rate": 6.577777777777779e-06,
+ "loss": 0.1434,
+ "step": 7714
+ },
+ {
+ "epoch": 44.08571428571429,
+ "grad_norm": 53.749900817871094,
+ "learning_rate": 6.5714285714285714e-06,
+ "loss": 0.2007,
+ "step": 7715
+ },
+ {
+ "epoch": 44.09142857142857,
+ "grad_norm": 64.02667999267578,
+ "learning_rate": 6.565079365079366e-06,
+ "loss": 0.1173,
+ "step": 7716
+ },
+ {
+ "epoch": 44.097142857142856,
+ "grad_norm": 19.437864303588867,
+ "learning_rate": 6.558730158730158e-06,
+ "loss": 0.1861,
+ "step": 7717
+ },
+ {
+ "epoch": 44.10285714285714,
+ "grad_norm": 41.478515625,
+ "learning_rate": 6.552380952380953e-06,
+ "loss": 0.1524,
+ "step": 7718
+ },
+ {
+ "epoch": 44.10857142857143,
+ "grad_norm": 24.171451568603516,
+ "learning_rate": 6.546031746031746e-06,
+ "loss": 0.1573,
+ "step": 7719
+ },
+ {
+ "epoch": 44.114285714285714,
+ "grad_norm": 24.408952713012695,
+ "learning_rate": 6.53968253968254e-06,
+ "loss": 0.118,
+ "step": 7720
+ },
+ {
+ "epoch": 44.12,
+ "grad_norm": 48.23006820678711,
+ "learning_rate": 6.533333333333333e-06,
+ "loss": 0.1784,
+ "step": 7721
+ },
+ {
+ "epoch": 44.12571428571429,
+ "grad_norm": 33.18339157104492,
+ "learning_rate": 6.5269841269841275e-06,
+ "loss": 0.1202,
+ "step": 7722
+ },
+ {
+ "epoch": 44.13142857142857,
+ "grad_norm": 47.897403717041016,
+ "learning_rate": 6.520634920634922e-06,
+ "loss": 0.0878,
+ "step": 7723
+ },
+ {
+ "epoch": 44.137142857142855,
+ "grad_norm": 66.20220184326172,
+ "learning_rate": 6.5142857142857145e-06,
+ "loss": 0.1532,
+ "step": 7724
+ },
+ {
+ "epoch": 44.142857142857146,
+ "grad_norm": 22.750667572021484,
+ "learning_rate": 6.507936507936509e-06,
+ "loss": 0.1685,
+ "step": 7725
+ },
+ {
+ "epoch": 44.14857142857143,
+ "grad_norm": 27.192466735839844,
+ "learning_rate": 6.5015873015873014e-06,
+ "loss": 0.1222,
+ "step": 7726
+ },
+ {
+ "epoch": 44.15428571428571,
+ "grad_norm": 37.51421356201172,
+ "learning_rate": 6.495238095238096e-06,
+ "loss": 0.1223,
+ "step": 7727
+ },
+ {
+ "epoch": 44.16,
+ "grad_norm": 76.6365737915039,
+ "learning_rate": 6.488888888888888e-06,
+ "loss": 0.1615,
+ "step": 7728
+ },
+ {
+ "epoch": 44.16571428571429,
+ "grad_norm": 64.56356811523438,
+ "learning_rate": 6.482539682539683e-06,
+ "loss": 0.1891,
+ "step": 7729
+ },
+ {
+ "epoch": 44.17142857142857,
+ "grad_norm": 66.12844848632812,
+ "learning_rate": 6.476190476190476e-06,
+ "loss": 0.1378,
+ "step": 7730
+ },
+ {
+ "epoch": 44.177142857142854,
+ "grad_norm": 56.333126068115234,
+ "learning_rate": 6.4698412698412706e-06,
+ "loss": 0.1402,
+ "step": 7731
+ },
+ {
+ "epoch": 44.182857142857145,
+ "grad_norm": 64.17578887939453,
+ "learning_rate": 6.463492063492063e-06,
+ "loss": 0.1244,
+ "step": 7732
+ },
+ {
+ "epoch": 44.18857142857143,
+ "grad_norm": 50.45033645629883,
+ "learning_rate": 6.4571428571428575e-06,
+ "loss": 0.1571,
+ "step": 7733
+ },
+ {
+ "epoch": 44.19428571428571,
+ "grad_norm": 64.60138702392578,
+ "learning_rate": 6.450793650793652e-06,
+ "loss": 0.1621,
+ "step": 7734
+ },
+ {
+ "epoch": 44.2,
+ "grad_norm": 59.69667434692383,
+ "learning_rate": 6.4444444444444445e-06,
+ "loss": 0.2149,
+ "step": 7735
+ },
+ {
+ "epoch": 44.205714285714286,
+ "grad_norm": 52.08457946777344,
+ "learning_rate": 6.438095238095239e-06,
+ "loss": 0.0926,
+ "step": 7736
+ },
+ {
+ "epoch": 44.21142857142857,
+ "grad_norm": 95.57523345947266,
+ "learning_rate": 6.4317460317460314e-06,
+ "loss": 0.1676,
+ "step": 7737
+ },
+ {
+ "epoch": 44.21714285714286,
+ "grad_norm": 32.85334777832031,
+ "learning_rate": 6.425396825396826e-06,
+ "loss": 0.1463,
+ "step": 7738
+ },
+ {
+ "epoch": 44.222857142857144,
+ "grad_norm": 39.23460006713867,
+ "learning_rate": 6.419047619047619e-06,
+ "loss": 0.1318,
+ "step": 7739
+ },
+ {
+ "epoch": 44.22857142857143,
+ "grad_norm": 51.70003890991211,
+ "learning_rate": 6.412698412698414e-06,
+ "loss": 0.1424,
+ "step": 7740
+ },
+ {
+ "epoch": 44.23428571428571,
+ "grad_norm": 85.29315185546875,
+ "learning_rate": 6.406349206349206e-06,
+ "loss": 0.1678,
+ "step": 7741
+ },
+ {
+ "epoch": 44.24,
+ "grad_norm": 109.13520812988281,
+ "learning_rate": 6.4000000000000006e-06,
+ "loss": 0.1376,
+ "step": 7742
+ },
+ {
+ "epoch": 44.245714285714286,
+ "grad_norm": 24.532550811767578,
+ "learning_rate": 6.393650793650793e-06,
+ "loss": 0.1545,
+ "step": 7743
+ },
+ {
+ "epoch": 44.25142857142857,
+ "grad_norm": 671.1575317382812,
+ "learning_rate": 6.3873015873015875e-06,
+ "loss": 0.1926,
+ "step": 7744
+ },
+ {
+ "epoch": 44.25714285714286,
+ "grad_norm": 44.302879333496094,
+ "learning_rate": 6.380952380952382e-06,
+ "loss": 0.1135,
+ "step": 7745
+ },
+ {
+ "epoch": 44.26285714285714,
+ "grad_norm": 55.46490478515625,
+ "learning_rate": 6.3746031746031745e-06,
+ "loss": 0.1392,
+ "step": 7746
+ },
+ {
+ "epoch": 44.26857142857143,
+ "grad_norm": 51.76401901245117,
+ "learning_rate": 6.368253968253969e-06,
+ "loss": 0.1314,
+ "step": 7747
+ },
+ {
+ "epoch": 44.27428571428572,
+ "grad_norm": 49.445411682128906,
+ "learning_rate": 6.361904761904762e-06,
+ "loss": 0.1592,
+ "step": 7748
+ },
+ {
+ "epoch": 44.28,
+ "grad_norm": 37.76740264892578,
+ "learning_rate": 6.355555555555557e-06,
+ "loss": 0.0979,
+ "step": 7749
+ },
+ {
+ "epoch": 44.285714285714285,
+ "grad_norm": 40.30945587158203,
+ "learning_rate": 6.349206349206349e-06,
+ "loss": 0.0989,
+ "step": 7750
+ },
+ {
+ "epoch": 44.29142857142857,
+ "grad_norm": 35.8820686340332,
+ "learning_rate": 6.342857142857144e-06,
+ "loss": 0.1374,
+ "step": 7751
+ },
+ {
+ "epoch": 44.29714285714286,
+ "grad_norm": 59.061431884765625,
+ "learning_rate": 6.336507936507936e-06,
+ "loss": 0.1363,
+ "step": 7752
+ },
+ {
+ "epoch": 44.30285714285714,
+ "grad_norm": 126.11488342285156,
+ "learning_rate": 6.3301587301587306e-06,
+ "loss": 0.154,
+ "step": 7753
+ },
+ {
+ "epoch": 44.308571428571426,
+ "grad_norm": 29.980083465576172,
+ "learning_rate": 6.323809523809523e-06,
+ "loss": 0.1223,
+ "step": 7754
+ },
+ {
+ "epoch": 44.31428571428572,
+ "grad_norm": 129.24725341796875,
+ "learning_rate": 6.3174603174603175e-06,
+ "loss": 0.1297,
+ "step": 7755
+ },
+ {
+ "epoch": 44.32,
+ "grad_norm": 47.06840133666992,
+ "learning_rate": 6.311111111111112e-06,
+ "loss": 0.1736,
+ "step": 7756
+ },
+ {
+ "epoch": 44.325714285714284,
+ "grad_norm": 760.0050048828125,
+ "learning_rate": 6.304761904761905e-06,
+ "loss": 0.1587,
+ "step": 7757
+ },
+ {
+ "epoch": 44.331428571428575,
+ "grad_norm": 34.820648193359375,
+ "learning_rate": 6.2984126984127e-06,
+ "loss": 0.1292,
+ "step": 7758
+ },
+ {
+ "epoch": 44.33714285714286,
+ "grad_norm": 29.063579559326172,
+ "learning_rate": 6.292063492063492e-06,
+ "loss": 0.1625,
+ "step": 7759
+ },
+ {
+ "epoch": 44.34285714285714,
+ "grad_norm": 432.6778564453125,
+ "learning_rate": 6.285714285714287e-06,
+ "loss": 0.1781,
+ "step": 7760
+ },
+ {
+ "epoch": 44.348571428571425,
+ "grad_norm": 25.651865005493164,
+ "learning_rate": 6.279365079365079e-06,
+ "loss": 0.126,
+ "step": 7761
+ },
+ {
+ "epoch": 44.354285714285716,
+ "grad_norm": 34.330894470214844,
+ "learning_rate": 6.273015873015874e-06,
+ "loss": 0.1425,
+ "step": 7762
+ },
+ {
+ "epoch": 44.36,
+ "grad_norm": 860.991943359375,
+ "learning_rate": 6.266666666666666e-06,
+ "loss": 0.2029,
+ "step": 7763
+ },
+ {
+ "epoch": 44.36571428571428,
+ "grad_norm": 33.004478454589844,
+ "learning_rate": 6.2603174603174606e-06,
+ "loss": 0.1603,
+ "step": 7764
+ },
+ {
+ "epoch": 44.371428571428574,
+ "grad_norm": 35.191619873046875,
+ "learning_rate": 6.253968253968254e-06,
+ "loss": 0.1652,
+ "step": 7765
+ },
+ {
+ "epoch": 44.37714285714286,
+ "grad_norm": 34.987613677978516,
+ "learning_rate": 6.247619047619048e-06,
+ "loss": 0.1469,
+ "step": 7766
+ },
+ {
+ "epoch": 44.38285714285714,
+ "grad_norm": 48.29430389404297,
+ "learning_rate": 6.241269841269842e-06,
+ "loss": 0.0925,
+ "step": 7767
+ },
+ {
+ "epoch": 44.38857142857143,
+ "grad_norm": 44.30611801147461,
+ "learning_rate": 6.234920634920635e-06,
+ "loss": 0.1699,
+ "step": 7768
+ },
+ {
+ "epoch": 44.394285714285715,
+ "grad_norm": 37.219600677490234,
+ "learning_rate": 6.228571428571429e-06,
+ "loss": 0.1443,
+ "step": 7769
+ },
+ {
+ "epoch": 44.4,
+ "grad_norm": 54.558162689208984,
+ "learning_rate": 6.222222222222222e-06,
+ "loss": 0.1487,
+ "step": 7770
+ },
+ {
+ "epoch": 44.40571428571428,
+ "grad_norm": 96.38822937011719,
+ "learning_rate": 6.215873015873016e-06,
+ "loss": 0.1501,
+ "step": 7771
+ },
+ {
+ "epoch": 44.41142857142857,
+ "grad_norm": 20.549890518188477,
+ "learning_rate": 6.209523809523809e-06,
+ "loss": 0.1393,
+ "step": 7772
+ },
+ {
+ "epoch": 44.417142857142856,
+ "grad_norm": 22.02899742126465,
+ "learning_rate": 6.203174603174604e-06,
+ "loss": 0.1303,
+ "step": 7773
+ },
+ {
+ "epoch": 44.42285714285714,
+ "grad_norm": 82.55475616455078,
+ "learning_rate": 6.196825396825397e-06,
+ "loss": 0.149,
+ "step": 7774
+ },
+ {
+ "epoch": 44.42857142857143,
+ "grad_norm": 47.80804443359375,
+ "learning_rate": 6.190476190476191e-06,
+ "loss": 0.1799,
+ "step": 7775
+ },
+ {
+ "epoch": 44.434285714285714,
+ "grad_norm": 75.9185562133789,
+ "learning_rate": 6.184126984126985e-06,
+ "loss": 0.193,
+ "step": 7776
+ },
+ {
+ "epoch": 44.44,
+ "grad_norm": 45.21895217895508,
+ "learning_rate": 6.177777777777778e-06,
+ "loss": 0.1714,
+ "step": 7777
+ },
+ {
+ "epoch": 44.44571428571429,
+ "grad_norm": 23.10559844970703,
+ "learning_rate": 6.171428571428572e-06,
+ "loss": 0.1295,
+ "step": 7778
+ },
+ {
+ "epoch": 44.45142857142857,
+ "grad_norm": 55.595394134521484,
+ "learning_rate": 6.165079365079365e-06,
+ "loss": 0.1614,
+ "step": 7779
+ },
+ {
+ "epoch": 44.457142857142856,
+ "grad_norm": 47.207794189453125,
+ "learning_rate": 6.158730158730159e-06,
+ "loss": 0.1374,
+ "step": 7780
+ },
+ {
+ "epoch": 44.462857142857146,
+ "grad_norm": 34.826934814453125,
+ "learning_rate": 6.152380952380952e-06,
+ "loss": 0.1263,
+ "step": 7781
+ },
+ {
+ "epoch": 44.46857142857143,
+ "grad_norm": 163.04493713378906,
+ "learning_rate": 6.146031746031746e-06,
+ "loss": 0.1609,
+ "step": 7782
+ },
+ {
+ "epoch": 44.47428571428571,
+ "grad_norm": 67.82376861572266,
+ "learning_rate": 6.139682539682539e-06,
+ "loss": 0.1474,
+ "step": 7783
+ },
+ {
+ "epoch": 44.48,
+ "grad_norm": 37.07332992553711,
+ "learning_rate": 6.133333333333334e-06,
+ "loss": 0.1435,
+ "step": 7784
+ },
+ {
+ "epoch": 44.48571428571429,
+ "grad_norm": 54.88054275512695,
+ "learning_rate": 6.126984126984128e-06,
+ "loss": 0.1417,
+ "step": 7785
+ },
+ {
+ "epoch": 44.49142857142857,
+ "grad_norm": 29.38848114013672,
+ "learning_rate": 6.1206349206349214e-06,
+ "loss": 0.1092,
+ "step": 7786
+ },
+ {
+ "epoch": 44.497142857142855,
+ "grad_norm": 63.567604064941406,
+ "learning_rate": 6.114285714285715e-06,
+ "loss": 0.1559,
+ "step": 7787
+ },
+ {
+ "epoch": 44.502857142857145,
+ "grad_norm": 53.42649841308594,
+ "learning_rate": 6.107936507936508e-06,
+ "loss": 0.1857,
+ "step": 7788
+ },
+ {
+ "epoch": 44.50857142857143,
+ "grad_norm": 20.135610580444336,
+ "learning_rate": 6.101587301587302e-06,
+ "loss": 0.1098,
+ "step": 7789
+ },
+ {
+ "epoch": 44.51428571428571,
+ "grad_norm": 49.138065338134766,
+ "learning_rate": 6.095238095238095e-06,
+ "loss": 0.2962,
+ "step": 7790
+ },
+ {
+ "epoch": 44.52,
+ "grad_norm": 50.56275939941406,
+ "learning_rate": 6.088888888888889e-06,
+ "loss": 0.1249,
+ "step": 7791
+ },
+ {
+ "epoch": 44.52571428571429,
+ "grad_norm": 538.6000366210938,
+ "learning_rate": 6.082539682539682e-06,
+ "loss": 0.1384,
+ "step": 7792
+ },
+ {
+ "epoch": 44.53142857142857,
+ "grad_norm": 32.58999252319336,
+ "learning_rate": 6.076190476190477e-06,
+ "loss": 0.1256,
+ "step": 7793
+ },
+ {
+ "epoch": 44.537142857142854,
+ "grad_norm": 28.279216766357422,
+ "learning_rate": 6.06984126984127e-06,
+ "loss": 0.1275,
+ "step": 7794
+ },
+ {
+ "epoch": 44.542857142857144,
+ "grad_norm": 50.32058334350586,
+ "learning_rate": 6.063492063492064e-06,
+ "loss": 0.2252,
+ "step": 7795
+ },
+ {
+ "epoch": 44.54857142857143,
+ "grad_norm": 32.84303665161133,
+ "learning_rate": 6.057142857142858e-06,
+ "loss": 0.1643,
+ "step": 7796
+ },
+ {
+ "epoch": 44.55428571428571,
+ "grad_norm": 43.972381591796875,
+ "learning_rate": 6.0507936507936514e-06,
+ "loss": 0.1207,
+ "step": 7797
+ },
+ {
+ "epoch": 44.56,
+ "grad_norm": 31.350746154785156,
+ "learning_rate": 6.044444444444445e-06,
+ "loss": 0.1728,
+ "step": 7798
+ },
+ {
+ "epoch": 44.565714285714286,
+ "grad_norm": 57.16665267944336,
+ "learning_rate": 6.038095238095238e-06,
+ "loss": 0.0836,
+ "step": 7799
+ },
+ {
+ "epoch": 44.57142857142857,
+ "grad_norm": 24.479278564453125,
+ "learning_rate": 6.031746031746032e-06,
+ "loss": 0.122,
+ "step": 7800
+ },
+ {
+ "epoch": 44.57714285714286,
+ "grad_norm": 55.26851272583008,
+ "learning_rate": 6.025396825396825e-06,
+ "loss": 0.0994,
+ "step": 7801
+ },
+ {
+ "epoch": 44.582857142857144,
+ "grad_norm": 121.71125793457031,
+ "learning_rate": 6.01904761904762e-06,
+ "loss": 0.199,
+ "step": 7802
+ },
+ {
+ "epoch": 44.58857142857143,
+ "grad_norm": 133.99131774902344,
+ "learning_rate": 6.012698412698413e-06,
+ "loss": 0.1916,
+ "step": 7803
+ },
+ {
+ "epoch": 44.59428571428572,
+ "grad_norm": 44.67299270629883,
+ "learning_rate": 6.006349206349207e-06,
+ "loss": 0.2354,
+ "step": 7804
+ },
+ {
+ "epoch": 44.6,
+ "grad_norm": 90.81427001953125,
+ "learning_rate": 6e-06,
+ "loss": 0.1429,
+ "step": 7805
+ },
+ {
+ "epoch": 44.605714285714285,
+ "grad_norm": 26.58412742614746,
+ "learning_rate": 5.993650793650794e-06,
+ "loss": 0.137,
+ "step": 7806
+ },
+ {
+ "epoch": 44.61142857142857,
+ "grad_norm": 74.78943634033203,
+ "learning_rate": 5.987301587301587e-06,
+ "loss": 0.4208,
+ "step": 7807
+ },
+ {
+ "epoch": 44.61714285714286,
+ "grad_norm": 57.04520797729492,
+ "learning_rate": 5.9809523809523814e-06,
+ "loss": 0.1065,
+ "step": 7808
+ },
+ {
+ "epoch": 44.62285714285714,
+ "grad_norm": 34.23002243041992,
+ "learning_rate": 5.974603174603175e-06,
+ "loss": 0.1434,
+ "step": 7809
+ },
+ {
+ "epoch": 44.628571428571426,
+ "grad_norm": 2633.763671875,
+ "learning_rate": 5.968253968253968e-06,
+ "loss": 0.411,
+ "step": 7810
+ },
+ {
+ "epoch": 44.63428571428572,
+ "grad_norm": 53.742576599121094,
+ "learning_rate": 5.961904761904762e-06,
+ "loss": 0.13,
+ "step": 7811
+ },
+ {
+ "epoch": 44.64,
+ "grad_norm": 21.34613609313965,
+ "learning_rate": 5.955555555555556e-06,
+ "loss": 0.1278,
+ "step": 7812
+ },
+ {
+ "epoch": 44.645714285714284,
+ "grad_norm": 67.92548370361328,
+ "learning_rate": 5.94920634920635e-06,
+ "loss": 0.1736,
+ "step": 7813
+ },
+ {
+ "epoch": 44.651428571428575,
+ "grad_norm": 29.171289443969727,
+ "learning_rate": 5.942857142857143e-06,
+ "loss": 0.1587,
+ "step": 7814
+ },
+ {
+ "epoch": 44.65714285714286,
+ "grad_norm": 35.45895767211914,
+ "learning_rate": 5.936507936507937e-06,
+ "loss": 0.1653,
+ "step": 7815
+ },
+ {
+ "epoch": 44.66285714285714,
+ "grad_norm": 43.47918701171875,
+ "learning_rate": 5.93015873015873e-06,
+ "loss": 0.1434,
+ "step": 7816
+ },
+ {
+ "epoch": 44.668571428571425,
+ "grad_norm": 58.679508209228516,
+ "learning_rate": 5.923809523809524e-06,
+ "loss": 0.1682,
+ "step": 7817
+ },
+ {
+ "epoch": 44.674285714285716,
+ "grad_norm": 191.1954803466797,
+ "learning_rate": 5.917460317460317e-06,
+ "loss": 0.1325,
+ "step": 7818
+ },
+ {
+ "epoch": 44.68,
+ "grad_norm": 50.023590087890625,
+ "learning_rate": 5.9111111111111115e-06,
+ "loss": 0.1521,
+ "step": 7819
+ },
+ {
+ "epoch": 44.68571428571428,
+ "grad_norm": 51.93128967285156,
+ "learning_rate": 5.904761904761905e-06,
+ "loss": 0.2369,
+ "step": 7820
+ },
+ {
+ "epoch": 44.691428571428574,
+ "grad_norm": 39.430572509765625,
+ "learning_rate": 5.898412698412699e-06,
+ "loss": 0.1177,
+ "step": 7821
+ },
+ {
+ "epoch": 44.69714285714286,
+ "grad_norm": 13.830597877502441,
+ "learning_rate": 5.892063492063493e-06,
+ "loss": 0.1274,
+ "step": 7822
+ },
+ {
+ "epoch": 44.70285714285714,
+ "grad_norm": 76.25123596191406,
+ "learning_rate": 5.885714285714286e-06,
+ "loss": 0.1033,
+ "step": 7823
+ },
+ {
+ "epoch": 44.70857142857143,
+ "grad_norm": 60.84410095214844,
+ "learning_rate": 5.87936507936508e-06,
+ "loss": 0.1232,
+ "step": 7824
+ },
+ {
+ "epoch": 44.714285714285715,
+ "grad_norm": 41.37836456298828,
+ "learning_rate": 5.873015873015873e-06,
+ "loss": 0.1203,
+ "step": 7825
+ },
+ {
+ "epoch": 44.72,
+ "grad_norm": 33.750022888183594,
+ "learning_rate": 5.866666666666667e-06,
+ "loss": 0.1386,
+ "step": 7826
+ },
+ {
+ "epoch": 44.72571428571428,
+ "grad_norm": 33.331016540527344,
+ "learning_rate": 5.86031746031746e-06,
+ "loss": 0.1498,
+ "step": 7827
+ },
+ {
+ "epoch": 44.73142857142857,
+ "grad_norm": 41.63890075683594,
+ "learning_rate": 5.853968253968254e-06,
+ "loss": 0.2001,
+ "step": 7828
+ },
+ {
+ "epoch": 44.73714285714286,
+ "grad_norm": 29.633655548095703,
+ "learning_rate": 5.847619047619048e-06,
+ "loss": 0.1549,
+ "step": 7829
+ },
+ {
+ "epoch": 44.74285714285714,
+ "grad_norm": 31.78554344177246,
+ "learning_rate": 5.841269841269842e-06,
+ "loss": 0.192,
+ "step": 7830
+ },
+ {
+ "epoch": 44.74857142857143,
+ "grad_norm": 38.54855728149414,
+ "learning_rate": 5.834920634920636e-06,
+ "loss": 0.1658,
+ "step": 7831
+ },
+ {
+ "epoch": 44.754285714285714,
+ "grad_norm": 25.076963424682617,
+ "learning_rate": 5.828571428571429e-06,
+ "loss": 0.1816,
+ "step": 7832
+ },
+ {
+ "epoch": 44.76,
+ "grad_norm": 37.5577278137207,
+ "learning_rate": 5.822222222222223e-06,
+ "loss": 0.1466,
+ "step": 7833
+ },
+ {
+ "epoch": 44.76571428571429,
+ "grad_norm": 51.69167709350586,
+ "learning_rate": 5.815873015873016e-06,
+ "loss": 0.2645,
+ "step": 7834
+ },
+ {
+ "epoch": 44.77142857142857,
+ "grad_norm": 50.27036666870117,
+ "learning_rate": 5.80952380952381e-06,
+ "loss": 0.1469,
+ "step": 7835
+ },
+ {
+ "epoch": 44.777142857142856,
+ "grad_norm": 26.678077697753906,
+ "learning_rate": 5.803174603174603e-06,
+ "loss": 0.1616,
+ "step": 7836
+ },
+ {
+ "epoch": 44.78285714285714,
+ "grad_norm": 249.02993774414062,
+ "learning_rate": 5.796825396825397e-06,
+ "loss": 0.1815,
+ "step": 7837
+ },
+ {
+ "epoch": 44.78857142857143,
+ "grad_norm": 35.703819274902344,
+ "learning_rate": 5.790476190476191e-06,
+ "loss": 0.1801,
+ "step": 7838
+ },
+ {
+ "epoch": 44.794285714285714,
+ "grad_norm": 63.47907638549805,
+ "learning_rate": 5.7841269841269845e-06,
+ "loss": 0.142,
+ "step": 7839
+ },
+ {
+ "epoch": 44.8,
+ "grad_norm": 56.610721588134766,
+ "learning_rate": 5.777777777777778e-06,
+ "loss": 0.1931,
+ "step": 7840
+ },
+ {
+ "epoch": 44.80571428571429,
+ "grad_norm": 41.78334426879883,
+ "learning_rate": 5.7714285714285715e-06,
+ "loss": 0.1049,
+ "step": 7841
+ },
+ {
+ "epoch": 44.81142857142857,
+ "grad_norm": 42.41416549682617,
+ "learning_rate": 5.765079365079366e-06,
+ "loss": 0.1297,
+ "step": 7842
+ },
+ {
+ "epoch": 44.817142857142855,
+ "grad_norm": 21.613910675048828,
+ "learning_rate": 5.758730158730159e-06,
+ "loss": 0.1222,
+ "step": 7843
+ },
+ {
+ "epoch": 44.822857142857146,
+ "grad_norm": 54.29707336425781,
+ "learning_rate": 5.752380952380953e-06,
+ "loss": 0.1181,
+ "step": 7844
+ },
+ {
+ "epoch": 44.82857142857143,
+ "grad_norm": 25.819608688354492,
+ "learning_rate": 5.746031746031746e-06,
+ "loss": 0.1345,
+ "step": 7845
+ },
+ {
+ "epoch": 44.83428571428571,
+ "grad_norm": 39.601863861083984,
+ "learning_rate": 5.73968253968254e-06,
+ "loss": 0.1373,
+ "step": 7846
+ },
+ {
+ "epoch": 44.84,
+ "grad_norm": 25.690563201904297,
+ "learning_rate": 5.733333333333333e-06,
+ "loss": 0.2132,
+ "step": 7847
+ },
+ {
+ "epoch": 44.84571428571429,
+ "grad_norm": 66.87892150878906,
+ "learning_rate": 5.7269841269841275e-06,
+ "loss": 0.1445,
+ "step": 7848
+ },
+ {
+ "epoch": 44.85142857142857,
+ "grad_norm": 50.268497467041016,
+ "learning_rate": 5.720634920634921e-06,
+ "loss": 0.1438,
+ "step": 7849
+ },
+ {
+ "epoch": 44.857142857142854,
+ "grad_norm": 26.253446578979492,
+ "learning_rate": 5.7142857142857145e-06,
+ "loss": 0.1119,
+ "step": 7850
+ },
+ {
+ "epoch": 44.862857142857145,
+ "grad_norm": 59.0138053894043,
+ "learning_rate": 5.707936507936508e-06,
+ "loss": 0.1161,
+ "step": 7851
+ },
+ {
+ "epoch": 44.86857142857143,
+ "grad_norm": 68.45675659179688,
+ "learning_rate": 5.7015873015873015e-06,
+ "loss": 0.1035,
+ "step": 7852
+ },
+ {
+ "epoch": 44.87428571428571,
+ "grad_norm": 46.866146087646484,
+ "learning_rate": 5.695238095238096e-06,
+ "loss": 0.1345,
+ "step": 7853
+ },
+ {
+ "epoch": 44.88,
+ "grad_norm": 59.4177131652832,
+ "learning_rate": 5.688888888888889e-06,
+ "loss": 0.1827,
+ "step": 7854
+ },
+ {
+ "epoch": 44.885714285714286,
+ "grad_norm": 79.4287338256836,
+ "learning_rate": 5.682539682539683e-06,
+ "loss": 0.1751,
+ "step": 7855
+ },
+ {
+ "epoch": 44.89142857142857,
+ "grad_norm": 44.99930953979492,
+ "learning_rate": 5.676190476190476e-06,
+ "loss": 0.1545,
+ "step": 7856
+ },
+ {
+ "epoch": 44.89714285714286,
+ "grad_norm": 38.18852233886719,
+ "learning_rate": 5.669841269841271e-06,
+ "loss": 0.1851,
+ "step": 7857
+ },
+ {
+ "epoch": 44.902857142857144,
+ "grad_norm": 81.95789337158203,
+ "learning_rate": 5.663492063492064e-06,
+ "loss": 0.1157,
+ "step": 7858
+ },
+ {
+ "epoch": 44.90857142857143,
+ "grad_norm": 33.37058639526367,
+ "learning_rate": 5.6571428571428576e-06,
+ "loss": 0.1635,
+ "step": 7859
+ },
+ {
+ "epoch": 44.91428571428571,
+ "grad_norm": 81.13784790039062,
+ "learning_rate": 5.650793650793651e-06,
+ "loss": 0.174,
+ "step": 7860
+ },
+ {
+ "epoch": 44.92,
+ "grad_norm": 85.00211334228516,
+ "learning_rate": 5.6444444444444445e-06,
+ "loss": 0.1532,
+ "step": 7861
+ },
+ {
+ "epoch": 44.925714285714285,
+ "grad_norm": 49.13711166381836,
+ "learning_rate": 5.638095238095238e-06,
+ "loss": 0.1283,
+ "step": 7862
+ },
+ {
+ "epoch": 44.93142857142857,
+ "grad_norm": 41.752376556396484,
+ "learning_rate": 5.6317460317460315e-06,
+ "loss": 0.1513,
+ "step": 7863
+ },
+ {
+ "epoch": 44.93714285714286,
+ "grad_norm": 32.89028549194336,
+ "learning_rate": 5.625396825396825e-06,
+ "loss": 0.1995,
+ "step": 7864
+ },
+ {
+ "epoch": 44.94285714285714,
+ "grad_norm": 117.55921936035156,
+ "learning_rate": 5.619047619047619e-06,
+ "loss": 0.1514,
+ "step": 7865
+ },
+ {
+ "epoch": 44.94857142857143,
+ "grad_norm": 69.728271484375,
+ "learning_rate": 5.612698412698414e-06,
+ "loss": 0.1303,
+ "step": 7866
+ },
+ {
+ "epoch": 44.95428571428572,
+ "grad_norm": 726.4796142578125,
+ "learning_rate": 5.606349206349207e-06,
+ "loss": 0.1795,
+ "step": 7867
+ },
+ {
+ "epoch": 44.96,
+ "grad_norm": 54.38838195800781,
+ "learning_rate": 5.600000000000001e-06,
+ "loss": 0.136,
+ "step": 7868
+ },
+ {
+ "epoch": 44.965714285714284,
+ "grad_norm": 21.06503677368164,
+ "learning_rate": 5.593650793650794e-06,
+ "loss": 0.1315,
+ "step": 7869
+ },
+ {
+ "epoch": 44.97142857142857,
+ "grad_norm": 50.8365478515625,
+ "learning_rate": 5.5873015873015876e-06,
+ "loss": 0.1109,
+ "step": 7870
+ },
+ {
+ "epoch": 44.97714285714286,
+ "grad_norm": 65.22562408447266,
+ "learning_rate": 5.580952380952381e-06,
+ "loss": 0.2545,
+ "step": 7871
+ },
+ {
+ "epoch": 44.98285714285714,
+ "grad_norm": 26.463062286376953,
+ "learning_rate": 5.5746031746031745e-06,
+ "loss": 0.1487,
+ "step": 7872
+ },
+ {
+ "epoch": 44.988571428571426,
+ "grad_norm": 23.692731857299805,
+ "learning_rate": 5.568253968253968e-06,
+ "loss": 0.1816,
+ "step": 7873
+ },
+ {
+ "epoch": 44.994285714285716,
+ "grad_norm": 109.07501983642578,
+ "learning_rate": 5.561904761904762e-06,
+ "loss": 0.1082,
+ "step": 7874
+ },
+ {
+ "epoch": 45.0,
+ "grad_norm": 27.450218200683594,
+ "learning_rate": 5.555555555555556e-06,
+ "loss": 0.1496,
+ "step": 7875
+ },
+ {
+ "epoch": 45.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5875609517097473,
+ "eval_map": 0.9372,
+ "eval_map_50": 0.9697,
+ "eval_map_75": 0.9638,
+ "eval_map_large": 0.9373,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9372,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7952,
+ "eval_mar_10": 0.9759,
+ "eval_mar_100": 0.9759,
+ "eval_mar_100_per_class": 0.9759,
+ "eval_mar_large": 0.9759,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.7437,
+ "eval_samples_per_second": 21.392,
+ "eval_steps_per_second": 2.692,
+ "step": 7875
+ },
+ {
+ "epoch": 45.005714285714284,
+ "grad_norm": 59.07111358642578,
+ "learning_rate": 5.54920634920635e-06,
+ "loss": 0.0999,
+ "step": 7876
+ },
+ {
+ "epoch": 45.011428571428574,
+ "grad_norm": 28.458738327026367,
+ "learning_rate": 5.542857142857144e-06,
+ "loss": 0.1813,
+ "step": 7877
+ },
+ {
+ "epoch": 45.01714285714286,
+ "grad_norm": 43.924617767333984,
+ "learning_rate": 5.536507936507937e-06,
+ "loss": 0.1198,
+ "step": 7878
+ },
+ {
+ "epoch": 45.02285714285714,
+ "grad_norm": 42.84949493408203,
+ "learning_rate": 5.530158730158731e-06,
+ "loss": 0.1692,
+ "step": 7879
+ },
+ {
+ "epoch": 45.02857142857143,
+ "grad_norm": 68.04060363769531,
+ "learning_rate": 5.523809523809524e-06,
+ "loss": 0.1177,
+ "step": 7880
+ },
+ {
+ "epoch": 45.034285714285716,
+ "grad_norm": 122.39205169677734,
+ "learning_rate": 5.5174603174603176e-06,
+ "loss": 0.1593,
+ "step": 7881
+ },
+ {
+ "epoch": 45.04,
+ "grad_norm": 58.70307540893555,
+ "learning_rate": 5.511111111111111e-06,
+ "loss": 0.2077,
+ "step": 7882
+ },
+ {
+ "epoch": 45.04571428571428,
+ "grad_norm": 72.9378433227539,
+ "learning_rate": 5.5047619047619045e-06,
+ "loss": 0.12,
+ "step": 7883
+ },
+ {
+ "epoch": 45.05142857142857,
+ "grad_norm": 60.564754486083984,
+ "learning_rate": 5.498412698412699e-06,
+ "loss": 0.1211,
+ "step": 7884
+ },
+ {
+ "epoch": 45.05714285714286,
+ "grad_norm": 42.58056640625,
+ "learning_rate": 5.492063492063492e-06,
+ "loss": 0.1291,
+ "step": 7885
+ },
+ {
+ "epoch": 45.06285714285714,
+ "grad_norm": 43.20802688598633,
+ "learning_rate": 5.485714285714286e-06,
+ "loss": 0.1323,
+ "step": 7886
+ },
+ {
+ "epoch": 45.06857142857143,
+ "grad_norm": 35.570735931396484,
+ "learning_rate": 5.479365079365079e-06,
+ "loss": 0.1062,
+ "step": 7887
+ },
+ {
+ "epoch": 45.074285714285715,
+ "grad_norm": 35.60871505737305,
+ "learning_rate": 5.473015873015874e-06,
+ "loss": 0.1661,
+ "step": 7888
+ },
+ {
+ "epoch": 45.08,
+ "grad_norm": 26.704849243164062,
+ "learning_rate": 5.466666666666667e-06,
+ "loss": 0.1529,
+ "step": 7889
+ },
+ {
+ "epoch": 45.08571428571429,
+ "grad_norm": 53.47730255126953,
+ "learning_rate": 5.460317460317461e-06,
+ "loss": 0.2025,
+ "step": 7890
+ },
+ {
+ "epoch": 45.09142857142857,
+ "grad_norm": 70.42041015625,
+ "learning_rate": 5.453968253968254e-06,
+ "loss": 0.3081,
+ "step": 7891
+ },
+ {
+ "epoch": 45.097142857142856,
+ "grad_norm": 92.92615509033203,
+ "learning_rate": 5.4476190476190476e-06,
+ "loss": 0.1179,
+ "step": 7892
+ },
+ {
+ "epoch": 45.10285714285714,
+ "grad_norm": 71.29002380371094,
+ "learning_rate": 5.441269841269842e-06,
+ "loss": 0.1377,
+ "step": 7893
+ },
+ {
+ "epoch": 45.10857142857143,
+ "grad_norm": 59.49152374267578,
+ "learning_rate": 5.434920634920635e-06,
+ "loss": 0.2838,
+ "step": 7894
+ },
+ {
+ "epoch": 45.114285714285714,
+ "grad_norm": 37.16068649291992,
+ "learning_rate": 5.428571428571429e-06,
+ "loss": 0.1124,
+ "step": 7895
+ },
+ {
+ "epoch": 45.12,
+ "grad_norm": 41.775028228759766,
+ "learning_rate": 5.422222222222222e-06,
+ "loss": 0.1479,
+ "step": 7896
+ },
+ {
+ "epoch": 45.12571428571429,
+ "grad_norm": 85.41061401367188,
+ "learning_rate": 5.415873015873016e-06,
+ "loss": 0.1679,
+ "step": 7897
+ },
+ {
+ "epoch": 45.13142857142857,
+ "grad_norm": 44.06031799316406,
+ "learning_rate": 5.409523809523809e-06,
+ "loss": 0.156,
+ "step": 7898
+ },
+ {
+ "epoch": 45.137142857142855,
+ "grad_norm": 39.47423553466797,
+ "learning_rate": 5.403174603174604e-06,
+ "loss": 0.1432,
+ "step": 7899
+ },
+ {
+ "epoch": 45.142857142857146,
+ "grad_norm": 28.359256744384766,
+ "learning_rate": 5.396825396825397e-06,
+ "loss": 0.1443,
+ "step": 7900
+ },
+ {
+ "epoch": 45.14857142857143,
+ "grad_norm": 34.1742057800293,
+ "learning_rate": 5.390476190476191e-06,
+ "loss": 0.1378,
+ "step": 7901
+ },
+ {
+ "epoch": 45.15428571428571,
+ "grad_norm": 33.425750732421875,
+ "learning_rate": 5.384126984126984e-06,
+ "loss": 0.1248,
+ "step": 7902
+ },
+ {
+ "epoch": 45.16,
+ "grad_norm": 44.18424987792969,
+ "learning_rate": 5.3777777777777784e-06,
+ "loss": 0.172,
+ "step": 7903
+ },
+ {
+ "epoch": 45.16571428571429,
+ "grad_norm": 44.47559356689453,
+ "learning_rate": 5.371428571428572e-06,
+ "loss": 0.1656,
+ "step": 7904
+ },
+ {
+ "epoch": 45.17142857142857,
+ "grad_norm": 44.56504440307617,
+ "learning_rate": 5.365079365079365e-06,
+ "loss": 0.1147,
+ "step": 7905
+ },
+ {
+ "epoch": 45.177142857142854,
+ "grad_norm": 28.468868255615234,
+ "learning_rate": 5.358730158730159e-06,
+ "loss": 0.1694,
+ "step": 7906
+ },
+ {
+ "epoch": 45.182857142857145,
+ "grad_norm": 37.43157196044922,
+ "learning_rate": 5.352380952380952e-06,
+ "loss": 0.1554,
+ "step": 7907
+ },
+ {
+ "epoch": 45.18857142857143,
+ "grad_norm": 40.826416015625,
+ "learning_rate": 5.346031746031746e-06,
+ "loss": 0.1427,
+ "step": 7908
+ },
+ {
+ "epoch": 45.19428571428571,
+ "grad_norm": 44.846988677978516,
+ "learning_rate": 5.339682539682539e-06,
+ "loss": 0.1875,
+ "step": 7909
+ },
+ {
+ "epoch": 45.2,
+ "grad_norm": 43.36846923828125,
+ "learning_rate": 5.333333333333334e-06,
+ "loss": 0.1556,
+ "step": 7910
+ },
+ {
+ "epoch": 45.205714285714286,
+ "grad_norm": 23.776344299316406,
+ "learning_rate": 5.326984126984127e-06,
+ "loss": 0.1688,
+ "step": 7911
+ },
+ {
+ "epoch": 45.21142857142857,
+ "grad_norm": 32.264644622802734,
+ "learning_rate": 5.3206349206349215e-06,
+ "loss": 0.1537,
+ "step": 7912
+ },
+ {
+ "epoch": 45.21714285714286,
+ "grad_norm": 73.19031524658203,
+ "learning_rate": 5.314285714285715e-06,
+ "loss": 0.145,
+ "step": 7913
+ },
+ {
+ "epoch": 45.222857142857144,
+ "grad_norm": 59.46937942504883,
+ "learning_rate": 5.3079365079365084e-06,
+ "loss": 0.1153,
+ "step": 7914
+ },
+ {
+ "epoch": 45.22857142857143,
+ "grad_norm": 17.42439079284668,
+ "learning_rate": 5.301587301587302e-06,
+ "loss": 0.0885,
+ "step": 7915
+ },
+ {
+ "epoch": 45.23428571428571,
+ "grad_norm": 57.91249465942383,
+ "learning_rate": 5.295238095238095e-06,
+ "loss": 0.1386,
+ "step": 7916
+ },
+ {
+ "epoch": 45.24,
+ "grad_norm": 48.12847900390625,
+ "learning_rate": 5.288888888888889e-06,
+ "loss": 0.1431,
+ "step": 7917
+ },
+ {
+ "epoch": 45.245714285714286,
+ "grad_norm": 1868.663818359375,
+ "learning_rate": 5.282539682539682e-06,
+ "loss": 0.1713,
+ "step": 7918
+ },
+ {
+ "epoch": 45.25142857142857,
+ "grad_norm": 40.173118591308594,
+ "learning_rate": 5.276190476190476e-06,
+ "loss": 0.2278,
+ "step": 7919
+ },
+ {
+ "epoch": 45.25714285714286,
+ "grad_norm": 87.19586944580078,
+ "learning_rate": 5.26984126984127e-06,
+ "loss": 0.1878,
+ "step": 7920
+ },
+ {
+ "epoch": 45.26285714285714,
+ "grad_norm": 68.53565216064453,
+ "learning_rate": 5.263492063492064e-06,
+ "loss": 0.1328,
+ "step": 7921
+ },
+ {
+ "epoch": 45.26857142857143,
+ "grad_norm": 47.07212829589844,
+ "learning_rate": 5.257142857142858e-06,
+ "loss": 0.2188,
+ "step": 7922
+ },
+ {
+ "epoch": 45.27428571428572,
+ "grad_norm": 65.88520812988281,
+ "learning_rate": 5.2507936507936515e-06,
+ "loss": 0.1713,
+ "step": 7923
+ },
+ {
+ "epoch": 45.28,
+ "grad_norm": 1658.98828125,
+ "learning_rate": 5.244444444444445e-06,
+ "loss": 0.2176,
+ "step": 7924
+ },
+ {
+ "epoch": 45.285714285714285,
+ "grad_norm": 30.800500869750977,
+ "learning_rate": 5.2380952380952384e-06,
+ "loss": 0.1687,
+ "step": 7925
+ },
+ {
+ "epoch": 45.29142857142857,
+ "grad_norm": 718.1648559570312,
+ "learning_rate": 5.231746031746032e-06,
+ "loss": 0.2337,
+ "step": 7926
+ },
+ {
+ "epoch": 45.29714285714286,
+ "grad_norm": 77.49954223632812,
+ "learning_rate": 5.225396825396825e-06,
+ "loss": 0.1318,
+ "step": 7927
+ },
+ {
+ "epoch": 45.30285714285714,
+ "grad_norm": 52.57571029663086,
+ "learning_rate": 5.219047619047619e-06,
+ "loss": 0.18,
+ "step": 7928
+ },
+ {
+ "epoch": 45.308571428571426,
+ "grad_norm": 35.31848907470703,
+ "learning_rate": 5.212698412698413e-06,
+ "loss": 0.1682,
+ "step": 7929
+ },
+ {
+ "epoch": 45.31428571428572,
+ "grad_norm": 44.66339111328125,
+ "learning_rate": 5.206349206349207e-06,
+ "loss": 0.1446,
+ "step": 7930
+ },
+ {
+ "epoch": 45.32,
+ "grad_norm": 19.15682601928711,
+ "learning_rate": 5.2e-06,
+ "loss": 0.1189,
+ "step": 7931
+ },
+ {
+ "epoch": 45.325714285714284,
+ "grad_norm": 27.01017189025879,
+ "learning_rate": 5.193650793650794e-06,
+ "loss": 0.1335,
+ "step": 7932
+ },
+ {
+ "epoch": 45.331428571428575,
+ "grad_norm": 51.86173629760742,
+ "learning_rate": 5.187301587301588e-06,
+ "loss": 0.1628,
+ "step": 7933
+ },
+ {
+ "epoch": 45.33714285714286,
+ "grad_norm": 36.805477142333984,
+ "learning_rate": 5.1809523809523815e-06,
+ "loss": 0.1129,
+ "step": 7934
+ },
+ {
+ "epoch": 45.34285714285714,
+ "grad_norm": 41.201351165771484,
+ "learning_rate": 5.174603174603175e-06,
+ "loss": 0.1683,
+ "step": 7935
+ },
+ {
+ "epoch": 45.348571428571425,
+ "grad_norm": 72.04317474365234,
+ "learning_rate": 5.1682539682539685e-06,
+ "loss": 0.1878,
+ "step": 7936
+ },
+ {
+ "epoch": 45.354285714285716,
+ "grad_norm": 53.820682525634766,
+ "learning_rate": 5.161904761904762e-06,
+ "loss": 0.1308,
+ "step": 7937
+ },
+ {
+ "epoch": 45.36,
+ "grad_norm": 68.02818298339844,
+ "learning_rate": 5.155555555555555e-06,
+ "loss": 0.2302,
+ "step": 7938
+ },
+ {
+ "epoch": 45.36571428571428,
+ "grad_norm": 34.54709243774414,
+ "learning_rate": 5.14920634920635e-06,
+ "loss": 0.1217,
+ "step": 7939
+ },
+ {
+ "epoch": 45.371428571428574,
+ "grad_norm": 57.59125900268555,
+ "learning_rate": 5.142857142857143e-06,
+ "loss": 0.1571,
+ "step": 7940
+ },
+ {
+ "epoch": 45.37714285714286,
+ "grad_norm": 20.881120681762695,
+ "learning_rate": 5.136507936507937e-06,
+ "loss": 0.1987,
+ "step": 7941
+ },
+ {
+ "epoch": 45.38285714285714,
+ "grad_norm": 633.7258911132812,
+ "learning_rate": 5.13015873015873e-06,
+ "loss": 0.2172,
+ "step": 7942
+ },
+ {
+ "epoch": 45.38857142857143,
+ "grad_norm": 57.42353057861328,
+ "learning_rate": 5.123809523809524e-06,
+ "loss": 0.1497,
+ "step": 7943
+ },
+ {
+ "epoch": 45.394285714285715,
+ "grad_norm": 29.66927719116211,
+ "learning_rate": 5.117460317460317e-06,
+ "loss": 0.1191,
+ "step": 7944
+ },
+ {
+ "epoch": 45.4,
+ "grad_norm": 37.49243927001953,
+ "learning_rate": 5.1111111111111115e-06,
+ "loss": 0.151,
+ "step": 7945
+ },
+ {
+ "epoch": 45.40571428571428,
+ "grad_norm": 66.22801971435547,
+ "learning_rate": 5.104761904761905e-06,
+ "loss": 0.1189,
+ "step": 7946
+ },
+ {
+ "epoch": 45.41142857142857,
+ "grad_norm": 61.25311279296875,
+ "learning_rate": 5.0984126984126985e-06,
+ "loss": 0.1553,
+ "step": 7947
+ },
+ {
+ "epoch": 45.417142857142856,
+ "grad_norm": 20.99935531616211,
+ "learning_rate": 5.092063492063493e-06,
+ "loss": 0.1017,
+ "step": 7948
+ },
+ {
+ "epoch": 45.42285714285714,
+ "grad_norm": 342.1048583984375,
+ "learning_rate": 5.085714285714286e-06,
+ "loss": 0.117,
+ "step": 7949
+ },
+ {
+ "epoch": 45.42857142857143,
+ "grad_norm": 29.054729461669922,
+ "learning_rate": 5.07936507936508e-06,
+ "loss": 0.1472,
+ "step": 7950
+ },
+ {
+ "epoch": 45.434285714285714,
+ "grad_norm": 46.416114807128906,
+ "learning_rate": 5.073015873015873e-06,
+ "loss": 0.1255,
+ "step": 7951
+ },
+ {
+ "epoch": 45.44,
+ "grad_norm": 37.09671401977539,
+ "learning_rate": 5.066666666666667e-06,
+ "loss": 0.1401,
+ "step": 7952
+ },
+ {
+ "epoch": 45.44571428571429,
+ "grad_norm": 55.9088020324707,
+ "learning_rate": 5.06031746031746e-06,
+ "loss": 0.1078,
+ "step": 7953
+ },
+ {
+ "epoch": 45.45142857142857,
+ "grad_norm": 27.684017181396484,
+ "learning_rate": 5.053968253968254e-06,
+ "loss": 0.1642,
+ "step": 7954
+ },
+ {
+ "epoch": 45.457142857142856,
+ "grad_norm": 56.756187438964844,
+ "learning_rate": 5.047619047619047e-06,
+ "loss": 0.1364,
+ "step": 7955
+ },
+ {
+ "epoch": 45.462857142857146,
+ "grad_norm": 32.92528533935547,
+ "learning_rate": 5.0412698412698415e-06,
+ "loss": 0.1332,
+ "step": 7956
+ },
+ {
+ "epoch": 45.46857142857143,
+ "grad_norm": 80.59513092041016,
+ "learning_rate": 5.034920634920636e-06,
+ "loss": 0.1283,
+ "step": 7957
+ },
+ {
+ "epoch": 45.47428571428571,
+ "grad_norm": 39.07324981689453,
+ "learning_rate": 5.028571428571429e-06,
+ "loss": 0.1259,
+ "step": 7958
+ },
+ {
+ "epoch": 45.48,
+ "grad_norm": 45.028076171875,
+ "learning_rate": 5.022222222222223e-06,
+ "loss": 0.1298,
+ "step": 7959
+ },
+ {
+ "epoch": 45.48571428571429,
+ "grad_norm": 42.70392608642578,
+ "learning_rate": 5.015873015873016e-06,
+ "loss": 0.1357,
+ "step": 7960
+ },
+ {
+ "epoch": 45.49142857142857,
+ "grad_norm": 29.76717758178711,
+ "learning_rate": 5.00952380952381e-06,
+ "loss": 0.1171,
+ "step": 7961
+ },
+ {
+ "epoch": 45.497142857142855,
+ "grad_norm": 68.0356216430664,
+ "learning_rate": 5.003174603174603e-06,
+ "loss": 0.1502,
+ "step": 7962
+ },
+ {
+ "epoch": 45.502857142857145,
+ "grad_norm": 79.7303237915039,
+ "learning_rate": 4.996825396825397e-06,
+ "loss": 0.1387,
+ "step": 7963
+ },
+ {
+ "epoch": 45.50857142857143,
+ "grad_norm": 34.87709045410156,
+ "learning_rate": 4.99047619047619e-06,
+ "loss": 0.1394,
+ "step": 7964
+ },
+ {
+ "epoch": 45.51428571428571,
+ "grad_norm": 58.27235412597656,
+ "learning_rate": 4.9841269841269845e-06,
+ "loss": 0.1435,
+ "step": 7965
+ },
+ {
+ "epoch": 45.52,
+ "grad_norm": 34.056663513183594,
+ "learning_rate": 4.977777777777778e-06,
+ "loss": 0.1526,
+ "step": 7966
+ },
+ {
+ "epoch": 45.52571428571429,
+ "grad_norm": 35.867698669433594,
+ "learning_rate": 4.9714285714285715e-06,
+ "loss": 0.0987,
+ "step": 7967
+ },
+ {
+ "epoch": 45.53142857142857,
+ "grad_norm": 92.24200439453125,
+ "learning_rate": 4.965079365079366e-06,
+ "loss": 0.1502,
+ "step": 7968
+ },
+ {
+ "epoch": 45.537142857142854,
+ "grad_norm": 229.3932342529297,
+ "learning_rate": 4.958730158730159e-06,
+ "loss": 0.1199,
+ "step": 7969
+ },
+ {
+ "epoch": 45.542857142857144,
+ "grad_norm": 38.768646240234375,
+ "learning_rate": 4.952380952380953e-06,
+ "loss": 0.1456,
+ "step": 7970
+ },
+ {
+ "epoch": 45.54857142857143,
+ "grad_norm": 166.4169464111328,
+ "learning_rate": 4.946031746031746e-06,
+ "loss": 0.3404,
+ "step": 7971
+ },
+ {
+ "epoch": 45.55428571428571,
+ "grad_norm": 22.784006118774414,
+ "learning_rate": 4.93968253968254e-06,
+ "loss": 0.1605,
+ "step": 7972
+ },
+ {
+ "epoch": 45.56,
+ "grad_norm": 88.45488739013672,
+ "learning_rate": 4.933333333333333e-06,
+ "loss": 0.1392,
+ "step": 7973
+ },
+ {
+ "epoch": 45.565714285714286,
+ "grad_norm": 20.583641052246094,
+ "learning_rate": 4.926984126984127e-06,
+ "loss": 0.12,
+ "step": 7974
+ },
+ {
+ "epoch": 45.57142857142857,
+ "grad_norm": 28.72245979309082,
+ "learning_rate": 4.920634920634921e-06,
+ "loss": 0.1228,
+ "step": 7975
+ },
+ {
+ "epoch": 45.57714285714286,
+ "grad_norm": 49.244537353515625,
+ "learning_rate": 4.9142857142857145e-06,
+ "loss": 0.133,
+ "step": 7976
+ },
+ {
+ "epoch": 45.582857142857144,
+ "grad_norm": 64.7433090209961,
+ "learning_rate": 4.907936507936508e-06,
+ "loss": 0.1481,
+ "step": 7977
+ },
+ {
+ "epoch": 45.58857142857143,
+ "grad_norm": 43.253334045410156,
+ "learning_rate": 4.9015873015873015e-06,
+ "loss": 0.1384,
+ "step": 7978
+ },
+ {
+ "epoch": 45.59428571428572,
+ "grad_norm": 102.45452880859375,
+ "learning_rate": 4.895238095238096e-06,
+ "loss": 0.1451,
+ "step": 7979
+ },
+ {
+ "epoch": 45.6,
+ "grad_norm": 59.437744140625,
+ "learning_rate": 4.888888888888889e-06,
+ "loss": 0.1486,
+ "step": 7980
+ },
+ {
+ "epoch": 45.605714285714285,
+ "grad_norm": 50.85014724731445,
+ "learning_rate": 4.882539682539683e-06,
+ "loss": 0.1787,
+ "step": 7981
+ },
+ {
+ "epoch": 45.61142857142857,
+ "grad_norm": 95.12709045410156,
+ "learning_rate": 4.876190476190476e-06,
+ "loss": 0.1944,
+ "step": 7982
+ },
+ {
+ "epoch": 45.61714285714286,
+ "grad_norm": 44.42409133911133,
+ "learning_rate": 4.86984126984127e-06,
+ "loss": 0.1506,
+ "step": 7983
+ },
+ {
+ "epoch": 45.62285714285714,
+ "grad_norm": 27.613187789916992,
+ "learning_rate": 4.863492063492064e-06,
+ "loss": 0.1313,
+ "step": 7984
+ },
+ {
+ "epoch": 45.628571428571426,
+ "grad_norm": 37.380916595458984,
+ "learning_rate": 4.857142857142858e-06,
+ "loss": 0.1489,
+ "step": 7985
+ },
+ {
+ "epoch": 45.63428571428572,
+ "grad_norm": 18.24190902709961,
+ "learning_rate": 4.850793650793651e-06,
+ "loss": 0.1606,
+ "step": 7986
+ },
+ {
+ "epoch": 45.64,
+ "grad_norm": 21.4625244140625,
+ "learning_rate": 4.8444444444444446e-06,
+ "loss": 0.1006,
+ "step": 7987
+ },
+ {
+ "epoch": 45.645714285714284,
+ "grad_norm": 25.19818687438965,
+ "learning_rate": 4.838095238095238e-06,
+ "loss": 0.1763,
+ "step": 7988
+ },
+ {
+ "epoch": 45.651428571428575,
+ "grad_norm": 42.4224853515625,
+ "learning_rate": 4.8317460317460315e-06,
+ "loss": 0.1362,
+ "step": 7989
+ },
+ {
+ "epoch": 45.65714285714286,
+ "grad_norm": 54.029212951660156,
+ "learning_rate": 4.825396825396826e-06,
+ "loss": 0.1629,
+ "step": 7990
+ },
+ {
+ "epoch": 45.66285714285714,
+ "grad_norm": 38.289039611816406,
+ "learning_rate": 4.819047619047619e-06,
+ "loss": 0.0991,
+ "step": 7991
+ },
+ {
+ "epoch": 45.668571428571425,
+ "grad_norm": 30.55815887451172,
+ "learning_rate": 4.812698412698413e-06,
+ "loss": 0.1261,
+ "step": 7992
+ },
+ {
+ "epoch": 45.674285714285716,
+ "grad_norm": 52.03242874145508,
+ "learning_rate": 4.806349206349207e-06,
+ "loss": 0.1151,
+ "step": 7993
+ },
+ {
+ "epoch": 45.68,
+ "grad_norm": 54.05915832519531,
+ "learning_rate": 4.800000000000001e-06,
+ "loss": 0.1356,
+ "step": 7994
+ },
+ {
+ "epoch": 45.68571428571428,
+ "grad_norm": 59.59641647338867,
+ "learning_rate": 4.793650793650794e-06,
+ "loss": 0.1261,
+ "step": 7995
+ },
+ {
+ "epoch": 45.691428571428574,
+ "grad_norm": 42.24554443359375,
+ "learning_rate": 4.787301587301588e-06,
+ "loss": 0.2133,
+ "step": 7996
+ },
+ {
+ "epoch": 45.69714285714286,
+ "grad_norm": 32.48039627075195,
+ "learning_rate": 4.780952380952381e-06,
+ "loss": 0.1592,
+ "step": 7997
+ },
+ {
+ "epoch": 45.70285714285714,
+ "grad_norm": 66.41365051269531,
+ "learning_rate": 4.7746031746031746e-06,
+ "loss": 0.1625,
+ "step": 7998
+ },
+ {
+ "epoch": 45.70857142857143,
+ "grad_norm": 23.149810791015625,
+ "learning_rate": 4.768253968253968e-06,
+ "loss": 0.231,
+ "step": 7999
+ },
+ {
+ "epoch": 45.714285714285715,
+ "grad_norm": 75.46231842041016,
+ "learning_rate": 4.7619047619047615e-06,
+ "loss": 0.174,
+ "step": 8000
+ },
+ {
+ "epoch": 45.72,
+ "grad_norm": 32.4194450378418,
+ "learning_rate": 4.755555555555556e-06,
+ "loss": 0.1508,
+ "step": 8001
+ },
+ {
+ "epoch": 45.72571428571428,
+ "grad_norm": 46.35731506347656,
+ "learning_rate": 4.749206349206349e-06,
+ "loss": 0.286,
+ "step": 8002
+ },
+ {
+ "epoch": 45.73142857142857,
+ "grad_norm": 44.204750061035156,
+ "learning_rate": 4.742857142857144e-06,
+ "loss": 0.145,
+ "step": 8003
+ },
+ {
+ "epoch": 45.73714285714286,
+ "grad_norm": 20.596189498901367,
+ "learning_rate": 4.736507936507937e-06,
+ "loss": 0.1717,
+ "step": 8004
+ },
+ {
+ "epoch": 45.74285714285714,
+ "grad_norm": 52.89987564086914,
+ "learning_rate": 4.730158730158731e-06,
+ "loss": 0.1003,
+ "step": 8005
+ },
+ {
+ "epoch": 45.74857142857143,
+ "grad_norm": 47.98521041870117,
+ "learning_rate": 4.723809523809524e-06,
+ "loss": 0.1458,
+ "step": 8006
+ },
+ {
+ "epoch": 45.754285714285714,
+ "grad_norm": 58.42574691772461,
+ "learning_rate": 4.717460317460318e-06,
+ "loss": 0.1715,
+ "step": 8007
+ },
+ {
+ "epoch": 45.76,
+ "grad_norm": 23.98272132873535,
+ "learning_rate": 4.711111111111111e-06,
+ "loss": 0.1402,
+ "step": 8008
+ },
+ {
+ "epoch": 45.76571428571429,
+ "grad_norm": 30.55145835876465,
+ "learning_rate": 4.7047619047619046e-06,
+ "loss": 0.1579,
+ "step": 8009
+ },
+ {
+ "epoch": 45.77142857142857,
+ "grad_norm": 56.62355422973633,
+ "learning_rate": 4.698412698412698e-06,
+ "loss": 0.1431,
+ "step": 8010
+ },
+ {
+ "epoch": 45.777142857142856,
+ "grad_norm": 23.12961769104004,
+ "learning_rate": 4.692063492063492e-06,
+ "loss": 0.1337,
+ "step": 8011
+ },
+ {
+ "epoch": 45.78285714285714,
+ "grad_norm": 67.6146240234375,
+ "learning_rate": 4.685714285714286e-06,
+ "loss": 0.1221,
+ "step": 8012
+ },
+ {
+ "epoch": 45.78857142857143,
+ "grad_norm": 59.8460693359375,
+ "learning_rate": 4.67936507936508e-06,
+ "loss": 0.148,
+ "step": 8013
+ },
+ {
+ "epoch": 45.794285714285714,
+ "grad_norm": 40.833213806152344,
+ "learning_rate": 4.673015873015874e-06,
+ "loss": 0.0933,
+ "step": 8014
+ },
+ {
+ "epoch": 45.8,
+ "grad_norm": 40.59428787231445,
+ "learning_rate": 4.666666666666667e-06,
+ "loss": 0.1348,
+ "step": 8015
+ },
+ {
+ "epoch": 45.80571428571429,
+ "grad_norm": 46.43875503540039,
+ "learning_rate": 4.660317460317461e-06,
+ "loss": 0.1362,
+ "step": 8016
+ },
+ {
+ "epoch": 45.81142857142857,
+ "grad_norm": 29.034568786621094,
+ "learning_rate": 4.653968253968254e-06,
+ "loss": 0.1268,
+ "step": 8017
+ },
+ {
+ "epoch": 45.817142857142855,
+ "grad_norm": 46.89924240112305,
+ "learning_rate": 4.647619047619048e-06,
+ "loss": 0.1295,
+ "step": 8018
+ },
+ {
+ "epoch": 45.822857142857146,
+ "grad_norm": 30.950572967529297,
+ "learning_rate": 4.641269841269841e-06,
+ "loss": 0.1392,
+ "step": 8019
+ },
+ {
+ "epoch": 45.82857142857143,
+ "grad_norm": 40.0648307800293,
+ "learning_rate": 4.634920634920635e-06,
+ "loss": 0.1877,
+ "step": 8020
+ },
+ {
+ "epoch": 45.83428571428571,
+ "grad_norm": 72.41403198242188,
+ "learning_rate": 4.628571428571429e-06,
+ "loss": 0.167,
+ "step": 8021
+ },
+ {
+ "epoch": 45.84,
+ "grad_norm": 66.96370697021484,
+ "learning_rate": 4.622222222222222e-06,
+ "loss": 0.1308,
+ "step": 8022
+ },
+ {
+ "epoch": 45.84571428571429,
+ "grad_norm": 53.45942306518555,
+ "learning_rate": 4.615873015873016e-06,
+ "loss": 0.1318,
+ "step": 8023
+ },
+ {
+ "epoch": 45.85142857142857,
+ "grad_norm": 150.67694091796875,
+ "learning_rate": 4.609523809523809e-06,
+ "loss": 0.1171,
+ "step": 8024
+ },
+ {
+ "epoch": 45.857142857142854,
+ "grad_norm": 52.64244079589844,
+ "learning_rate": 4.603174603174604e-06,
+ "loss": 0.1788,
+ "step": 8025
+ },
+ {
+ "epoch": 45.862857142857145,
+ "grad_norm": 25.661348342895508,
+ "learning_rate": 4.596825396825397e-06,
+ "loss": 0.1838,
+ "step": 8026
+ },
+ {
+ "epoch": 45.86857142857143,
+ "grad_norm": 33.09591293334961,
+ "learning_rate": 4.590476190476191e-06,
+ "loss": 0.1409,
+ "step": 8027
+ },
+ {
+ "epoch": 45.87428571428571,
+ "grad_norm": 27.691877365112305,
+ "learning_rate": 4.584126984126984e-06,
+ "loss": 0.1439,
+ "step": 8028
+ },
+ {
+ "epoch": 45.88,
+ "grad_norm": 18.00971031188965,
+ "learning_rate": 4.5777777777777785e-06,
+ "loss": 0.1229,
+ "step": 8029
+ },
+ {
+ "epoch": 45.885714285714286,
+ "grad_norm": 56.43489074707031,
+ "learning_rate": 4.571428571428572e-06,
+ "loss": 0.1047,
+ "step": 8030
+ },
+ {
+ "epoch": 45.89142857142857,
+ "grad_norm": 33.09745788574219,
+ "learning_rate": 4.5650793650793654e-06,
+ "loss": 0.121,
+ "step": 8031
+ },
+ {
+ "epoch": 45.89714285714286,
+ "grad_norm": 30.577470779418945,
+ "learning_rate": 4.558730158730159e-06,
+ "loss": 0.1106,
+ "step": 8032
+ },
+ {
+ "epoch": 45.902857142857144,
+ "grad_norm": 28.69456672668457,
+ "learning_rate": 4.552380952380952e-06,
+ "loss": 0.1294,
+ "step": 8033
+ },
+ {
+ "epoch": 45.90857142857143,
+ "grad_norm": 50.44547653198242,
+ "learning_rate": 4.546031746031746e-06,
+ "loss": 0.1363,
+ "step": 8034
+ },
+ {
+ "epoch": 45.91428571428571,
+ "grad_norm": 677.958984375,
+ "learning_rate": 4.539682539682539e-06,
+ "loss": 0.1529,
+ "step": 8035
+ },
+ {
+ "epoch": 45.92,
+ "grad_norm": 49.4383430480957,
+ "learning_rate": 4.533333333333334e-06,
+ "loss": 0.1313,
+ "step": 8036
+ },
+ {
+ "epoch": 45.925714285714285,
+ "grad_norm": 129.2937469482422,
+ "learning_rate": 4.526984126984127e-06,
+ "loss": 0.1538,
+ "step": 8037
+ },
+ {
+ "epoch": 45.93142857142857,
+ "grad_norm": 42.8577766418457,
+ "learning_rate": 4.520634920634921e-06,
+ "loss": 0.1205,
+ "step": 8038
+ },
+ {
+ "epoch": 45.93714285714286,
+ "grad_norm": 48.1625862121582,
+ "learning_rate": 4.514285714285715e-06,
+ "loss": 0.1368,
+ "step": 8039
+ },
+ {
+ "epoch": 45.94285714285714,
+ "grad_norm": 56.93679428100586,
+ "learning_rate": 4.5079365079365085e-06,
+ "loss": 0.1108,
+ "step": 8040
+ },
+ {
+ "epoch": 45.94857142857143,
+ "grad_norm": 28.19448471069336,
+ "learning_rate": 4.501587301587302e-06,
+ "loss": 0.2028,
+ "step": 8041
+ },
+ {
+ "epoch": 45.95428571428572,
+ "grad_norm": 53.25984191894531,
+ "learning_rate": 4.4952380952380954e-06,
+ "loss": 0.1149,
+ "step": 8042
+ },
+ {
+ "epoch": 45.96,
+ "grad_norm": 32.59288024902344,
+ "learning_rate": 4.488888888888889e-06,
+ "loss": 0.1506,
+ "step": 8043
+ },
+ {
+ "epoch": 45.965714285714284,
+ "grad_norm": 31.4505558013916,
+ "learning_rate": 4.482539682539682e-06,
+ "loss": 0.1363,
+ "step": 8044
+ },
+ {
+ "epoch": 45.97142857142857,
+ "grad_norm": 30.830820083618164,
+ "learning_rate": 4.476190476190476e-06,
+ "loss": 0.1905,
+ "step": 8045
+ },
+ {
+ "epoch": 45.97714285714286,
+ "grad_norm": 44.29206848144531,
+ "learning_rate": 4.469841269841269e-06,
+ "loss": 0.1376,
+ "step": 8046
+ },
+ {
+ "epoch": 45.98285714285714,
+ "grad_norm": 727.3399658203125,
+ "learning_rate": 4.463492063492064e-06,
+ "loss": 0.1943,
+ "step": 8047
+ },
+ {
+ "epoch": 45.988571428571426,
+ "grad_norm": 26.93597984313965,
+ "learning_rate": 4.457142857142858e-06,
+ "loss": 0.1025,
+ "step": 8048
+ },
+ {
+ "epoch": 45.994285714285716,
+ "grad_norm": 54.799049377441406,
+ "learning_rate": 4.4507936507936515e-06,
+ "loss": 0.1231,
+ "step": 8049
+ },
+ {
+ "epoch": 46.0,
+ "grad_norm": 323.103515625,
+ "learning_rate": 4.444444444444445e-06,
+ "loss": 0.1579,
+ "step": 8050
+ },
+ {
+ "epoch": 46.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5804963111877441,
+ "eval_map": 0.9393,
+ "eval_map_50": 0.971,
+ "eval_map_75": 0.9643,
+ "eval_map_large": 0.9393,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9393,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7914,
+ "eval_mar_10": 0.9752,
+ "eval_mar_100": 0.9765,
+ "eval_mar_100_per_class": 0.9765,
+ "eval_mar_large": 0.9765,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 12.9868,
+ "eval_samples_per_second": 22.638,
+ "eval_steps_per_second": 2.849,
+ "step": 8050
+ },
+ {
+ "epoch": 46.005714285714284,
+ "grad_norm": 38.558109283447266,
+ "learning_rate": 4.4380952380952385e-06,
+ "loss": 0.1036,
+ "step": 8051
+ },
+ {
+ "epoch": 46.011428571428574,
+ "grad_norm": 33.96249771118164,
+ "learning_rate": 4.431746031746032e-06,
+ "loss": 0.139,
+ "step": 8052
+ },
+ {
+ "epoch": 46.01714285714286,
+ "grad_norm": 72.55626678466797,
+ "learning_rate": 4.4253968253968254e-06,
+ "loss": 0.1293,
+ "step": 8053
+ },
+ {
+ "epoch": 46.02285714285714,
+ "grad_norm": 40.11007308959961,
+ "learning_rate": 4.419047619047619e-06,
+ "loss": 0.1389,
+ "step": 8054
+ },
+ {
+ "epoch": 46.02857142857143,
+ "grad_norm": 45.6063346862793,
+ "learning_rate": 4.412698412698412e-06,
+ "loss": 0.1658,
+ "step": 8055
+ },
+ {
+ "epoch": 46.034285714285716,
+ "grad_norm": 160.0494384765625,
+ "learning_rate": 4.406349206349207e-06,
+ "loss": 0.198,
+ "step": 8056
+ },
+ {
+ "epoch": 46.04,
+ "grad_norm": 59.35651779174805,
+ "learning_rate": 4.4e-06,
+ "loss": 0.1533,
+ "step": 8057
+ },
+ {
+ "epoch": 46.04571428571428,
+ "grad_norm": 60.67522048950195,
+ "learning_rate": 4.393650793650794e-06,
+ "loss": 0.1191,
+ "step": 8058
+ },
+ {
+ "epoch": 46.05142857142857,
+ "grad_norm": 36.21040725708008,
+ "learning_rate": 4.387301587301588e-06,
+ "loss": 0.1049,
+ "step": 8059
+ },
+ {
+ "epoch": 46.05714285714286,
+ "grad_norm": 25.05276107788086,
+ "learning_rate": 4.3809523809523815e-06,
+ "loss": 0.142,
+ "step": 8060
+ },
+ {
+ "epoch": 46.06285714285714,
+ "grad_norm": 37.21646499633789,
+ "learning_rate": 4.374603174603175e-06,
+ "loss": 0.1007,
+ "step": 8061
+ },
+ {
+ "epoch": 46.06857142857143,
+ "grad_norm": 92.95510864257812,
+ "learning_rate": 4.3682539682539685e-06,
+ "loss": 0.1653,
+ "step": 8062
+ },
+ {
+ "epoch": 46.074285714285715,
+ "grad_norm": 45.78368377685547,
+ "learning_rate": 4.361904761904762e-06,
+ "loss": 0.0935,
+ "step": 8063
+ },
+ {
+ "epoch": 46.08,
+ "grad_norm": 78.9537582397461,
+ "learning_rate": 4.3555555555555555e-06,
+ "loss": 0.1634,
+ "step": 8064
+ },
+ {
+ "epoch": 46.08571428571429,
+ "grad_norm": 47.754878997802734,
+ "learning_rate": 4.34920634920635e-06,
+ "loss": 0.1306,
+ "step": 8065
+ },
+ {
+ "epoch": 46.09142857142857,
+ "grad_norm": 38.64006805419922,
+ "learning_rate": 4.342857142857143e-06,
+ "loss": 0.1382,
+ "step": 8066
+ },
+ {
+ "epoch": 46.097142857142856,
+ "grad_norm": 47.222618103027344,
+ "learning_rate": 4.336507936507937e-06,
+ "loss": 0.1524,
+ "step": 8067
+ },
+ {
+ "epoch": 46.10285714285714,
+ "grad_norm": 28.96839714050293,
+ "learning_rate": 4.33015873015873e-06,
+ "loss": 0.138,
+ "step": 8068
+ },
+ {
+ "epoch": 46.10857142857143,
+ "grad_norm": 28.85514259338379,
+ "learning_rate": 4.323809523809524e-06,
+ "loss": 0.1345,
+ "step": 8069
+ },
+ {
+ "epoch": 46.114285714285714,
+ "grad_norm": 120.97235107421875,
+ "learning_rate": 4.317460317460318e-06,
+ "loss": 0.2,
+ "step": 8070
+ },
+ {
+ "epoch": 46.12,
+ "grad_norm": 45.22092056274414,
+ "learning_rate": 4.3111111111111115e-06,
+ "loss": 0.1386,
+ "step": 8071
+ },
+ {
+ "epoch": 46.12571428571429,
+ "grad_norm": 17.343658447265625,
+ "learning_rate": 4.304761904761905e-06,
+ "loss": 0.1392,
+ "step": 8072
+ },
+ {
+ "epoch": 46.13142857142857,
+ "grad_norm": 55.66539001464844,
+ "learning_rate": 4.2984126984126985e-06,
+ "loss": 0.1834,
+ "step": 8073
+ },
+ {
+ "epoch": 46.137142857142855,
+ "grad_norm": 65.15888977050781,
+ "learning_rate": 4.292063492063492e-06,
+ "loss": 0.1463,
+ "step": 8074
+ },
+ {
+ "epoch": 46.142857142857146,
+ "grad_norm": 41.014129638671875,
+ "learning_rate": 4.285714285714286e-06,
+ "loss": 0.1523,
+ "step": 8075
+ },
+ {
+ "epoch": 46.14857142857143,
+ "grad_norm": 30.696699142456055,
+ "learning_rate": 4.27936507936508e-06,
+ "loss": 0.1527,
+ "step": 8076
+ },
+ {
+ "epoch": 46.15428571428571,
+ "grad_norm": 35.69332504272461,
+ "learning_rate": 4.273015873015873e-06,
+ "loss": 0.1674,
+ "step": 8077
+ },
+ {
+ "epoch": 46.16,
+ "grad_norm": 35.99774932861328,
+ "learning_rate": 4.266666666666667e-06,
+ "loss": 0.1502,
+ "step": 8078
+ },
+ {
+ "epoch": 46.16571428571429,
+ "grad_norm": 106.03459167480469,
+ "learning_rate": 4.26031746031746e-06,
+ "loss": 0.116,
+ "step": 8079
+ },
+ {
+ "epoch": 46.17142857142857,
+ "grad_norm": 39.56972885131836,
+ "learning_rate": 4.253968253968254e-06,
+ "loss": 0.4655,
+ "step": 8080
+ },
+ {
+ "epoch": 46.177142857142854,
+ "grad_norm": 17.103309631347656,
+ "learning_rate": 4.247619047619047e-06,
+ "loss": 0.1246,
+ "step": 8081
+ },
+ {
+ "epoch": 46.182857142857145,
+ "grad_norm": 23.25718879699707,
+ "learning_rate": 4.2412698412698415e-06,
+ "loss": 0.1346,
+ "step": 8082
+ },
+ {
+ "epoch": 46.18857142857143,
+ "grad_norm": 2234.8271484375,
+ "learning_rate": 4.234920634920635e-06,
+ "loss": 0.2929,
+ "step": 8083
+ },
+ {
+ "epoch": 46.19428571428571,
+ "grad_norm": 123.91931915283203,
+ "learning_rate": 4.228571428571429e-06,
+ "loss": 0.1155,
+ "step": 8084
+ },
+ {
+ "epoch": 46.2,
+ "grad_norm": 22.70152473449707,
+ "learning_rate": 4.222222222222223e-06,
+ "loss": 0.132,
+ "step": 8085
+ },
+ {
+ "epoch": 46.205714285714286,
+ "grad_norm": 35.118003845214844,
+ "learning_rate": 4.215873015873016e-06,
+ "loss": 0.13,
+ "step": 8086
+ },
+ {
+ "epoch": 46.21142857142857,
+ "grad_norm": 38.66529083251953,
+ "learning_rate": 4.20952380952381e-06,
+ "loss": 0.1394,
+ "step": 8087
+ },
+ {
+ "epoch": 46.21714285714286,
+ "grad_norm": 45.83235168457031,
+ "learning_rate": 4.203174603174603e-06,
+ "loss": 0.0785,
+ "step": 8088
+ },
+ {
+ "epoch": 46.222857142857144,
+ "grad_norm": 28.21929359436035,
+ "learning_rate": 4.196825396825397e-06,
+ "loss": 0.1272,
+ "step": 8089
+ },
+ {
+ "epoch": 46.22857142857143,
+ "grad_norm": 27.014911651611328,
+ "learning_rate": 4.19047619047619e-06,
+ "loss": 0.1115,
+ "step": 8090
+ },
+ {
+ "epoch": 46.23428571428571,
+ "grad_norm": 70.28314208984375,
+ "learning_rate": 4.184126984126984e-06,
+ "loss": 0.1855,
+ "step": 8091
+ },
+ {
+ "epoch": 46.24,
+ "grad_norm": 97.12055206298828,
+ "learning_rate": 4.177777777777778e-06,
+ "loss": 0.2051,
+ "step": 8092
+ },
+ {
+ "epoch": 46.245714285714286,
+ "grad_norm": 92.34823608398438,
+ "learning_rate": 4.1714285714285715e-06,
+ "loss": 0.1157,
+ "step": 8093
+ },
+ {
+ "epoch": 46.25142857142857,
+ "grad_norm": 30.1092586517334,
+ "learning_rate": 4.165079365079366e-06,
+ "loss": 0.1308,
+ "step": 8094
+ },
+ {
+ "epoch": 46.25714285714286,
+ "grad_norm": 60.77410888671875,
+ "learning_rate": 4.158730158730159e-06,
+ "loss": 0.1948,
+ "step": 8095
+ },
+ {
+ "epoch": 46.26285714285714,
+ "grad_norm": 54.656578063964844,
+ "learning_rate": 4.152380952380953e-06,
+ "loss": 0.1284,
+ "step": 8096
+ },
+ {
+ "epoch": 46.26857142857143,
+ "grad_norm": 18.921110153198242,
+ "learning_rate": 4.146031746031746e-06,
+ "loss": 0.1085,
+ "step": 8097
+ },
+ {
+ "epoch": 46.27428571428572,
+ "grad_norm": 32.94758605957031,
+ "learning_rate": 4.13968253968254e-06,
+ "loss": 0.1655,
+ "step": 8098
+ },
+ {
+ "epoch": 46.28,
+ "grad_norm": 52.38801193237305,
+ "learning_rate": 4.133333333333333e-06,
+ "loss": 0.0871,
+ "step": 8099
+ },
+ {
+ "epoch": 46.285714285714285,
+ "grad_norm": 60.48457336425781,
+ "learning_rate": 4.126984126984127e-06,
+ "loss": 0.1785,
+ "step": 8100
+ },
+ {
+ "epoch": 46.29142857142857,
+ "grad_norm": 21.47099494934082,
+ "learning_rate": 4.120634920634921e-06,
+ "loss": 0.093,
+ "step": 8101
+ },
+ {
+ "epoch": 46.29714285714286,
+ "grad_norm": 47.40420150756836,
+ "learning_rate": 4.114285714285715e-06,
+ "loss": 0.1423,
+ "step": 8102
+ },
+ {
+ "epoch": 46.30285714285714,
+ "grad_norm": 27.537425994873047,
+ "learning_rate": 4.107936507936508e-06,
+ "loss": 0.125,
+ "step": 8103
+ },
+ {
+ "epoch": 46.308571428571426,
+ "grad_norm": 25.858352661132812,
+ "learning_rate": 4.1015873015873015e-06,
+ "loss": 0.1388,
+ "step": 8104
+ },
+ {
+ "epoch": 46.31428571428572,
+ "grad_norm": 37.65393829345703,
+ "learning_rate": 4.095238095238096e-06,
+ "loss": 0.1413,
+ "step": 8105
+ },
+ {
+ "epoch": 46.32,
+ "grad_norm": 34.300838470458984,
+ "learning_rate": 4.088888888888889e-06,
+ "loss": 0.1718,
+ "step": 8106
+ },
+ {
+ "epoch": 46.325714285714284,
+ "grad_norm": 225.12020874023438,
+ "learning_rate": 4.082539682539683e-06,
+ "loss": 0.1327,
+ "step": 8107
+ },
+ {
+ "epoch": 46.331428571428575,
+ "grad_norm": 91.56395721435547,
+ "learning_rate": 4.076190476190476e-06,
+ "loss": 0.1801,
+ "step": 8108
+ },
+ {
+ "epoch": 46.33714285714286,
+ "grad_norm": 146.67095947265625,
+ "learning_rate": 4.06984126984127e-06,
+ "loss": 0.1194,
+ "step": 8109
+ },
+ {
+ "epoch": 46.34285714285714,
+ "grad_norm": 27.21466827392578,
+ "learning_rate": 4.063492063492063e-06,
+ "loss": 0.1009,
+ "step": 8110
+ },
+ {
+ "epoch": 46.348571428571425,
+ "grad_norm": 57.456424713134766,
+ "learning_rate": 4.057142857142858e-06,
+ "loss": 0.1041,
+ "step": 8111
+ },
+ {
+ "epoch": 46.354285714285716,
+ "grad_norm": 51.558509826660156,
+ "learning_rate": 4.050793650793651e-06,
+ "loss": 0.1208,
+ "step": 8112
+ },
+ {
+ "epoch": 46.36,
+ "grad_norm": 72.62211608886719,
+ "learning_rate": 4.044444444444445e-06,
+ "loss": 0.2069,
+ "step": 8113
+ },
+ {
+ "epoch": 46.36571428571428,
+ "grad_norm": 385.8822326660156,
+ "learning_rate": 4.038095238095238e-06,
+ "loss": 0.1599,
+ "step": 8114
+ },
+ {
+ "epoch": 46.371428571428574,
+ "grad_norm": 30.5274715423584,
+ "learning_rate": 4.0317460317460316e-06,
+ "loss": 0.1327,
+ "step": 8115
+ },
+ {
+ "epoch": 46.37714285714286,
+ "grad_norm": 40.12117385864258,
+ "learning_rate": 4.025396825396826e-06,
+ "loss": 0.2773,
+ "step": 8116
+ },
+ {
+ "epoch": 46.38285714285714,
+ "grad_norm": 33.24130630493164,
+ "learning_rate": 4.019047619047619e-06,
+ "loss": 0.1292,
+ "step": 8117
+ },
+ {
+ "epoch": 46.38857142857143,
+ "grad_norm": 26.06099510192871,
+ "learning_rate": 4.012698412698413e-06,
+ "loss": 0.0963,
+ "step": 8118
+ },
+ {
+ "epoch": 46.394285714285715,
+ "grad_norm": 25.47860336303711,
+ "learning_rate": 4.006349206349206e-06,
+ "loss": 0.1537,
+ "step": 8119
+ },
+ {
+ "epoch": 46.4,
+ "grad_norm": 14.787314414978027,
+ "learning_rate": 4.000000000000001e-06,
+ "loss": 0.1415,
+ "step": 8120
+ },
+ {
+ "epoch": 46.40571428571428,
+ "grad_norm": 71.64945220947266,
+ "learning_rate": 3.993650793650794e-06,
+ "loss": 0.1475,
+ "step": 8121
+ },
+ {
+ "epoch": 46.41142857142857,
+ "grad_norm": 45.85343933105469,
+ "learning_rate": 3.987301587301588e-06,
+ "loss": 0.1218,
+ "step": 8122
+ },
+ {
+ "epoch": 46.417142857142856,
+ "grad_norm": 22.669567108154297,
+ "learning_rate": 3.980952380952381e-06,
+ "loss": 0.1365,
+ "step": 8123
+ },
+ {
+ "epoch": 46.42285714285714,
+ "grad_norm": 61.678863525390625,
+ "learning_rate": 3.974603174603175e-06,
+ "loss": 0.2096,
+ "step": 8124
+ },
+ {
+ "epoch": 46.42857142857143,
+ "grad_norm": 40.377811431884766,
+ "learning_rate": 3.968253968253968e-06,
+ "loss": 0.1392,
+ "step": 8125
+ },
+ {
+ "epoch": 46.434285714285714,
+ "grad_norm": 76.2131576538086,
+ "learning_rate": 3.9619047619047616e-06,
+ "loss": 0.1054,
+ "step": 8126
+ },
+ {
+ "epoch": 46.44,
+ "grad_norm": 52.06477737426758,
+ "learning_rate": 3.955555555555555e-06,
+ "loss": 0.1174,
+ "step": 8127
+ },
+ {
+ "epoch": 46.44571428571429,
+ "grad_norm": 111.36573028564453,
+ "learning_rate": 3.949206349206349e-06,
+ "loss": 0.1364,
+ "step": 8128
+ },
+ {
+ "epoch": 46.45142857142857,
+ "grad_norm": 35.40325164794922,
+ "learning_rate": 3.942857142857143e-06,
+ "loss": 0.1621,
+ "step": 8129
+ },
+ {
+ "epoch": 46.457142857142856,
+ "grad_norm": 25.186216354370117,
+ "learning_rate": 3.936507936507937e-06,
+ "loss": 0.1325,
+ "step": 8130
+ },
+ {
+ "epoch": 46.462857142857146,
+ "grad_norm": 38.83059310913086,
+ "learning_rate": 3.930158730158731e-06,
+ "loss": 0.1178,
+ "step": 8131
+ },
+ {
+ "epoch": 46.46857142857143,
+ "grad_norm": 29.10702896118164,
+ "learning_rate": 3.923809523809524e-06,
+ "loss": 0.0836,
+ "step": 8132
+ },
+ {
+ "epoch": 46.47428571428571,
+ "grad_norm": 23.07155990600586,
+ "learning_rate": 3.917460317460318e-06,
+ "loss": 0.1087,
+ "step": 8133
+ },
+ {
+ "epoch": 46.48,
+ "grad_norm": 60.923797607421875,
+ "learning_rate": 3.911111111111111e-06,
+ "loss": 0.1187,
+ "step": 8134
+ },
+ {
+ "epoch": 46.48571428571429,
+ "grad_norm": 652.712158203125,
+ "learning_rate": 3.904761904761905e-06,
+ "loss": 0.1137,
+ "step": 8135
+ },
+ {
+ "epoch": 46.49142857142857,
+ "grad_norm": 55.697235107421875,
+ "learning_rate": 3.898412698412698e-06,
+ "loss": 0.1279,
+ "step": 8136
+ },
+ {
+ "epoch": 46.497142857142855,
+ "grad_norm": 26.30288314819336,
+ "learning_rate": 3.892063492063492e-06,
+ "loss": 0.1884,
+ "step": 8137
+ },
+ {
+ "epoch": 46.502857142857145,
+ "grad_norm": 70.62115478515625,
+ "learning_rate": 3.885714285714286e-06,
+ "loss": 0.1151,
+ "step": 8138
+ },
+ {
+ "epoch": 46.50857142857143,
+ "grad_norm": 30.58101463317871,
+ "learning_rate": 3.87936507936508e-06,
+ "loss": 0.1007,
+ "step": 8139
+ },
+ {
+ "epoch": 46.51428571428571,
+ "grad_norm": 24.60630989074707,
+ "learning_rate": 3.873015873015874e-06,
+ "loss": 0.1614,
+ "step": 8140
+ },
+ {
+ "epoch": 46.52,
+ "grad_norm": 1553.7259521484375,
+ "learning_rate": 3.866666666666667e-06,
+ "loss": 0.1584,
+ "step": 8141
+ },
+ {
+ "epoch": 46.52571428571429,
+ "grad_norm": 42.48031997680664,
+ "learning_rate": 3.860317460317461e-06,
+ "loss": 0.123,
+ "step": 8142
+ },
+ {
+ "epoch": 46.53142857142857,
+ "grad_norm": 49.869937896728516,
+ "learning_rate": 3.853968253968254e-06,
+ "loss": 0.112,
+ "step": 8143
+ },
+ {
+ "epoch": 46.537142857142854,
+ "grad_norm": 23.666994094848633,
+ "learning_rate": 3.847619047619048e-06,
+ "loss": 0.1292,
+ "step": 8144
+ },
+ {
+ "epoch": 46.542857142857144,
+ "grad_norm": 27.957948684692383,
+ "learning_rate": 3.841269841269841e-06,
+ "loss": 0.1092,
+ "step": 8145
+ },
+ {
+ "epoch": 46.54857142857143,
+ "grad_norm": 45.88031768798828,
+ "learning_rate": 3.834920634920635e-06,
+ "loss": 0.142,
+ "step": 8146
+ },
+ {
+ "epoch": 46.55428571428571,
+ "grad_norm": 52.62200927734375,
+ "learning_rate": 3.828571428571429e-06,
+ "loss": 0.1337,
+ "step": 8147
+ },
+ {
+ "epoch": 46.56,
+ "grad_norm": 24.335599899291992,
+ "learning_rate": 3.8222222222222224e-06,
+ "loss": 0.1494,
+ "step": 8148
+ },
+ {
+ "epoch": 46.565714285714286,
+ "grad_norm": 43.13810729980469,
+ "learning_rate": 3.815873015873016e-06,
+ "loss": 0.1263,
+ "step": 8149
+ },
+ {
+ "epoch": 46.57142857142857,
+ "grad_norm": 24.405460357666016,
+ "learning_rate": 3.8095238095238102e-06,
+ "loss": 0.15,
+ "step": 8150
+ },
+ {
+ "epoch": 46.57714285714286,
+ "grad_norm": 54.83547592163086,
+ "learning_rate": 3.8031746031746037e-06,
+ "loss": 0.1236,
+ "step": 8151
+ },
+ {
+ "epoch": 46.582857142857144,
+ "grad_norm": 52.128448486328125,
+ "learning_rate": 3.796825396825397e-06,
+ "loss": 0.1105,
+ "step": 8152
+ },
+ {
+ "epoch": 46.58857142857143,
+ "grad_norm": 14.59903621673584,
+ "learning_rate": 3.7904761904761907e-06,
+ "loss": 0.1386,
+ "step": 8153
+ },
+ {
+ "epoch": 46.59428571428572,
+ "grad_norm": 39.54267883300781,
+ "learning_rate": 3.7841269841269846e-06,
+ "loss": 0.1793,
+ "step": 8154
+ },
+ {
+ "epoch": 46.6,
+ "grad_norm": 41.793216705322266,
+ "learning_rate": 3.777777777777778e-06,
+ "loss": 0.1349,
+ "step": 8155
+ },
+ {
+ "epoch": 46.605714285714285,
+ "grad_norm": 44.470699310302734,
+ "learning_rate": 3.7714285714285716e-06,
+ "loss": 0.1634,
+ "step": 8156
+ },
+ {
+ "epoch": 46.61142857142857,
+ "grad_norm": 21.93581199645996,
+ "learning_rate": 3.765079365079365e-06,
+ "loss": 0.1589,
+ "step": 8157
+ },
+ {
+ "epoch": 46.61714285714286,
+ "grad_norm": 69.88360595703125,
+ "learning_rate": 3.758730158730159e-06,
+ "loss": 0.2723,
+ "step": 8158
+ },
+ {
+ "epoch": 46.62285714285714,
+ "grad_norm": 21.550399780273438,
+ "learning_rate": 3.7523809523809524e-06,
+ "loss": 0.1487,
+ "step": 8159
+ },
+ {
+ "epoch": 46.628571428571426,
+ "grad_norm": 65.6437759399414,
+ "learning_rate": 3.746031746031746e-06,
+ "loss": 0.123,
+ "step": 8160
+ },
+ {
+ "epoch": 46.63428571428572,
+ "grad_norm": 13.96087646484375,
+ "learning_rate": 3.7396825396825394e-06,
+ "loss": 0.1618,
+ "step": 8161
+ },
+ {
+ "epoch": 46.64,
+ "grad_norm": 31.26278305053711,
+ "learning_rate": 3.7333333333333337e-06,
+ "loss": 0.1606,
+ "step": 8162
+ },
+ {
+ "epoch": 46.645714285714284,
+ "grad_norm": 44.1922721862793,
+ "learning_rate": 3.7269841269841272e-06,
+ "loss": 0.1782,
+ "step": 8163
+ },
+ {
+ "epoch": 46.651428571428575,
+ "grad_norm": 55.542381286621094,
+ "learning_rate": 3.720634920634921e-06,
+ "loss": 0.1617,
+ "step": 8164
+ },
+ {
+ "epoch": 46.65714285714286,
+ "grad_norm": 26.718368530273438,
+ "learning_rate": 3.7142857142857146e-06,
+ "loss": 0.118,
+ "step": 8165
+ },
+ {
+ "epoch": 46.66285714285714,
+ "grad_norm": 26.83867835998535,
+ "learning_rate": 3.707936507936508e-06,
+ "loss": 0.1379,
+ "step": 8166
+ },
+ {
+ "epoch": 46.668571428571425,
+ "grad_norm": 72.89448547363281,
+ "learning_rate": 3.7015873015873016e-06,
+ "loss": 0.1193,
+ "step": 8167
+ },
+ {
+ "epoch": 46.674285714285716,
+ "grad_norm": 30.765777587890625,
+ "learning_rate": 3.6952380952380955e-06,
+ "loss": 0.1249,
+ "step": 8168
+ },
+ {
+ "epoch": 46.68,
+ "grad_norm": 23.857946395874023,
+ "learning_rate": 3.688888888888889e-06,
+ "loss": 0.1176,
+ "step": 8169
+ },
+ {
+ "epoch": 46.68571428571428,
+ "grad_norm": 54.70825958251953,
+ "learning_rate": 3.6825396825396824e-06,
+ "loss": 0.2107,
+ "step": 8170
+ },
+ {
+ "epoch": 46.691428571428574,
+ "grad_norm": 36.463069915771484,
+ "learning_rate": 3.676190476190476e-06,
+ "loss": 0.0937,
+ "step": 8171
+ },
+ {
+ "epoch": 46.69714285714286,
+ "grad_norm": 24.24658203125,
+ "learning_rate": 3.66984126984127e-06,
+ "loss": 0.1428,
+ "step": 8172
+ },
+ {
+ "epoch": 46.70285714285714,
+ "grad_norm": 32.16752243041992,
+ "learning_rate": 3.663492063492064e-06,
+ "loss": 0.1049,
+ "step": 8173
+ },
+ {
+ "epoch": 46.70857142857143,
+ "grad_norm": 21.37236976623535,
+ "learning_rate": 3.6571428571428576e-06,
+ "loss": 0.1869,
+ "step": 8174
+ },
+ {
+ "epoch": 46.714285714285715,
+ "grad_norm": 50.4166145324707,
+ "learning_rate": 3.650793650793651e-06,
+ "loss": 0.1869,
+ "step": 8175
+ },
+ {
+ "epoch": 46.72,
+ "grad_norm": 1364.17138671875,
+ "learning_rate": 3.6444444444444446e-06,
+ "loss": 0.2007,
+ "step": 8176
+ },
+ {
+ "epoch": 46.72571428571428,
+ "grad_norm": 53.10227584838867,
+ "learning_rate": 3.6380952380952385e-06,
+ "loss": 0.136,
+ "step": 8177
+ },
+ {
+ "epoch": 46.73142857142857,
+ "grad_norm": 27.524919509887695,
+ "learning_rate": 3.631746031746032e-06,
+ "loss": 0.1336,
+ "step": 8178
+ },
+ {
+ "epoch": 46.73714285714286,
+ "grad_norm": 35.467166900634766,
+ "learning_rate": 3.6253968253968255e-06,
+ "loss": 0.133,
+ "step": 8179
+ },
+ {
+ "epoch": 46.74285714285714,
+ "grad_norm": 83.05083465576172,
+ "learning_rate": 3.619047619047619e-06,
+ "loss": 0.1201,
+ "step": 8180
+ },
+ {
+ "epoch": 46.74857142857143,
+ "grad_norm": 24.13372230529785,
+ "learning_rate": 3.612698412698413e-06,
+ "loss": 0.2466,
+ "step": 8181
+ },
+ {
+ "epoch": 46.754285714285714,
+ "grad_norm": 1623.7891845703125,
+ "learning_rate": 3.6063492063492064e-06,
+ "loss": 0.1579,
+ "step": 8182
+ },
+ {
+ "epoch": 46.76,
+ "grad_norm": 42.3116340637207,
+ "learning_rate": 3.6e-06,
+ "loss": 0.1609,
+ "step": 8183
+ },
+ {
+ "epoch": 46.76571428571429,
+ "grad_norm": 63.3863525390625,
+ "learning_rate": 3.5936507936507933e-06,
+ "loss": 0.178,
+ "step": 8184
+ },
+ {
+ "epoch": 46.77142857142857,
+ "grad_norm": 21.229982376098633,
+ "learning_rate": 3.5873015873015877e-06,
+ "loss": 0.1605,
+ "step": 8185
+ },
+ {
+ "epoch": 46.777142857142856,
+ "grad_norm": 23.970304489135742,
+ "learning_rate": 3.5809523809523816e-06,
+ "loss": 0.1469,
+ "step": 8186
+ },
+ {
+ "epoch": 46.78285714285714,
+ "grad_norm": 44.26129913330078,
+ "learning_rate": 3.574603174603175e-06,
+ "loss": 0.226,
+ "step": 8187
+ },
+ {
+ "epoch": 46.78857142857143,
+ "grad_norm": 97.34606170654297,
+ "learning_rate": 3.5682539682539685e-06,
+ "loss": 0.1419,
+ "step": 8188
+ },
+ {
+ "epoch": 46.794285714285714,
+ "grad_norm": 59.434635162353516,
+ "learning_rate": 3.561904761904762e-06,
+ "loss": 0.2077,
+ "step": 8189
+ },
+ {
+ "epoch": 46.8,
+ "grad_norm": 51.30827331542969,
+ "learning_rate": 3.555555555555556e-06,
+ "loss": 0.1433,
+ "step": 8190
+ },
+ {
+ "epoch": 46.80571428571429,
+ "grad_norm": 1357.738525390625,
+ "learning_rate": 3.5492063492063494e-06,
+ "loss": 0.1907,
+ "step": 8191
+ },
+ {
+ "epoch": 46.81142857142857,
+ "grad_norm": 59.56105041503906,
+ "learning_rate": 3.542857142857143e-06,
+ "loss": 0.1449,
+ "step": 8192
+ },
+ {
+ "epoch": 46.817142857142855,
+ "grad_norm": 45.39702606201172,
+ "learning_rate": 3.5365079365079364e-06,
+ "loss": 0.1678,
+ "step": 8193
+ },
+ {
+ "epoch": 46.822857142857146,
+ "grad_norm": 34.24027633666992,
+ "learning_rate": 3.5301587301587303e-06,
+ "loss": 0.1231,
+ "step": 8194
+ },
+ {
+ "epoch": 46.82857142857143,
+ "grad_norm": 1358.833251953125,
+ "learning_rate": 3.5238095238095238e-06,
+ "loss": 0.1588,
+ "step": 8195
+ },
+ {
+ "epoch": 46.83428571428571,
+ "grad_norm": 51.168968200683594,
+ "learning_rate": 3.517460317460318e-06,
+ "loss": 0.1441,
+ "step": 8196
+ },
+ {
+ "epoch": 46.84,
+ "grad_norm": 29.741315841674805,
+ "learning_rate": 3.5111111111111116e-06,
+ "loss": 0.1355,
+ "step": 8197
+ },
+ {
+ "epoch": 46.84571428571429,
+ "grad_norm": 30.0971736907959,
+ "learning_rate": 3.504761904761905e-06,
+ "loss": 0.1656,
+ "step": 8198
+ },
+ {
+ "epoch": 46.85142857142857,
+ "grad_norm": 15.887604713439941,
+ "learning_rate": 3.4984126984126985e-06,
+ "loss": 0.0932,
+ "step": 8199
+ },
+ {
+ "epoch": 46.857142857142854,
+ "grad_norm": 88.35197448730469,
+ "learning_rate": 3.4920634920634924e-06,
+ "loss": 0.1495,
+ "step": 8200
+ },
+ {
+ "epoch": 46.862857142857145,
+ "grad_norm": 553.6282348632812,
+ "learning_rate": 3.485714285714286e-06,
+ "loss": 0.1997,
+ "step": 8201
+ },
+ {
+ "epoch": 46.86857142857143,
+ "grad_norm": 39.761356353759766,
+ "learning_rate": 3.4793650793650794e-06,
+ "loss": 0.1478,
+ "step": 8202
+ },
+ {
+ "epoch": 46.87428571428571,
+ "grad_norm": 62.88118362426758,
+ "learning_rate": 3.473015873015873e-06,
+ "loss": 0.1478,
+ "step": 8203
+ },
+ {
+ "epoch": 46.88,
+ "grad_norm": 41.82820510864258,
+ "learning_rate": 3.466666666666667e-06,
+ "loss": 0.1475,
+ "step": 8204
+ },
+ {
+ "epoch": 46.885714285714286,
+ "grad_norm": 24.58503532409668,
+ "learning_rate": 3.4603174603174603e-06,
+ "loss": 0.1154,
+ "step": 8205
+ },
+ {
+ "epoch": 46.89142857142857,
+ "grad_norm": 750.5741577148438,
+ "learning_rate": 3.4539682539682538e-06,
+ "loss": 0.1468,
+ "step": 8206
+ },
+ {
+ "epoch": 46.89714285714286,
+ "grad_norm": 38.60437774658203,
+ "learning_rate": 3.4476190476190472e-06,
+ "loss": 0.1349,
+ "step": 8207
+ },
+ {
+ "epoch": 46.902857142857144,
+ "grad_norm": 55.603458404541016,
+ "learning_rate": 3.4412698412698416e-06,
+ "loss": 0.1764,
+ "step": 8208
+ },
+ {
+ "epoch": 46.90857142857143,
+ "grad_norm": 22.69065284729004,
+ "learning_rate": 3.4349206349206355e-06,
+ "loss": 0.1271,
+ "step": 8209
+ },
+ {
+ "epoch": 46.91428571428571,
+ "grad_norm": 75.1495361328125,
+ "learning_rate": 3.428571428571429e-06,
+ "loss": 0.1061,
+ "step": 8210
+ },
+ {
+ "epoch": 46.92,
+ "grad_norm": 462.46527099609375,
+ "learning_rate": 3.4222222222222224e-06,
+ "loss": 0.1508,
+ "step": 8211
+ },
+ {
+ "epoch": 46.925714285714285,
+ "grad_norm": 45.36282730102539,
+ "learning_rate": 3.415873015873016e-06,
+ "loss": 0.168,
+ "step": 8212
+ },
+ {
+ "epoch": 46.93142857142857,
+ "grad_norm": 45.03870391845703,
+ "learning_rate": 3.40952380952381e-06,
+ "loss": 0.1222,
+ "step": 8213
+ },
+ {
+ "epoch": 46.93714285714286,
+ "grad_norm": 25.038911819458008,
+ "learning_rate": 3.4031746031746033e-06,
+ "loss": 0.2271,
+ "step": 8214
+ },
+ {
+ "epoch": 46.94285714285714,
+ "grad_norm": 31.28525733947754,
+ "learning_rate": 3.396825396825397e-06,
+ "loss": 0.1915,
+ "step": 8215
+ },
+ {
+ "epoch": 46.94857142857143,
+ "grad_norm": 63.485836029052734,
+ "learning_rate": 3.3904761904761903e-06,
+ "loss": 0.1574,
+ "step": 8216
+ },
+ {
+ "epoch": 46.95428571428572,
+ "grad_norm": 22.424684524536133,
+ "learning_rate": 3.384126984126984e-06,
+ "loss": 0.1187,
+ "step": 8217
+ },
+ {
+ "epoch": 46.96,
+ "grad_norm": 32.10712814331055,
+ "learning_rate": 3.3777777777777777e-06,
+ "loss": 0.1317,
+ "step": 8218
+ },
+ {
+ "epoch": 46.965714285714284,
+ "grad_norm": 30.546588897705078,
+ "learning_rate": 3.371428571428572e-06,
+ "loss": 0.2055,
+ "step": 8219
+ },
+ {
+ "epoch": 46.97142857142857,
+ "grad_norm": 34.96418380737305,
+ "learning_rate": 3.3650793650793655e-06,
+ "loss": 0.136,
+ "step": 8220
+ },
+ {
+ "epoch": 46.97714285714286,
+ "grad_norm": 65.4540023803711,
+ "learning_rate": 3.358730158730159e-06,
+ "loss": 0.1742,
+ "step": 8221
+ },
+ {
+ "epoch": 46.98285714285714,
+ "grad_norm": 45.166603088378906,
+ "learning_rate": 3.352380952380953e-06,
+ "loss": 0.1549,
+ "step": 8222
+ },
+ {
+ "epoch": 46.988571428571426,
+ "grad_norm": 37.14235305786133,
+ "learning_rate": 3.3460317460317464e-06,
+ "loss": 0.0934,
+ "step": 8223
+ },
+ {
+ "epoch": 46.994285714285716,
+ "grad_norm": 50.102142333984375,
+ "learning_rate": 3.33968253968254e-06,
+ "loss": 0.171,
+ "step": 8224
+ },
+ {
+ "epoch": 47.0,
+ "grad_norm": 46.10976791381836,
+ "learning_rate": 3.3333333333333333e-06,
+ "loss": 0.1926,
+ "step": 8225
+ },
+ {
+ "epoch": 47.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5867149829864502,
+ "eval_map": 0.9388,
+ "eval_map_50": 0.97,
+ "eval_map_75": 0.9642,
+ "eval_map_large": 0.9388,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9388,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7911,
+ "eval_mar_10": 0.9752,
+ "eval_mar_100": 0.9765,
+ "eval_mar_100_per_class": 0.9765,
+ "eval_mar_large": 0.9765,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 12.8376,
+ "eval_samples_per_second": 22.901,
+ "eval_steps_per_second": 2.882,
+ "step": 8225
+ },
+ {
+ "epoch": 47.005714285714284,
+ "grad_norm": 831.6156616210938,
+ "learning_rate": 3.3269841269841272e-06,
+ "loss": 0.1533,
+ "step": 8226
+ },
+ {
+ "epoch": 47.011428571428574,
+ "grad_norm": 48.87065887451172,
+ "learning_rate": 3.3206349206349207e-06,
+ "loss": 0.1053,
+ "step": 8227
+ },
+ {
+ "epoch": 47.01714285714286,
+ "grad_norm": 63.857749938964844,
+ "learning_rate": 3.314285714285714e-06,
+ "loss": 0.2056,
+ "step": 8228
+ },
+ {
+ "epoch": 47.02285714285714,
+ "grad_norm": 35.554115295410156,
+ "learning_rate": 3.3079365079365077e-06,
+ "loss": 0.1378,
+ "step": 8229
+ },
+ {
+ "epoch": 47.02857142857143,
+ "grad_norm": 45.24153137207031,
+ "learning_rate": 3.301587301587302e-06,
+ "loss": 0.1045,
+ "step": 8230
+ },
+ {
+ "epoch": 47.034285714285716,
+ "grad_norm": 20.985979080200195,
+ "learning_rate": 3.2952380952380955e-06,
+ "loss": 0.1071,
+ "step": 8231
+ },
+ {
+ "epoch": 47.04,
+ "grad_norm": 36.83102798461914,
+ "learning_rate": 3.2888888888888894e-06,
+ "loss": 0.1437,
+ "step": 8232
+ },
+ {
+ "epoch": 47.04571428571428,
+ "grad_norm": 94.6207504272461,
+ "learning_rate": 3.282539682539683e-06,
+ "loss": 0.173,
+ "step": 8233
+ },
+ {
+ "epoch": 47.05142857142857,
+ "grad_norm": 25.87105369567871,
+ "learning_rate": 3.2761904761904764e-06,
+ "loss": 0.13,
+ "step": 8234
+ },
+ {
+ "epoch": 47.05714285714286,
+ "grad_norm": 27.784732818603516,
+ "learning_rate": 3.26984126984127e-06,
+ "loss": 0.1602,
+ "step": 8235
+ },
+ {
+ "epoch": 47.06285714285714,
+ "grad_norm": 27.864486694335938,
+ "learning_rate": 3.2634920634920638e-06,
+ "loss": 0.1795,
+ "step": 8236
+ },
+ {
+ "epoch": 47.06857142857143,
+ "grad_norm": 36.08664321899414,
+ "learning_rate": 3.2571428571428572e-06,
+ "loss": 0.1531,
+ "step": 8237
+ },
+ {
+ "epoch": 47.074285714285715,
+ "grad_norm": 79.20808410644531,
+ "learning_rate": 3.2507936507936507e-06,
+ "loss": 0.1488,
+ "step": 8238
+ },
+ {
+ "epoch": 47.08,
+ "grad_norm": 60.46731185913086,
+ "learning_rate": 3.244444444444444e-06,
+ "loss": 0.1718,
+ "step": 8239
+ },
+ {
+ "epoch": 47.08571428571429,
+ "grad_norm": 27.0144100189209,
+ "learning_rate": 3.238095238095238e-06,
+ "loss": 0.145,
+ "step": 8240
+ },
+ {
+ "epoch": 47.09142857142857,
+ "grad_norm": 49.7216682434082,
+ "learning_rate": 3.2317460317460316e-06,
+ "loss": 0.1116,
+ "step": 8241
+ },
+ {
+ "epoch": 47.097142857142856,
+ "grad_norm": 38.72159957885742,
+ "learning_rate": 3.225396825396826e-06,
+ "loss": 0.1109,
+ "step": 8242
+ },
+ {
+ "epoch": 47.10285714285714,
+ "grad_norm": 47.735015869140625,
+ "learning_rate": 3.2190476190476194e-06,
+ "loss": 0.1321,
+ "step": 8243
+ },
+ {
+ "epoch": 47.10857142857143,
+ "grad_norm": 1039.72509765625,
+ "learning_rate": 3.212698412698413e-06,
+ "loss": 0.1194,
+ "step": 8244
+ },
+ {
+ "epoch": 47.114285714285714,
+ "grad_norm": 51.46315383911133,
+ "learning_rate": 3.206349206349207e-06,
+ "loss": 0.1445,
+ "step": 8245
+ },
+ {
+ "epoch": 47.12,
+ "grad_norm": 20.40951919555664,
+ "learning_rate": 3.2000000000000003e-06,
+ "loss": 0.1499,
+ "step": 8246
+ },
+ {
+ "epoch": 47.12571428571429,
+ "grad_norm": 24.824369430541992,
+ "learning_rate": 3.1936507936507938e-06,
+ "loss": 0.1675,
+ "step": 8247
+ },
+ {
+ "epoch": 47.13142857142857,
+ "grad_norm": 37.45597839355469,
+ "learning_rate": 3.1873015873015872e-06,
+ "loss": 0.1664,
+ "step": 8248
+ },
+ {
+ "epoch": 47.137142857142855,
+ "grad_norm": 40.37081527709961,
+ "learning_rate": 3.180952380952381e-06,
+ "loss": 0.1506,
+ "step": 8249
+ },
+ {
+ "epoch": 47.142857142857146,
+ "grad_norm": 27.73122215270996,
+ "learning_rate": 3.1746031746031746e-06,
+ "loss": 0.1141,
+ "step": 8250
+ },
+ {
+ "epoch": 47.14857142857143,
+ "grad_norm": 48.40898132324219,
+ "learning_rate": 3.168253968253968e-06,
+ "loss": 0.0857,
+ "step": 8251
+ },
+ {
+ "epoch": 47.15428571428571,
+ "grad_norm": 77.0861587524414,
+ "learning_rate": 3.1619047619047616e-06,
+ "loss": 0.1449,
+ "step": 8252
+ },
+ {
+ "epoch": 47.16,
+ "grad_norm": 68.09453582763672,
+ "learning_rate": 3.155555555555556e-06,
+ "loss": 0.1245,
+ "step": 8253
+ },
+ {
+ "epoch": 47.16571428571429,
+ "grad_norm": 51.11342239379883,
+ "learning_rate": 3.14920634920635e-06,
+ "loss": 0.0918,
+ "step": 8254
+ },
+ {
+ "epoch": 47.17142857142857,
+ "grad_norm": 52.35264205932617,
+ "learning_rate": 3.1428571428571433e-06,
+ "loss": 0.1848,
+ "step": 8255
+ },
+ {
+ "epoch": 47.177142857142854,
+ "grad_norm": 25.312780380249023,
+ "learning_rate": 3.136507936507937e-06,
+ "loss": 0.0969,
+ "step": 8256
+ },
+ {
+ "epoch": 47.182857142857145,
+ "grad_norm": 55.124568939208984,
+ "learning_rate": 3.1301587301587303e-06,
+ "loss": 0.118,
+ "step": 8257
+ },
+ {
+ "epoch": 47.18857142857143,
+ "grad_norm": 237.57467651367188,
+ "learning_rate": 3.123809523809524e-06,
+ "loss": 0.1089,
+ "step": 8258
+ },
+ {
+ "epoch": 47.19428571428571,
+ "grad_norm": 38.97993087768555,
+ "learning_rate": 3.1174603174603177e-06,
+ "loss": 0.1345,
+ "step": 8259
+ },
+ {
+ "epoch": 47.2,
+ "grad_norm": 50.58306884765625,
+ "learning_rate": 3.111111111111111e-06,
+ "loss": 0.1569,
+ "step": 8260
+ },
+ {
+ "epoch": 47.205714285714286,
+ "grad_norm": 378.4385070800781,
+ "learning_rate": 3.1047619047619046e-06,
+ "loss": 0.1853,
+ "step": 8261
+ },
+ {
+ "epoch": 47.21142857142857,
+ "grad_norm": 55.626102447509766,
+ "learning_rate": 3.0984126984126985e-06,
+ "loss": 0.1259,
+ "step": 8262
+ },
+ {
+ "epoch": 47.21714285714286,
+ "grad_norm": 105.51905059814453,
+ "learning_rate": 3.0920634920634925e-06,
+ "loss": 0.1035,
+ "step": 8263
+ },
+ {
+ "epoch": 47.222857142857144,
+ "grad_norm": 44.22740936279297,
+ "learning_rate": 3.085714285714286e-06,
+ "loss": 0.119,
+ "step": 8264
+ },
+ {
+ "epoch": 47.22857142857143,
+ "grad_norm": 58.547607421875,
+ "learning_rate": 3.0793650793650794e-06,
+ "loss": 0.1681,
+ "step": 8265
+ },
+ {
+ "epoch": 47.23428571428571,
+ "grad_norm": 30.744277954101562,
+ "learning_rate": 3.073015873015873e-06,
+ "loss": 0.2352,
+ "step": 8266
+ },
+ {
+ "epoch": 47.24,
+ "grad_norm": 81.6842269897461,
+ "learning_rate": 3.066666666666667e-06,
+ "loss": 0.1274,
+ "step": 8267
+ },
+ {
+ "epoch": 47.245714285714286,
+ "grad_norm": 251.95037841796875,
+ "learning_rate": 3.0603174603174607e-06,
+ "loss": 0.1263,
+ "step": 8268
+ },
+ {
+ "epoch": 47.25142857142857,
+ "grad_norm": 24.623939514160156,
+ "learning_rate": 3.053968253968254e-06,
+ "loss": 0.1671,
+ "step": 8269
+ },
+ {
+ "epoch": 47.25714285714286,
+ "grad_norm": 36.25025177001953,
+ "learning_rate": 3.0476190476190477e-06,
+ "loss": 0.1635,
+ "step": 8270
+ },
+ {
+ "epoch": 47.26285714285714,
+ "grad_norm": 85.24287414550781,
+ "learning_rate": 3.041269841269841e-06,
+ "loss": 0.133,
+ "step": 8271
+ },
+ {
+ "epoch": 47.26857142857143,
+ "grad_norm": 27.511301040649414,
+ "learning_rate": 3.034920634920635e-06,
+ "loss": 0.1113,
+ "step": 8272
+ },
+ {
+ "epoch": 47.27428571428572,
+ "grad_norm": 19.489055633544922,
+ "learning_rate": 3.028571428571429e-06,
+ "loss": 0.0927,
+ "step": 8273
+ },
+ {
+ "epoch": 47.28,
+ "grad_norm": 37.98540115356445,
+ "learning_rate": 3.0222222222222225e-06,
+ "loss": 0.0935,
+ "step": 8274
+ },
+ {
+ "epoch": 47.285714285714285,
+ "grad_norm": 70.36798858642578,
+ "learning_rate": 3.015873015873016e-06,
+ "loss": 0.1276,
+ "step": 8275
+ },
+ {
+ "epoch": 47.29142857142857,
+ "grad_norm": 25.47026252746582,
+ "learning_rate": 3.00952380952381e-06,
+ "loss": 0.2125,
+ "step": 8276
+ },
+ {
+ "epoch": 47.29714285714286,
+ "grad_norm": 20.216646194458008,
+ "learning_rate": 3.0031746031746033e-06,
+ "loss": 0.1335,
+ "step": 8277
+ },
+ {
+ "epoch": 47.30285714285714,
+ "grad_norm": 46.408729553222656,
+ "learning_rate": 2.996825396825397e-06,
+ "loss": 0.1901,
+ "step": 8278
+ },
+ {
+ "epoch": 47.308571428571426,
+ "grad_norm": 33.401920318603516,
+ "learning_rate": 2.9904761904761907e-06,
+ "loss": 0.1619,
+ "step": 8279
+ },
+ {
+ "epoch": 47.31428571428572,
+ "grad_norm": 1567.3370361328125,
+ "learning_rate": 2.984126984126984e-06,
+ "loss": 0.3557,
+ "step": 8280
+ },
+ {
+ "epoch": 47.32,
+ "grad_norm": 52.10667037963867,
+ "learning_rate": 2.977777777777778e-06,
+ "loss": 0.1347,
+ "step": 8281
+ },
+ {
+ "epoch": 47.325714285714284,
+ "grad_norm": 37.514949798583984,
+ "learning_rate": 2.9714285714285716e-06,
+ "loss": 0.1396,
+ "step": 8282
+ },
+ {
+ "epoch": 47.331428571428575,
+ "grad_norm": 21.12006950378418,
+ "learning_rate": 2.965079365079365e-06,
+ "loss": 0.1,
+ "step": 8283
+ },
+ {
+ "epoch": 47.33714285714286,
+ "grad_norm": 70.54804992675781,
+ "learning_rate": 2.9587301587301586e-06,
+ "loss": 0.1464,
+ "step": 8284
+ },
+ {
+ "epoch": 47.34285714285714,
+ "grad_norm": 55.37355422973633,
+ "learning_rate": 2.9523809523809525e-06,
+ "loss": 0.0889,
+ "step": 8285
+ },
+ {
+ "epoch": 47.348571428571425,
+ "grad_norm": 41.585208892822266,
+ "learning_rate": 2.9460317460317464e-06,
+ "loss": 0.1637,
+ "step": 8286
+ },
+ {
+ "epoch": 47.354285714285716,
+ "grad_norm": 34.12653350830078,
+ "learning_rate": 2.93968253968254e-06,
+ "loss": 0.1423,
+ "step": 8287
+ },
+ {
+ "epoch": 47.36,
+ "grad_norm": 36.18317413330078,
+ "learning_rate": 2.9333333333333333e-06,
+ "loss": 0.1318,
+ "step": 8288
+ },
+ {
+ "epoch": 47.36571428571428,
+ "grad_norm": 43.84857940673828,
+ "learning_rate": 2.926984126984127e-06,
+ "loss": 0.1479,
+ "step": 8289
+ },
+ {
+ "epoch": 47.371428571428574,
+ "grad_norm": 67.99090576171875,
+ "learning_rate": 2.920634920634921e-06,
+ "loss": 0.1318,
+ "step": 8290
+ },
+ {
+ "epoch": 47.37714285714286,
+ "grad_norm": 13.11436939239502,
+ "learning_rate": 2.9142857142857146e-06,
+ "loss": 0.1368,
+ "step": 8291
+ },
+ {
+ "epoch": 47.38285714285714,
+ "grad_norm": 30.894506454467773,
+ "learning_rate": 2.907936507936508e-06,
+ "loss": 0.1138,
+ "step": 8292
+ },
+ {
+ "epoch": 47.38857142857143,
+ "grad_norm": 38.45000457763672,
+ "learning_rate": 2.9015873015873016e-06,
+ "loss": 0.1314,
+ "step": 8293
+ },
+ {
+ "epoch": 47.394285714285715,
+ "grad_norm": 17.25142478942871,
+ "learning_rate": 2.8952380952380955e-06,
+ "loss": 0.151,
+ "step": 8294
+ },
+ {
+ "epoch": 47.4,
+ "grad_norm": 40.75666427612305,
+ "learning_rate": 2.888888888888889e-06,
+ "loss": 0.1359,
+ "step": 8295
+ },
+ {
+ "epoch": 47.40571428571428,
+ "grad_norm": 49.49689865112305,
+ "learning_rate": 2.882539682539683e-06,
+ "loss": 0.1432,
+ "step": 8296
+ },
+ {
+ "epoch": 47.41142857142857,
+ "grad_norm": 39.747493743896484,
+ "learning_rate": 2.8761904761904764e-06,
+ "loss": 0.131,
+ "step": 8297
+ },
+ {
+ "epoch": 47.417142857142856,
+ "grad_norm": 11.636551856994629,
+ "learning_rate": 2.86984126984127e-06,
+ "loss": 0.1374,
+ "step": 8298
+ },
+ {
+ "epoch": 47.42285714285714,
+ "grad_norm": 46.46279525756836,
+ "learning_rate": 2.8634920634920638e-06,
+ "loss": 0.0989,
+ "step": 8299
+ },
+ {
+ "epoch": 47.42857142857143,
+ "grad_norm": 62.19584274291992,
+ "learning_rate": 2.8571428571428573e-06,
+ "loss": 0.1192,
+ "step": 8300
+ },
+ {
+ "epoch": 47.434285714285714,
+ "grad_norm": 37.39603805541992,
+ "learning_rate": 2.8507936507936507e-06,
+ "loss": 0.1281,
+ "step": 8301
+ },
+ {
+ "epoch": 47.44,
+ "grad_norm": 70.29913330078125,
+ "learning_rate": 2.8444444444444446e-06,
+ "loss": 0.1249,
+ "step": 8302
+ },
+ {
+ "epoch": 47.44571428571429,
+ "grad_norm": 39.924232482910156,
+ "learning_rate": 2.838095238095238e-06,
+ "loss": 0.1295,
+ "step": 8303
+ },
+ {
+ "epoch": 47.45142857142857,
+ "grad_norm": 22.518259048461914,
+ "learning_rate": 2.831746031746032e-06,
+ "loss": 0.1061,
+ "step": 8304
+ },
+ {
+ "epoch": 47.457142857142856,
+ "grad_norm": 49.80412673950195,
+ "learning_rate": 2.8253968253968255e-06,
+ "loss": 0.1511,
+ "step": 8305
+ },
+ {
+ "epoch": 47.462857142857146,
+ "grad_norm": 41.26884841918945,
+ "learning_rate": 2.819047619047619e-06,
+ "loss": 0.1306,
+ "step": 8306
+ },
+ {
+ "epoch": 47.46857142857143,
+ "grad_norm": 45.56854248046875,
+ "learning_rate": 2.8126984126984125e-06,
+ "loss": 0.1149,
+ "step": 8307
+ },
+ {
+ "epoch": 47.47428571428571,
+ "grad_norm": 55.0831298828125,
+ "learning_rate": 2.806349206349207e-06,
+ "loss": 0.0992,
+ "step": 8308
+ },
+ {
+ "epoch": 47.48,
+ "grad_norm": 23.75543975830078,
+ "learning_rate": 2.8000000000000003e-06,
+ "loss": 0.1156,
+ "step": 8309
+ },
+ {
+ "epoch": 47.48571428571429,
+ "grad_norm": 22.74629783630371,
+ "learning_rate": 2.7936507936507938e-06,
+ "loss": 0.1275,
+ "step": 8310
+ },
+ {
+ "epoch": 47.49142857142857,
+ "grad_norm": 56.682891845703125,
+ "learning_rate": 2.7873015873015873e-06,
+ "loss": 0.1803,
+ "step": 8311
+ },
+ {
+ "epoch": 47.497142857142855,
+ "grad_norm": 25.42420196533203,
+ "learning_rate": 2.780952380952381e-06,
+ "loss": 0.2765,
+ "step": 8312
+ },
+ {
+ "epoch": 47.502857142857145,
+ "grad_norm": 48.49197769165039,
+ "learning_rate": 2.774603174603175e-06,
+ "loss": 0.1611,
+ "step": 8313
+ },
+ {
+ "epoch": 47.50857142857143,
+ "grad_norm": 31.067285537719727,
+ "learning_rate": 2.7682539682539686e-06,
+ "loss": 0.1706,
+ "step": 8314
+ },
+ {
+ "epoch": 47.51428571428571,
+ "grad_norm": 47.51041030883789,
+ "learning_rate": 2.761904761904762e-06,
+ "loss": 0.1307,
+ "step": 8315
+ },
+ {
+ "epoch": 47.52,
+ "grad_norm": 40.67095184326172,
+ "learning_rate": 2.7555555555555555e-06,
+ "loss": 0.0961,
+ "step": 8316
+ },
+ {
+ "epoch": 47.52571428571429,
+ "grad_norm": 146.58514404296875,
+ "learning_rate": 2.7492063492063494e-06,
+ "loss": 0.1653,
+ "step": 8317
+ },
+ {
+ "epoch": 47.53142857142857,
+ "grad_norm": 80.32205200195312,
+ "learning_rate": 2.742857142857143e-06,
+ "loss": 0.1155,
+ "step": 8318
+ },
+ {
+ "epoch": 47.537142857142854,
+ "grad_norm": 47.289634704589844,
+ "learning_rate": 2.736507936507937e-06,
+ "loss": 0.1319,
+ "step": 8319
+ },
+ {
+ "epoch": 47.542857142857144,
+ "grad_norm": 23.88016700744629,
+ "learning_rate": 2.7301587301587303e-06,
+ "loss": 0.1174,
+ "step": 8320
+ },
+ {
+ "epoch": 47.54857142857143,
+ "grad_norm": 50.47907638549805,
+ "learning_rate": 2.7238095238095238e-06,
+ "loss": 0.1272,
+ "step": 8321
+ },
+ {
+ "epoch": 47.55428571428571,
+ "grad_norm": 45.27683639526367,
+ "learning_rate": 2.7174603174603177e-06,
+ "loss": 0.1088,
+ "step": 8322
+ },
+ {
+ "epoch": 47.56,
+ "grad_norm": 23.826622009277344,
+ "learning_rate": 2.711111111111111e-06,
+ "loss": 0.1979,
+ "step": 8323
+ },
+ {
+ "epoch": 47.565714285714286,
+ "grad_norm": 30.396648406982422,
+ "learning_rate": 2.7047619047619047e-06,
+ "loss": 0.1101,
+ "step": 8324
+ },
+ {
+ "epoch": 47.57142857142857,
+ "grad_norm": 35.595489501953125,
+ "learning_rate": 2.6984126984126986e-06,
+ "loss": 0.1466,
+ "step": 8325
+ },
+ {
+ "epoch": 47.57714285714286,
+ "grad_norm": 40.22230911254883,
+ "learning_rate": 2.692063492063492e-06,
+ "loss": 0.1931,
+ "step": 8326
+ },
+ {
+ "epoch": 47.582857142857144,
+ "grad_norm": 161.3650360107422,
+ "learning_rate": 2.685714285714286e-06,
+ "loss": 0.133,
+ "step": 8327
+ },
+ {
+ "epoch": 47.58857142857143,
+ "grad_norm": 35.126338958740234,
+ "learning_rate": 2.6793650793650794e-06,
+ "loss": 0.127,
+ "step": 8328
+ },
+ {
+ "epoch": 47.59428571428572,
+ "grad_norm": 34.326942443847656,
+ "learning_rate": 2.673015873015873e-06,
+ "loss": 0.1369,
+ "step": 8329
+ },
+ {
+ "epoch": 47.6,
+ "grad_norm": 59.76482009887695,
+ "learning_rate": 2.666666666666667e-06,
+ "loss": 0.1403,
+ "step": 8330
+ },
+ {
+ "epoch": 47.605714285714285,
+ "grad_norm": 42.53165817260742,
+ "learning_rate": 2.6603174603174607e-06,
+ "loss": 0.1916,
+ "step": 8331
+ },
+ {
+ "epoch": 47.61142857142857,
+ "grad_norm": 68.71272277832031,
+ "learning_rate": 2.6539682539682542e-06,
+ "loss": 0.1097,
+ "step": 8332
+ },
+ {
+ "epoch": 47.61714285714286,
+ "grad_norm": 35.917396545410156,
+ "learning_rate": 2.6476190476190477e-06,
+ "loss": 0.0865,
+ "step": 8333
+ },
+ {
+ "epoch": 47.62285714285714,
+ "grad_norm": 25.682573318481445,
+ "learning_rate": 2.641269841269841e-06,
+ "loss": 0.1399,
+ "step": 8334
+ },
+ {
+ "epoch": 47.628571428571426,
+ "grad_norm": 43.88801574707031,
+ "learning_rate": 2.634920634920635e-06,
+ "loss": 0.1751,
+ "step": 8335
+ },
+ {
+ "epoch": 47.63428571428572,
+ "grad_norm": 27.394786834716797,
+ "learning_rate": 2.628571428571429e-06,
+ "loss": 0.1477,
+ "step": 8336
+ },
+ {
+ "epoch": 47.64,
+ "grad_norm": 95.20401763916016,
+ "learning_rate": 2.6222222222222225e-06,
+ "loss": 0.1482,
+ "step": 8337
+ },
+ {
+ "epoch": 47.645714285714284,
+ "grad_norm": 29.056629180908203,
+ "learning_rate": 2.615873015873016e-06,
+ "loss": 0.1455,
+ "step": 8338
+ },
+ {
+ "epoch": 47.651428571428575,
+ "grad_norm": 42.398990631103516,
+ "learning_rate": 2.6095238095238094e-06,
+ "loss": 0.1229,
+ "step": 8339
+ },
+ {
+ "epoch": 47.65714285714286,
+ "grad_norm": 44.73271179199219,
+ "learning_rate": 2.6031746031746034e-06,
+ "loss": 0.1161,
+ "step": 8340
+ },
+ {
+ "epoch": 47.66285714285714,
+ "grad_norm": 46.483375549316406,
+ "learning_rate": 2.596825396825397e-06,
+ "loss": 0.1542,
+ "step": 8341
+ },
+ {
+ "epoch": 47.668571428571425,
+ "grad_norm": 54.983882904052734,
+ "learning_rate": 2.5904761904761907e-06,
+ "loss": 0.1515,
+ "step": 8342
+ },
+ {
+ "epoch": 47.674285714285716,
+ "grad_norm": 111.94491577148438,
+ "learning_rate": 2.5841269841269842e-06,
+ "loss": 0.1714,
+ "step": 8343
+ },
+ {
+ "epoch": 47.68,
+ "grad_norm": 20.73655128479004,
+ "learning_rate": 2.5777777777777777e-06,
+ "loss": 0.1511,
+ "step": 8344
+ },
+ {
+ "epoch": 47.68571428571428,
+ "grad_norm": 78.76811218261719,
+ "learning_rate": 2.5714285714285716e-06,
+ "loss": 0.126,
+ "step": 8345
+ },
+ {
+ "epoch": 47.691428571428574,
+ "grad_norm": 59.38375473022461,
+ "learning_rate": 2.565079365079365e-06,
+ "loss": 0.1051,
+ "step": 8346
+ },
+ {
+ "epoch": 47.69714285714286,
+ "grad_norm": 39.290348052978516,
+ "learning_rate": 2.5587301587301586e-06,
+ "loss": 0.117,
+ "step": 8347
+ },
+ {
+ "epoch": 47.70285714285714,
+ "grad_norm": 22.81290054321289,
+ "learning_rate": 2.5523809523809525e-06,
+ "loss": 0.1429,
+ "step": 8348
+ },
+ {
+ "epoch": 47.70857142857143,
+ "grad_norm": 88.15597534179688,
+ "learning_rate": 2.5460317460317464e-06,
+ "loss": 0.0944,
+ "step": 8349
+ },
+ {
+ "epoch": 47.714285714285715,
+ "grad_norm": 369.8224792480469,
+ "learning_rate": 2.53968253968254e-06,
+ "loss": 0.1044,
+ "step": 8350
+ },
+ {
+ "epoch": 47.72,
+ "grad_norm": 48.856605529785156,
+ "learning_rate": 2.5333333333333334e-06,
+ "loss": 0.1982,
+ "step": 8351
+ },
+ {
+ "epoch": 47.72571428571428,
+ "grad_norm": 25.321481704711914,
+ "learning_rate": 2.526984126984127e-06,
+ "loss": 0.1451,
+ "step": 8352
+ },
+ {
+ "epoch": 47.73142857142857,
+ "grad_norm": 77.55184173583984,
+ "learning_rate": 2.5206349206349207e-06,
+ "loss": 0.1326,
+ "step": 8353
+ },
+ {
+ "epoch": 47.73714285714286,
+ "grad_norm": 27.76304817199707,
+ "learning_rate": 2.5142857142857147e-06,
+ "loss": 0.1235,
+ "step": 8354
+ },
+ {
+ "epoch": 47.74285714285714,
+ "grad_norm": 391.8480529785156,
+ "learning_rate": 2.507936507936508e-06,
+ "loss": 0.1613,
+ "step": 8355
+ },
+ {
+ "epoch": 47.74857142857143,
+ "grad_norm": 26.454538345336914,
+ "learning_rate": 2.5015873015873016e-06,
+ "loss": 0.1165,
+ "step": 8356
+ },
+ {
+ "epoch": 47.754285714285714,
+ "grad_norm": 26.777204513549805,
+ "learning_rate": 2.495238095238095e-06,
+ "loss": 0.1185,
+ "step": 8357
+ },
+ {
+ "epoch": 47.76,
+ "grad_norm": 599.7885131835938,
+ "learning_rate": 2.488888888888889e-06,
+ "loss": 0.1889,
+ "step": 8358
+ },
+ {
+ "epoch": 47.76571428571429,
+ "grad_norm": 382.1898193359375,
+ "learning_rate": 2.482539682539683e-06,
+ "loss": 0.1575,
+ "step": 8359
+ },
+ {
+ "epoch": 47.77142857142857,
+ "grad_norm": 57.55063247680664,
+ "learning_rate": 2.4761904761904764e-06,
+ "loss": 0.1624,
+ "step": 8360
+ },
+ {
+ "epoch": 47.777142857142856,
+ "grad_norm": 24.63785743713379,
+ "learning_rate": 2.46984126984127e-06,
+ "loss": 0.1153,
+ "step": 8361
+ },
+ {
+ "epoch": 47.78285714285714,
+ "grad_norm": 36.32056427001953,
+ "learning_rate": 2.4634920634920634e-06,
+ "loss": 0.1473,
+ "step": 8362
+ },
+ {
+ "epoch": 47.78857142857143,
+ "grad_norm": 65.92711639404297,
+ "learning_rate": 2.4571428571428573e-06,
+ "loss": 0.1222,
+ "step": 8363
+ },
+ {
+ "epoch": 47.794285714285714,
+ "grad_norm": 34.27105712890625,
+ "learning_rate": 2.4507936507936508e-06,
+ "loss": 0.1124,
+ "step": 8364
+ },
+ {
+ "epoch": 47.8,
+ "grad_norm": 27.015478134155273,
+ "learning_rate": 2.4444444444444447e-06,
+ "loss": 0.1552,
+ "step": 8365
+ },
+ {
+ "epoch": 47.80571428571429,
+ "grad_norm": 33.48320770263672,
+ "learning_rate": 2.438095238095238e-06,
+ "loss": 0.1325,
+ "step": 8366
+ },
+ {
+ "epoch": 47.81142857142857,
+ "grad_norm": 29.862972259521484,
+ "learning_rate": 2.431746031746032e-06,
+ "loss": 0.1393,
+ "step": 8367
+ },
+ {
+ "epoch": 47.817142857142855,
+ "grad_norm": 63.37629699707031,
+ "learning_rate": 2.4253968253968255e-06,
+ "loss": 0.1837,
+ "step": 8368
+ },
+ {
+ "epoch": 47.822857142857146,
+ "grad_norm": 42.46182632446289,
+ "learning_rate": 2.419047619047619e-06,
+ "loss": 0.1581,
+ "step": 8369
+ },
+ {
+ "epoch": 47.82857142857143,
+ "grad_norm": 68.42111206054688,
+ "learning_rate": 2.412698412698413e-06,
+ "loss": 0.1119,
+ "step": 8370
+ },
+ {
+ "epoch": 47.83428571428571,
+ "grad_norm": 38.69887924194336,
+ "learning_rate": 2.4063492063492064e-06,
+ "loss": 0.1239,
+ "step": 8371
+ },
+ {
+ "epoch": 47.84,
+ "grad_norm": 19.0052490234375,
+ "learning_rate": 2.4000000000000003e-06,
+ "loss": 0.1433,
+ "step": 8372
+ },
+ {
+ "epoch": 47.84571428571429,
+ "grad_norm": 56.69364929199219,
+ "learning_rate": 2.393650793650794e-06,
+ "loss": 0.5268,
+ "step": 8373
+ },
+ {
+ "epoch": 47.85142857142857,
+ "grad_norm": 104.00349426269531,
+ "learning_rate": 2.3873015873015873e-06,
+ "loss": 0.0927,
+ "step": 8374
+ },
+ {
+ "epoch": 47.857142857142854,
+ "grad_norm": 27.823810577392578,
+ "learning_rate": 2.3809523809523808e-06,
+ "loss": 0.1616,
+ "step": 8375
+ },
+ {
+ "epoch": 47.862857142857145,
+ "grad_norm": 34.778770446777344,
+ "learning_rate": 2.3746031746031747e-06,
+ "loss": 0.1227,
+ "step": 8376
+ },
+ {
+ "epoch": 47.86857142857143,
+ "grad_norm": 278.9198913574219,
+ "learning_rate": 2.3682539682539686e-06,
+ "loss": 0.1321,
+ "step": 8377
+ },
+ {
+ "epoch": 47.87428571428571,
+ "grad_norm": 61.145484924316406,
+ "learning_rate": 2.361904761904762e-06,
+ "loss": 0.1147,
+ "step": 8378
+ },
+ {
+ "epoch": 47.88,
+ "grad_norm": 30.837554931640625,
+ "learning_rate": 2.3555555555555555e-06,
+ "loss": 0.1414,
+ "step": 8379
+ },
+ {
+ "epoch": 47.885714285714286,
+ "grad_norm": 78.45512390136719,
+ "learning_rate": 2.349206349206349e-06,
+ "loss": 0.1562,
+ "step": 8380
+ },
+ {
+ "epoch": 47.89142857142857,
+ "grad_norm": 56.4572639465332,
+ "learning_rate": 2.342857142857143e-06,
+ "loss": 0.1366,
+ "step": 8381
+ },
+ {
+ "epoch": 47.89714285714286,
+ "grad_norm": 30.032258987426758,
+ "learning_rate": 2.336507936507937e-06,
+ "loss": 0.1084,
+ "step": 8382
+ },
+ {
+ "epoch": 47.902857142857144,
+ "grad_norm": 70.42892456054688,
+ "learning_rate": 2.3301587301587303e-06,
+ "loss": 0.1693,
+ "step": 8383
+ },
+ {
+ "epoch": 47.90857142857143,
+ "grad_norm": 37.96274185180664,
+ "learning_rate": 2.323809523809524e-06,
+ "loss": 0.1191,
+ "step": 8384
+ },
+ {
+ "epoch": 47.91428571428571,
+ "grad_norm": 25.497051239013672,
+ "learning_rate": 2.3174603174603177e-06,
+ "loss": 0.1744,
+ "step": 8385
+ },
+ {
+ "epoch": 47.92,
+ "grad_norm": 42.66016387939453,
+ "learning_rate": 2.311111111111111e-06,
+ "loss": 0.1295,
+ "step": 8386
+ },
+ {
+ "epoch": 47.925714285714285,
+ "grad_norm": 25.076026916503906,
+ "learning_rate": 2.3047619047619047e-06,
+ "loss": 0.1547,
+ "step": 8387
+ },
+ {
+ "epoch": 47.93142857142857,
+ "grad_norm": 30.716888427734375,
+ "learning_rate": 2.2984126984126986e-06,
+ "loss": 0.1679,
+ "step": 8388
+ },
+ {
+ "epoch": 47.93714285714286,
+ "grad_norm": 38.1251106262207,
+ "learning_rate": 2.292063492063492e-06,
+ "loss": 0.2028,
+ "step": 8389
+ },
+ {
+ "epoch": 47.94285714285714,
+ "grad_norm": 42.802391052246094,
+ "learning_rate": 2.285714285714286e-06,
+ "loss": 0.1305,
+ "step": 8390
+ },
+ {
+ "epoch": 47.94857142857143,
+ "grad_norm": 104.67507934570312,
+ "learning_rate": 2.2793650793650795e-06,
+ "loss": 0.1812,
+ "step": 8391
+ },
+ {
+ "epoch": 47.95428571428572,
+ "grad_norm": 25.324993133544922,
+ "learning_rate": 2.273015873015873e-06,
+ "loss": 0.1411,
+ "step": 8392
+ },
+ {
+ "epoch": 47.96,
+ "grad_norm": 37.27313995361328,
+ "learning_rate": 2.266666666666667e-06,
+ "loss": 0.0953,
+ "step": 8393
+ },
+ {
+ "epoch": 47.965714285714284,
+ "grad_norm": 36.83076477050781,
+ "learning_rate": 2.2603174603174603e-06,
+ "loss": 0.1841,
+ "step": 8394
+ },
+ {
+ "epoch": 47.97142857142857,
+ "grad_norm": 67.00370025634766,
+ "learning_rate": 2.2539682539682542e-06,
+ "loss": 0.2445,
+ "step": 8395
+ },
+ {
+ "epoch": 47.97714285714286,
+ "grad_norm": 32.93381881713867,
+ "learning_rate": 2.2476190476190477e-06,
+ "loss": 0.1419,
+ "step": 8396
+ },
+ {
+ "epoch": 47.98285714285714,
+ "grad_norm": 82.33846282958984,
+ "learning_rate": 2.241269841269841e-06,
+ "loss": 0.1457,
+ "step": 8397
+ },
+ {
+ "epoch": 47.988571428571426,
+ "grad_norm": 40.23613739013672,
+ "learning_rate": 2.2349206349206347e-06,
+ "loss": 0.1066,
+ "step": 8398
+ },
+ {
+ "epoch": 47.994285714285716,
+ "grad_norm": 121.78873443603516,
+ "learning_rate": 2.228571428571429e-06,
+ "loss": 0.1353,
+ "step": 8399
+ },
+ {
+ "epoch": 48.0,
+ "grad_norm": 13.992203712463379,
+ "learning_rate": 2.2222222222222225e-06,
+ "loss": 0.1188,
+ "step": 8400
+ },
+ {
+ "epoch": 48.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5805517435073853,
+ "eval_map": 0.9383,
+ "eval_map_50": 0.9725,
+ "eval_map_75": 0.9652,
+ "eval_map_large": 0.9383,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9383,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7952,
+ "eval_mar_10": 0.9727,
+ "eval_mar_100": 0.9743,
+ "eval_mar_100_per_class": 0.9743,
+ "eval_mar_large": 0.9743,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 12.762,
+ "eval_samples_per_second": 23.037,
+ "eval_steps_per_second": 2.899,
+ "step": 8400
+ },
+ {
+ "epoch": 48.005714285714284,
+ "grad_norm": 34.68983840942383,
+ "learning_rate": 2.215873015873016e-06,
+ "loss": 0.0969,
+ "step": 8401
+ },
+ {
+ "epoch": 48.011428571428574,
+ "grad_norm": 44.068450927734375,
+ "learning_rate": 2.2095238095238095e-06,
+ "loss": 0.1547,
+ "step": 8402
+ },
+ {
+ "epoch": 48.01714285714286,
+ "grad_norm": 68.57250213623047,
+ "learning_rate": 2.2031746031746034e-06,
+ "loss": 0.1059,
+ "step": 8403
+ },
+ {
+ "epoch": 48.02285714285714,
+ "grad_norm": 53.91074752807617,
+ "learning_rate": 2.196825396825397e-06,
+ "loss": 0.1564,
+ "step": 8404
+ },
+ {
+ "epoch": 48.02857142857143,
+ "grad_norm": 17.755817413330078,
+ "learning_rate": 2.1904761904761908e-06,
+ "loss": 0.107,
+ "step": 8405
+ },
+ {
+ "epoch": 48.034285714285716,
+ "grad_norm": 42.74783706665039,
+ "learning_rate": 2.1841269841269842e-06,
+ "loss": 0.1607,
+ "step": 8406
+ },
+ {
+ "epoch": 48.04,
+ "grad_norm": 51.61184310913086,
+ "learning_rate": 2.1777777777777777e-06,
+ "loss": 0.0835,
+ "step": 8407
+ },
+ {
+ "epoch": 48.04571428571428,
+ "grad_norm": 664.2689208984375,
+ "learning_rate": 2.1714285714285716e-06,
+ "loss": 0.1338,
+ "step": 8408
+ },
+ {
+ "epoch": 48.05142857142857,
+ "grad_norm": 50.177127838134766,
+ "learning_rate": 2.165079365079365e-06,
+ "loss": 0.1307,
+ "step": 8409
+ },
+ {
+ "epoch": 48.05714285714286,
+ "grad_norm": 56.43240737915039,
+ "learning_rate": 2.158730158730159e-06,
+ "loss": 0.1712,
+ "step": 8410
+ },
+ {
+ "epoch": 48.06285714285714,
+ "grad_norm": 68.9998550415039,
+ "learning_rate": 2.1523809523809525e-06,
+ "loss": 0.0927,
+ "step": 8411
+ },
+ {
+ "epoch": 48.06857142857143,
+ "grad_norm": 39.705413818359375,
+ "learning_rate": 2.146031746031746e-06,
+ "loss": 0.1695,
+ "step": 8412
+ },
+ {
+ "epoch": 48.074285714285715,
+ "grad_norm": 52.13355255126953,
+ "learning_rate": 2.13968253968254e-06,
+ "loss": 0.1622,
+ "step": 8413
+ },
+ {
+ "epoch": 48.08,
+ "grad_norm": 33.74021530151367,
+ "learning_rate": 2.1333333333333334e-06,
+ "loss": 0.1119,
+ "step": 8414
+ },
+ {
+ "epoch": 48.08571428571429,
+ "grad_norm": 57.10844802856445,
+ "learning_rate": 2.126984126984127e-06,
+ "loss": 0.1016,
+ "step": 8415
+ },
+ {
+ "epoch": 48.09142857142857,
+ "grad_norm": 80.46121978759766,
+ "learning_rate": 2.1206349206349208e-06,
+ "loss": 0.1084,
+ "step": 8416
+ },
+ {
+ "epoch": 48.097142857142856,
+ "grad_norm": 57.60420227050781,
+ "learning_rate": 2.1142857142857147e-06,
+ "loss": 0.1187,
+ "step": 8417
+ },
+ {
+ "epoch": 48.10285714285714,
+ "grad_norm": 18.46352195739746,
+ "learning_rate": 2.107936507936508e-06,
+ "loss": 0.1117,
+ "step": 8418
+ },
+ {
+ "epoch": 48.10857142857143,
+ "grad_norm": 63.23112869262695,
+ "learning_rate": 2.1015873015873016e-06,
+ "loss": 0.0882,
+ "step": 8419
+ },
+ {
+ "epoch": 48.114285714285714,
+ "grad_norm": 41.47560501098633,
+ "learning_rate": 2.095238095238095e-06,
+ "loss": 0.4097,
+ "step": 8420
+ },
+ {
+ "epoch": 48.12,
+ "grad_norm": 58.211090087890625,
+ "learning_rate": 2.088888888888889e-06,
+ "loss": 0.1006,
+ "step": 8421
+ },
+ {
+ "epoch": 48.12571428571429,
+ "grad_norm": 33.181087493896484,
+ "learning_rate": 2.082539682539683e-06,
+ "loss": 0.1426,
+ "step": 8422
+ },
+ {
+ "epoch": 48.13142857142857,
+ "grad_norm": 38.934871673583984,
+ "learning_rate": 2.0761904761904764e-06,
+ "loss": 0.1232,
+ "step": 8423
+ },
+ {
+ "epoch": 48.137142857142855,
+ "grad_norm": 183.18692016601562,
+ "learning_rate": 2.06984126984127e-06,
+ "loss": 0.1435,
+ "step": 8424
+ },
+ {
+ "epoch": 48.142857142857146,
+ "grad_norm": 52.49740219116211,
+ "learning_rate": 2.0634920634920634e-06,
+ "loss": 0.0913,
+ "step": 8425
+ },
+ {
+ "epoch": 48.14857142857143,
+ "grad_norm": 44.68959045410156,
+ "learning_rate": 2.0571428571428573e-06,
+ "loss": 0.1456,
+ "step": 8426
+ },
+ {
+ "epoch": 48.15428571428571,
+ "grad_norm": 72.08333587646484,
+ "learning_rate": 2.0507936507936508e-06,
+ "loss": 0.2143,
+ "step": 8427
+ },
+ {
+ "epoch": 48.16,
+ "grad_norm": 40.389060974121094,
+ "learning_rate": 2.0444444444444447e-06,
+ "loss": 0.1633,
+ "step": 8428
+ },
+ {
+ "epoch": 48.16571428571429,
+ "grad_norm": 70.50105285644531,
+ "learning_rate": 2.038095238095238e-06,
+ "loss": 0.1382,
+ "step": 8429
+ },
+ {
+ "epoch": 48.17142857142857,
+ "grad_norm": 45.87871551513672,
+ "learning_rate": 2.0317460317460316e-06,
+ "loss": 0.1231,
+ "step": 8430
+ },
+ {
+ "epoch": 48.177142857142854,
+ "grad_norm": 24.484031677246094,
+ "learning_rate": 2.0253968253968256e-06,
+ "loss": 0.1068,
+ "step": 8431
+ },
+ {
+ "epoch": 48.182857142857145,
+ "grad_norm": 39.662105560302734,
+ "learning_rate": 2.019047619047619e-06,
+ "loss": 0.1016,
+ "step": 8432
+ },
+ {
+ "epoch": 48.18857142857143,
+ "grad_norm": 71.07365417480469,
+ "learning_rate": 2.012698412698413e-06,
+ "loss": 0.1826,
+ "step": 8433
+ },
+ {
+ "epoch": 48.19428571428571,
+ "grad_norm": 48.34844970703125,
+ "learning_rate": 2.0063492063492064e-06,
+ "loss": 0.1473,
+ "step": 8434
+ },
+ {
+ "epoch": 48.2,
+ "grad_norm": 54.211822509765625,
+ "learning_rate": 2.0000000000000003e-06,
+ "loss": 0.1128,
+ "step": 8435
+ },
+ {
+ "epoch": 48.205714285714286,
+ "grad_norm": 75.8840103149414,
+ "learning_rate": 1.993650793650794e-06,
+ "loss": 0.142,
+ "step": 8436
+ },
+ {
+ "epoch": 48.21142857142857,
+ "grad_norm": 21.041221618652344,
+ "learning_rate": 1.9873015873015873e-06,
+ "loss": 0.1227,
+ "step": 8437
+ },
+ {
+ "epoch": 48.21714285714286,
+ "grad_norm": 29.07630157470703,
+ "learning_rate": 1.9809523809523808e-06,
+ "loss": 0.185,
+ "step": 8438
+ },
+ {
+ "epoch": 48.222857142857144,
+ "grad_norm": 74.97119903564453,
+ "learning_rate": 1.9746031746031747e-06,
+ "loss": 0.1063,
+ "step": 8439
+ },
+ {
+ "epoch": 48.22857142857143,
+ "grad_norm": 32.257911682128906,
+ "learning_rate": 1.9682539682539686e-06,
+ "loss": 0.1198,
+ "step": 8440
+ },
+ {
+ "epoch": 48.23428571428571,
+ "grad_norm": 32.84730529785156,
+ "learning_rate": 1.961904761904762e-06,
+ "loss": 0.1552,
+ "step": 8441
+ },
+ {
+ "epoch": 48.24,
+ "grad_norm": 25.6329345703125,
+ "learning_rate": 1.9555555555555556e-06,
+ "loss": 0.2221,
+ "step": 8442
+ },
+ {
+ "epoch": 48.245714285714286,
+ "grad_norm": 43.75717544555664,
+ "learning_rate": 1.949206349206349e-06,
+ "loss": 0.1057,
+ "step": 8443
+ },
+ {
+ "epoch": 48.25142857142857,
+ "grad_norm": 105.43291473388672,
+ "learning_rate": 1.942857142857143e-06,
+ "loss": 0.1025,
+ "step": 8444
+ },
+ {
+ "epoch": 48.25714285714286,
+ "grad_norm": 51.59490203857422,
+ "learning_rate": 1.936507936507937e-06,
+ "loss": 0.1082,
+ "step": 8445
+ },
+ {
+ "epoch": 48.26285714285714,
+ "grad_norm": 24.162395477294922,
+ "learning_rate": 1.9301587301587303e-06,
+ "loss": 0.1386,
+ "step": 8446
+ },
+ {
+ "epoch": 48.26857142857143,
+ "grad_norm": 55.5849609375,
+ "learning_rate": 1.923809523809524e-06,
+ "loss": 0.2192,
+ "step": 8447
+ },
+ {
+ "epoch": 48.27428571428572,
+ "grad_norm": 107.56834411621094,
+ "learning_rate": 1.9174603174603173e-06,
+ "loss": 0.1601,
+ "step": 8448
+ },
+ {
+ "epoch": 48.28,
+ "grad_norm": 28.928194046020508,
+ "learning_rate": 1.9111111111111112e-06,
+ "loss": 0.1285,
+ "step": 8449
+ },
+ {
+ "epoch": 48.285714285714285,
+ "grad_norm": 19.675289154052734,
+ "learning_rate": 1.9047619047619051e-06,
+ "loss": 0.1165,
+ "step": 8450
+ },
+ {
+ "epoch": 48.29142857142857,
+ "grad_norm": 38.08034133911133,
+ "learning_rate": 1.8984126984126986e-06,
+ "loss": 0.1197,
+ "step": 8451
+ },
+ {
+ "epoch": 48.29714285714286,
+ "grad_norm": 31.34748077392578,
+ "learning_rate": 1.8920634920634923e-06,
+ "loss": 0.1073,
+ "step": 8452
+ },
+ {
+ "epoch": 48.30285714285714,
+ "grad_norm": 77.29824829101562,
+ "learning_rate": 1.8857142857142858e-06,
+ "loss": 0.1481,
+ "step": 8453
+ },
+ {
+ "epoch": 48.308571428571426,
+ "grad_norm": 22.881072998046875,
+ "learning_rate": 1.8793650793650795e-06,
+ "loss": 0.1077,
+ "step": 8454
+ },
+ {
+ "epoch": 48.31428571428572,
+ "grad_norm": 16.868284225463867,
+ "learning_rate": 1.873015873015873e-06,
+ "loss": 0.1626,
+ "step": 8455
+ },
+ {
+ "epoch": 48.32,
+ "grad_norm": 38.000587463378906,
+ "learning_rate": 1.8666666666666669e-06,
+ "loss": 0.1888,
+ "step": 8456
+ },
+ {
+ "epoch": 48.325714285714284,
+ "grad_norm": 54.66694641113281,
+ "learning_rate": 1.8603174603174606e-06,
+ "loss": 0.1385,
+ "step": 8457
+ },
+ {
+ "epoch": 48.331428571428575,
+ "grad_norm": 41.57939910888672,
+ "learning_rate": 1.853968253968254e-06,
+ "loss": 0.1119,
+ "step": 8458
+ },
+ {
+ "epoch": 48.33714285714286,
+ "grad_norm": 26.62154769897461,
+ "learning_rate": 1.8476190476190477e-06,
+ "loss": 0.0939,
+ "step": 8459
+ },
+ {
+ "epoch": 48.34285714285714,
+ "grad_norm": 57.56425094604492,
+ "learning_rate": 1.8412698412698412e-06,
+ "loss": 0.1442,
+ "step": 8460
+ },
+ {
+ "epoch": 48.348571428571425,
+ "grad_norm": 95.6938705444336,
+ "learning_rate": 1.834920634920635e-06,
+ "loss": 0.1888,
+ "step": 8461
+ },
+ {
+ "epoch": 48.354285714285716,
+ "grad_norm": 22.02704620361328,
+ "learning_rate": 1.8285714285714288e-06,
+ "loss": 0.1038,
+ "step": 8462
+ },
+ {
+ "epoch": 48.36,
+ "grad_norm": 28.582277297973633,
+ "learning_rate": 1.8222222222222223e-06,
+ "loss": 0.1591,
+ "step": 8463
+ },
+ {
+ "epoch": 48.36571428571428,
+ "grad_norm": 36.19487762451172,
+ "learning_rate": 1.815873015873016e-06,
+ "loss": 0.1642,
+ "step": 8464
+ },
+ {
+ "epoch": 48.371428571428574,
+ "grad_norm": 46.585838317871094,
+ "learning_rate": 1.8095238095238095e-06,
+ "loss": 0.1996,
+ "step": 8465
+ },
+ {
+ "epoch": 48.37714285714286,
+ "grad_norm": 26.59910774230957,
+ "learning_rate": 1.8031746031746032e-06,
+ "loss": 0.1811,
+ "step": 8466
+ },
+ {
+ "epoch": 48.38285714285714,
+ "grad_norm": 17.714712142944336,
+ "learning_rate": 1.7968253968253967e-06,
+ "loss": 0.1112,
+ "step": 8467
+ },
+ {
+ "epoch": 48.38857142857143,
+ "grad_norm": 54.50025939941406,
+ "learning_rate": 1.7904761904761908e-06,
+ "loss": 0.0952,
+ "step": 8468
+ },
+ {
+ "epoch": 48.394285714285715,
+ "grad_norm": 32.499568939208984,
+ "learning_rate": 1.7841269841269843e-06,
+ "loss": 0.1282,
+ "step": 8469
+ },
+ {
+ "epoch": 48.4,
+ "grad_norm": 35.08049774169922,
+ "learning_rate": 1.777777777777778e-06,
+ "loss": 0.1066,
+ "step": 8470
+ },
+ {
+ "epoch": 48.40571428571428,
+ "grad_norm": 36.948974609375,
+ "learning_rate": 1.7714285714285714e-06,
+ "loss": 0.1615,
+ "step": 8471
+ },
+ {
+ "epoch": 48.41142857142857,
+ "grad_norm": 29.219314575195312,
+ "learning_rate": 1.7650793650793651e-06,
+ "loss": 0.1151,
+ "step": 8472
+ },
+ {
+ "epoch": 48.417142857142856,
+ "grad_norm": 51.52848434448242,
+ "learning_rate": 1.758730158730159e-06,
+ "loss": 0.1129,
+ "step": 8473
+ },
+ {
+ "epoch": 48.42285714285714,
+ "grad_norm": 66.25816345214844,
+ "learning_rate": 1.7523809523809525e-06,
+ "loss": 0.1026,
+ "step": 8474
+ },
+ {
+ "epoch": 48.42857142857143,
+ "grad_norm": 52.06458282470703,
+ "learning_rate": 1.7460317460317462e-06,
+ "loss": 0.1346,
+ "step": 8475
+ },
+ {
+ "epoch": 48.434285714285714,
+ "grad_norm": 38.74394989013672,
+ "learning_rate": 1.7396825396825397e-06,
+ "loss": 0.1779,
+ "step": 8476
+ },
+ {
+ "epoch": 48.44,
+ "grad_norm": 48.79632568359375,
+ "learning_rate": 1.7333333333333334e-06,
+ "loss": 0.1647,
+ "step": 8477
+ },
+ {
+ "epoch": 48.44571428571429,
+ "grad_norm": 57.10668182373047,
+ "learning_rate": 1.7269841269841269e-06,
+ "loss": 0.1178,
+ "step": 8478
+ },
+ {
+ "epoch": 48.45142857142857,
+ "grad_norm": 30.231332778930664,
+ "learning_rate": 1.7206349206349208e-06,
+ "loss": 0.1314,
+ "step": 8479
+ },
+ {
+ "epoch": 48.457142857142856,
+ "grad_norm": 80.39659118652344,
+ "learning_rate": 1.7142857142857145e-06,
+ "loss": 0.1357,
+ "step": 8480
+ },
+ {
+ "epoch": 48.462857142857146,
+ "grad_norm": 65.28248596191406,
+ "learning_rate": 1.707936507936508e-06,
+ "loss": 0.1205,
+ "step": 8481
+ },
+ {
+ "epoch": 48.46857142857143,
+ "grad_norm": 91.30731964111328,
+ "learning_rate": 1.7015873015873017e-06,
+ "loss": 0.1867,
+ "step": 8482
+ },
+ {
+ "epoch": 48.47428571428571,
+ "grad_norm": 32.85758972167969,
+ "learning_rate": 1.6952380952380951e-06,
+ "loss": 0.1569,
+ "step": 8483
+ },
+ {
+ "epoch": 48.48,
+ "grad_norm": 18.715124130249023,
+ "learning_rate": 1.6888888888888888e-06,
+ "loss": 0.1912,
+ "step": 8484
+ },
+ {
+ "epoch": 48.48571428571429,
+ "grad_norm": 29.891252517700195,
+ "learning_rate": 1.6825396825396827e-06,
+ "loss": 0.1739,
+ "step": 8485
+ },
+ {
+ "epoch": 48.49142857142857,
+ "grad_norm": 33.41127014160156,
+ "learning_rate": 1.6761904761904764e-06,
+ "loss": 0.1207,
+ "step": 8486
+ },
+ {
+ "epoch": 48.497142857142855,
+ "grad_norm": 66.02049255371094,
+ "learning_rate": 1.66984126984127e-06,
+ "loss": 0.1557,
+ "step": 8487
+ },
+ {
+ "epoch": 48.502857142857145,
+ "grad_norm": 22.25055503845215,
+ "learning_rate": 1.6634920634920636e-06,
+ "loss": 0.1087,
+ "step": 8488
+ },
+ {
+ "epoch": 48.50857142857143,
+ "grad_norm": 29.54048728942871,
+ "learning_rate": 1.657142857142857e-06,
+ "loss": 0.1358,
+ "step": 8489
+ },
+ {
+ "epoch": 48.51428571428571,
+ "grad_norm": 17.46042251586914,
+ "learning_rate": 1.650793650793651e-06,
+ "loss": 0.0944,
+ "step": 8490
+ },
+ {
+ "epoch": 48.52,
+ "grad_norm": 69.29207611083984,
+ "learning_rate": 1.6444444444444447e-06,
+ "loss": 0.1351,
+ "step": 8491
+ },
+ {
+ "epoch": 48.52571428571429,
+ "grad_norm": 55.9390869140625,
+ "learning_rate": 1.6380952380952382e-06,
+ "loss": 0.1082,
+ "step": 8492
+ },
+ {
+ "epoch": 48.53142857142857,
+ "grad_norm": 40.031368255615234,
+ "learning_rate": 1.6317460317460319e-06,
+ "loss": 0.1397,
+ "step": 8493
+ },
+ {
+ "epoch": 48.537142857142854,
+ "grad_norm": 33.776851654052734,
+ "learning_rate": 1.6253968253968254e-06,
+ "loss": 0.127,
+ "step": 8494
+ },
+ {
+ "epoch": 48.542857142857144,
+ "grad_norm": 41.36642074584961,
+ "learning_rate": 1.619047619047619e-06,
+ "loss": 0.1472,
+ "step": 8495
+ },
+ {
+ "epoch": 48.54857142857143,
+ "grad_norm": 32.5374641418457,
+ "learning_rate": 1.612698412698413e-06,
+ "loss": 0.1165,
+ "step": 8496
+ },
+ {
+ "epoch": 48.55428571428571,
+ "grad_norm": 59.757659912109375,
+ "learning_rate": 1.6063492063492064e-06,
+ "loss": 0.1446,
+ "step": 8497
+ },
+ {
+ "epoch": 48.56,
+ "grad_norm": 28.34662628173828,
+ "learning_rate": 1.6000000000000001e-06,
+ "loss": 0.1066,
+ "step": 8498
+ },
+ {
+ "epoch": 48.565714285714286,
+ "grad_norm": 56.91286849975586,
+ "learning_rate": 1.5936507936507936e-06,
+ "loss": 0.0935,
+ "step": 8499
+ },
+ {
+ "epoch": 48.57142857142857,
+ "grad_norm": 27.71327018737793,
+ "learning_rate": 1.5873015873015873e-06,
+ "loss": 0.1222,
+ "step": 8500
+ },
+ {
+ "epoch": 48.57714285714286,
+ "grad_norm": 70.07229614257812,
+ "learning_rate": 1.5809523809523808e-06,
+ "loss": 0.2353,
+ "step": 8501
+ },
+ {
+ "epoch": 48.582857142857144,
+ "grad_norm": 61.30202865600586,
+ "learning_rate": 1.574603174603175e-06,
+ "loss": 0.2099,
+ "step": 8502
+ },
+ {
+ "epoch": 48.58857142857143,
+ "grad_norm": 30.183975219726562,
+ "learning_rate": 1.5682539682539684e-06,
+ "loss": 0.1917,
+ "step": 8503
+ },
+ {
+ "epoch": 48.59428571428572,
+ "grad_norm": 191.23448181152344,
+ "learning_rate": 1.561904761904762e-06,
+ "loss": 0.2012,
+ "step": 8504
+ },
+ {
+ "epoch": 48.6,
+ "grad_norm": 63.830299377441406,
+ "learning_rate": 1.5555555555555556e-06,
+ "loss": 0.1211,
+ "step": 8505
+ },
+ {
+ "epoch": 48.605714285714285,
+ "grad_norm": 23.363412857055664,
+ "learning_rate": 1.5492063492063493e-06,
+ "loss": 0.1067,
+ "step": 8506
+ },
+ {
+ "epoch": 48.61142857142857,
+ "grad_norm": 41.130428314208984,
+ "learning_rate": 1.542857142857143e-06,
+ "loss": 0.1431,
+ "step": 8507
+ },
+ {
+ "epoch": 48.61714285714286,
+ "grad_norm": 51.86970901489258,
+ "learning_rate": 1.5365079365079365e-06,
+ "loss": 0.1029,
+ "step": 8508
+ },
+ {
+ "epoch": 48.62285714285714,
+ "grad_norm": 46.21601486206055,
+ "learning_rate": 1.5301587301587304e-06,
+ "loss": 0.1305,
+ "step": 8509
+ },
+ {
+ "epoch": 48.628571428571426,
+ "grad_norm": 23.716323852539062,
+ "learning_rate": 1.5238095238095238e-06,
+ "loss": 0.113,
+ "step": 8510
+ },
+ {
+ "epoch": 48.63428571428572,
+ "grad_norm": 82.37190246582031,
+ "learning_rate": 1.5174603174603175e-06,
+ "loss": 0.1198,
+ "step": 8511
+ },
+ {
+ "epoch": 48.64,
+ "grad_norm": 20.815404891967773,
+ "learning_rate": 1.5111111111111112e-06,
+ "loss": 0.1161,
+ "step": 8512
+ },
+ {
+ "epoch": 48.645714285714284,
+ "grad_norm": 20.904815673828125,
+ "learning_rate": 1.504761904761905e-06,
+ "loss": 0.1524,
+ "step": 8513
+ },
+ {
+ "epoch": 48.651428571428575,
+ "grad_norm": 217.2928466796875,
+ "learning_rate": 1.4984126984126984e-06,
+ "loss": 0.1161,
+ "step": 8514
+ },
+ {
+ "epoch": 48.65714285714286,
+ "grad_norm": 46.758567810058594,
+ "learning_rate": 1.492063492063492e-06,
+ "loss": 0.1546,
+ "step": 8515
+ },
+ {
+ "epoch": 48.66285714285714,
+ "grad_norm": 39.870391845703125,
+ "learning_rate": 1.4857142857142858e-06,
+ "loss": 0.0949,
+ "step": 8516
+ },
+ {
+ "epoch": 48.668571428571425,
+ "grad_norm": 63.797542572021484,
+ "learning_rate": 1.4793650793650793e-06,
+ "loss": 0.1232,
+ "step": 8517
+ },
+ {
+ "epoch": 48.674285714285716,
+ "grad_norm": 41.95578384399414,
+ "learning_rate": 1.4730158730158732e-06,
+ "loss": 0.1823,
+ "step": 8518
+ },
+ {
+ "epoch": 48.68,
+ "grad_norm": 77.81548309326172,
+ "learning_rate": 1.4666666666666667e-06,
+ "loss": 0.1086,
+ "step": 8519
+ },
+ {
+ "epoch": 48.68571428571428,
+ "grad_norm": 877.9599609375,
+ "learning_rate": 1.4603174603174606e-06,
+ "loss": 0.1582,
+ "step": 8520
+ },
+ {
+ "epoch": 48.691428571428574,
+ "grad_norm": 50.353511810302734,
+ "learning_rate": 1.453968253968254e-06,
+ "loss": 0.168,
+ "step": 8521
+ },
+ {
+ "epoch": 48.69714285714286,
+ "grad_norm": 43.11552047729492,
+ "learning_rate": 1.4476190476190478e-06,
+ "loss": 0.1206,
+ "step": 8522
+ },
+ {
+ "epoch": 48.70285714285714,
+ "grad_norm": 48.17919158935547,
+ "learning_rate": 1.4412698412698414e-06,
+ "loss": 0.2004,
+ "step": 8523
+ },
+ {
+ "epoch": 48.70857142857143,
+ "grad_norm": 227.92027282714844,
+ "learning_rate": 1.434920634920635e-06,
+ "loss": 0.1767,
+ "step": 8524
+ },
+ {
+ "epoch": 48.714285714285715,
+ "grad_norm": 33.221317291259766,
+ "learning_rate": 1.4285714285714286e-06,
+ "loss": 0.1659,
+ "step": 8525
+ },
+ {
+ "epoch": 48.72,
+ "grad_norm": 17.4228458404541,
+ "learning_rate": 1.4222222222222223e-06,
+ "loss": 0.116,
+ "step": 8526
+ },
+ {
+ "epoch": 48.72571428571428,
+ "grad_norm": 35.15834045410156,
+ "learning_rate": 1.415873015873016e-06,
+ "loss": 0.221,
+ "step": 8527
+ },
+ {
+ "epoch": 48.73142857142857,
+ "grad_norm": 448.2790832519531,
+ "learning_rate": 1.4095238095238095e-06,
+ "loss": 0.2029,
+ "step": 8528
+ },
+ {
+ "epoch": 48.73714285714286,
+ "grad_norm": 43.80533981323242,
+ "learning_rate": 1.4031746031746034e-06,
+ "loss": 0.0964,
+ "step": 8529
+ },
+ {
+ "epoch": 48.74285714285714,
+ "grad_norm": 29.789897918701172,
+ "learning_rate": 1.3968253968253969e-06,
+ "loss": 0.133,
+ "step": 8530
+ },
+ {
+ "epoch": 48.74857142857143,
+ "grad_norm": 49.25103759765625,
+ "learning_rate": 1.3904761904761906e-06,
+ "loss": 0.1735,
+ "step": 8531
+ },
+ {
+ "epoch": 48.754285714285714,
+ "grad_norm": 47.496864318847656,
+ "learning_rate": 1.3841269841269843e-06,
+ "loss": 0.1617,
+ "step": 8532
+ },
+ {
+ "epoch": 48.76,
+ "grad_norm": 32.824214935302734,
+ "learning_rate": 1.3777777777777778e-06,
+ "loss": 0.1524,
+ "step": 8533
+ },
+ {
+ "epoch": 48.76571428571429,
+ "grad_norm": 22.41704750061035,
+ "learning_rate": 1.3714285714285715e-06,
+ "loss": 0.1183,
+ "step": 8534
+ },
+ {
+ "epoch": 48.77142857142857,
+ "grad_norm": 27.07340431213379,
+ "learning_rate": 1.3650793650793652e-06,
+ "loss": 0.1116,
+ "step": 8535
+ },
+ {
+ "epoch": 48.777142857142856,
+ "grad_norm": 32.53962707519531,
+ "learning_rate": 1.3587301587301588e-06,
+ "loss": 0.142,
+ "step": 8536
+ },
+ {
+ "epoch": 48.78285714285714,
+ "grad_norm": 52.79143524169922,
+ "learning_rate": 1.3523809523809523e-06,
+ "loss": 0.1096,
+ "step": 8537
+ },
+ {
+ "epoch": 48.78857142857143,
+ "grad_norm": 48.82529067993164,
+ "learning_rate": 1.346031746031746e-06,
+ "loss": 0.1123,
+ "step": 8538
+ },
+ {
+ "epoch": 48.794285714285714,
+ "grad_norm": 31.630796432495117,
+ "learning_rate": 1.3396825396825397e-06,
+ "loss": 0.098,
+ "step": 8539
+ },
+ {
+ "epoch": 48.8,
+ "grad_norm": 59.10980224609375,
+ "learning_rate": 1.3333333333333334e-06,
+ "loss": 0.1288,
+ "step": 8540
+ },
+ {
+ "epoch": 48.80571428571429,
+ "grad_norm": 34.39828109741211,
+ "learning_rate": 1.3269841269841271e-06,
+ "loss": 0.1212,
+ "step": 8541
+ },
+ {
+ "epoch": 48.81142857142857,
+ "grad_norm": 59.65375900268555,
+ "learning_rate": 1.3206349206349206e-06,
+ "loss": 0.1016,
+ "step": 8542
+ },
+ {
+ "epoch": 48.817142857142855,
+ "grad_norm": 31.157609939575195,
+ "learning_rate": 1.3142857142857145e-06,
+ "loss": 0.1139,
+ "step": 8543
+ },
+ {
+ "epoch": 48.822857142857146,
+ "grad_norm": 42.51649856567383,
+ "learning_rate": 1.307936507936508e-06,
+ "loss": 0.1153,
+ "step": 8544
+ },
+ {
+ "epoch": 48.82857142857143,
+ "grad_norm": 64.73578643798828,
+ "learning_rate": 1.3015873015873017e-06,
+ "loss": 0.1305,
+ "step": 8545
+ },
+ {
+ "epoch": 48.83428571428571,
+ "grad_norm": 42.71442413330078,
+ "learning_rate": 1.2952380952380954e-06,
+ "loss": 0.1291,
+ "step": 8546
+ },
+ {
+ "epoch": 48.84,
+ "grad_norm": 58.65089797973633,
+ "learning_rate": 1.2888888888888889e-06,
+ "loss": 0.2238,
+ "step": 8547
+ },
+ {
+ "epoch": 48.84571428571429,
+ "grad_norm": 21.79871368408203,
+ "learning_rate": 1.2825396825396825e-06,
+ "loss": 0.1413,
+ "step": 8548
+ },
+ {
+ "epoch": 48.85142857142857,
+ "grad_norm": 63.050376892089844,
+ "learning_rate": 1.2761904761904762e-06,
+ "loss": 0.1224,
+ "step": 8549
+ },
+ {
+ "epoch": 48.857142857142854,
+ "grad_norm": 77.241943359375,
+ "learning_rate": 1.26984126984127e-06,
+ "loss": 0.1062,
+ "step": 8550
+ },
+ {
+ "epoch": 48.862857142857145,
+ "grad_norm": 63.40999984741211,
+ "learning_rate": 1.2634920634920634e-06,
+ "loss": 0.1387,
+ "step": 8551
+ },
+ {
+ "epoch": 48.86857142857143,
+ "grad_norm": 60.35781478881836,
+ "learning_rate": 1.2571428571428573e-06,
+ "loss": 0.2139,
+ "step": 8552
+ },
+ {
+ "epoch": 48.87428571428571,
+ "grad_norm": 1141.8404541015625,
+ "learning_rate": 1.2507936507936508e-06,
+ "loss": 0.1724,
+ "step": 8553
+ },
+ {
+ "epoch": 48.88,
+ "grad_norm": 43.64670181274414,
+ "learning_rate": 1.2444444444444445e-06,
+ "loss": 0.1049,
+ "step": 8554
+ },
+ {
+ "epoch": 48.885714285714286,
+ "grad_norm": 37.39603805541992,
+ "learning_rate": 1.2380952380952382e-06,
+ "loss": 0.1447,
+ "step": 8555
+ },
+ {
+ "epoch": 48.89142857142857,
+ "grad_norm": 64.08164978027344,
+ "learning_rate": 1.2317460317460317e-06,
+ "loss": 0.1202,
+ "step": 8556
+ },
+ {
+ "epoch": 48.89714285714286,
+ "grad_norm": 62.142242431640625,
+ "learning_rate": 1.2253968253968254e-06,
+ "loss": 0.1119,
+ "step": 8557
+ },
+ {
+ "epoch": 48.902857142857144,
+ "grad_norm": 53.52615737915039,
+ "learning_rate": 1.219047619047619e-06,
+ "loss": 0.1599,
+ "step": 8558
+ },
+ {
+ "epoch": 48.90857142857143,
+ "grad_norm": 29.947540283203125,
+ "learning_rate": 1.2126984126984128e-06,
+ "loss": 0.1357,
+ "step": 8559
+ },
+ {
+ "epoch": 48.91428571428571,
+ "grad_norm": 46.05306625366211,
+ "learning_rate": 1.2063492063492065e-06,
+ "loss": 0.1385,
+ "step": 8560
+ },
+ {
+ "epoch": 48.92,
+ "grad_norm": 24.307514190673828,
+ "learning_rate": 1.2000000000000002e-06,
+ "loss": 0.1233,
+ "step": 8561
+ },
+ {
+ "epoch": 48.925714285714285,
+ "grad_norm": 30.282651901245117,
+ "learning_rate": 1.1936507936507936e-06,
+ "loss": 0.0997,
+ "step": 8562
+ },
+ {
+ "epoch": 48.93142857142857,
+ "grad_norm": 61.168792724609375,
+ "learning_rate": 1.1873015873015873e-06,
+ "loss": 0.1346,
+ "step": 8563
+ },
+ {
+ "epoch": 48.93714285714286,
+ "grad_norm": 24.79856300354004,
+ "learning_rate": 1.180952380952381e-06,
+ "loss": 0.1251,
+ "step": 8564
+ },
+ {
+ "epoch": 48.94285714285714,
+ "grad_norm": 53.22463607788086,
+ "learning_rate": 1.1746031746031745e-06,
+ "loss": 0.1459,
+ "step": 8565
+ },
+ {
+ "epoch": 48.94857142857143,
+ "grad_norm": 47.92363739013672,
+ "learning_rate": 1.1682539682539684e-06,
+ "loss": 0.0947,
+ "step": 8566
+ },
+ {
+ "epoch": 48.95428571428572,
+ "grad_norm": 45.30970764160156,
+ "learning_rate": 1.161904761904762e-06,
+ "loss": 0.1523,
+ "step": 8567
+ },
+ {
+ "epoch": 48.96,
+ "grad_norm": 14.523917198181152,
+ "learning_rate": 1.1555555555555556e-06,
+ "loss": 0.163,
+ "step": 8568
+ },
+ {
+ "epoch": 48.965714285714284,
+ "grad_norm": 54.226322174072266,
+ "learning_rate": 1.1492063492063493e-06,
+ "loss": 0.1489,
+ "step": 8569
+ },
+ {
+ "epoch": 48.97142857142857,
+ "grad_norm": 27.205904006958008,
+ "learning_rate": 1.142857142857143e-06,
+ "loss": 0.1173,
+ "step": 8570
+ },
+ {
+ "epoch": 48.97714285714286,
+ "grad_norm": 382.25030517578125,
+ "learning_rate": 1.1365079365079365e-06,
+ "loss": 0.2088,
+ "step": 8571
+ },
+ {
+ "epoch": 48.98285714285714,
+ "grad_norm": 43.594722747802734,
+ "learning_rate": 1.1301587301587302e-06,
+ "loss": 0.1103,
+ "step": 8572
+ },
+ {
+ "epoch": 48.988571428571426,
+ "grad_norm": 43.90495300292969,
+ "learning_rate": 1.1238095238095239e-06,
+ "loss": 0.1303,
+ "step": 8573
+ },
+ {
+ "epoch": 48.994285714285716,
+ "grad_norm": 37.924713134765625,
+ "learning_rate": 1.1174603174603173e-06,
+ "loss": 0.1356,
+ "step": 8574
+ },
+ {
+ "epoch": 49.0,
+ "grad_norm": 26.365081787109375,
+ "learning_rate": 1.1111111111111112e-06,
+ "loss": 0.1228,
+ "step": 8575
+ },
+ {
+ "epoch": 49.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5775097608566284,
+ "eval_map": 0.9383,
+ "eval_map_50": 0.9713,
+ "eval_map_75": 0.9641,
+ "eval_map_large": 0.9384,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9383,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7924,
+ "eval_mar_10": 0.9733,
+ "eval_mar_100": 0.9749,
+ "eval_mar_100_per_class": 0.9749,
+ "eval_mar_large": 0.9749,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 14.2189,
+ "eval_samples_per_second": 20.677,
+ "eval_steps_per_second": 2.602,
+ "step": 8575
+ },
+ {
+ "epoch": 49.005714285714284,
+ "grad_norm": 21.205801010131836,
+ "learning_rate": 1.1047619047619047e-06,
+ "loss": 0.1085,
+ "step": 8576
+ },
+ {
+ "epoch": 49.011428571428574,
+ "grad_norm": 32.06498336791992,
+ "learning_rate": 1.0984126984126984e-06,
+ "loss": 0.1302,
+ "step": 8577
+ },
+ {
+ "epoch": 49.01714285714286,
+ "grad_norm": 22.492408752441406,
+ "learning_rate": 1.0920634920634921e-06,
+ "loss": 0.1277,
+ "step": 8578
+ },
+ {
+ "epoch": 49.02285714285714,
+ "grad_norm": 95.90989685058594,
+ "learning_rate": 1.0857142857142858e-06,
+ "loss": 0.124,
+ "step": 8579
+ },
+ {
+ "epoch": 49.02857142857143,
+ "grad_norm": 31.616439819335938,
+ "learning_rate": 1.0793650793650795e-06,
+ "loss": 0.2782,
+ "step": 8580
+ },
+ {
+ "epoch": 49.034285714285716,
+ "grad_norm": 30.350252151489258,
+ "learning_rate": 1.073015873015873e-06,
+ "loss": 0.12,
+ "step": 8581
+ },
+ {
+ "epoch": 49.04,
+ "grad_norm": 45.6641960144043,
+ "learning_rate": 1.0666666666666667e-06,
+ "loss": 0.1092,
+ "step": 8582
+ },
+ {
+ "epoch": 49.04571428571428,
+ "grad_norm": 35.478946685791016,
+ "learning_rate": 1.0603174603174604e-06,
+ "loss": 0.0976,
+ "step": 8583
+ },
+ {
+ "epoch": 49.05142857142857,
+ "grad_norm": 56.84311294555664,
+ "learning_rate": 1.053968253968254e-06,
+ "loss": 0.122,
+ "step": 8584
+ },
+ {
+ "epoch": 49.05714285714286,
+ "grad_norm": 57.067138671875,
+ "learning_rate": 1.0476190476190476e-06,
+ "loss": 0.0985,
+ "step": 8585
+ },
+ {
+ "epoch": 49.06285714285714,
+ "grad_norm": 22.298297882080078,
+ "learning_rate": 1.0412698412698415e-06,
+ "loss": 0.116,
+ "step": 8586
+ },
+ {
+ "epoch": 49.06857142857143,
+ "grad_norm": 45.00107192993164,
+ "learning_rate": 1.034920634920635e-06,
+ "loss": 0.0977,
+ "step": 8587
+ },
+ {
+ "epoch": 49.074285714285715,
+ "grad_norm": 30.702762603759766,
+ "learning_rate": 1.0285714285714286e-06,
+ "loss": 0.1414,
+ "step": 8588
+ },
+ {
+ "epoch": 49.08,
+ "grad_norm": 2884.48095703125,
+ "learning_rate": 1.0222222222222223e-06,
+ "loss": 0.1384,
+ "step": 8589
+ },
+ {
+ "epoch": 49.08571428571429,
+ "grad_norm": 34.0587272644043,
+ "learning_rate": 1.0158730158730158e-06,
+ "loss": 0.1174,
+ "step": 8590
+ },
+ {
+ "epoch": 49.09142857142857,
+ "grad_norm": 47.35640335083008,
+ "learning_rate": 1.0095238095238095e-06,
+ "loss": 0.1177,
+ "step": 8591
+ },
+ {
+ "epoch": 49.097142857142856,
+ "grad_norm": 81.97245788574219,
+ "learning_rate": 1.0031746031746032e-06,
+ "loss": 0.1276,
+ "step": 8592
+ },
+ {
+ "epoch": 49.10285714285714,
+ "grad_norm": 71.25306701660156,
+ "learning_rate": 9.96825396825397e-07,
+ "loss": 0.1174,
+ "step": 8593
+ },
+ {
+ "epoch": 49.10857142857143,
+ "grad_norm": 36.5328483581543,
+ "learning_rate": 9.904761904761904e-07,
+ "loss": 0.1016,
+ "step": 8594
+ },
+ {
+ "epoch": 49.114285714285714,
+ "grad_norm": 33.6234245300293,
+ "learning_rate": 9.841269841269843e-07,
+ "loss": 0.1326,
+ "step": 8595
+ },
+ {
+ "epoch": 49.12,
+ "grad_norm": 109.39087677001953,
+ "learning_rate": 9.777777777777778e-07,
+ "loss": 0.0963,
+ "step": 8596
+ },
+ {
+ "epoch": 49.12571428571429,
+ "grad_norm": 42.822750091552734,
+ "learning_rate": 9.714285714285715e-07,
+ "loss": 0.1104,
+ "step": 8597
+ },
+ {
+ "epoch": 49.13142857142857,
+ "grad_norm": 45.368167877197266,
+ "learning_rate": 9.650793650793652e-07,
+ "loss": 0.1692,
+ "step": 8598
+ },
+ {
+ "epoch": 49.137142857142855,
+ "grad_norm": 23.05076789855957,
+ "learning_rate": 9.587301587301587e-07,
+ "loss": 0.153,
+ "step": 8599
+ },
+ {
+ "epoch": 49.142857142857146,
+ "grad_norm": 67.81226348876953,
+ "learning_rate": 9.523809523809526e-07,
+ "loss": 0.2117,
+ "step": 8600
+ },
+ {
+ "epoch": 49.14857142857143,
+ "grad_norm": 87.15576171875,
+ "learning_rate": 9.460317460317461e-07,
+ "loss": 0.1456,
+ "step": 8601
+ },
+ {
+ "epoch": 49.15428571428571,
+ "grad_norm": 22.477251052856445,
+ "learning_rate": 9.396825396825397e-07,
+ "loss": 0.1114,
+ "step": 8602
+ },
+ {
+ "epoch": 49.16,
+ "grad_norm": 48.177001953125,
+ "learning_rate": 9.333333333333334e-07,
+ "loss": 0.1037,
+ "step": 8603
+ },
+ {
+ "epoch": 49.16571428571429,
+ "grad_norm": 26.15044403076172,
+ "learning_rate": 9.26984126984127e-07,
+ "loss": 0.1582,
+ "step": 8604
+ },
+ {
+ "epoch": 49.17142857142857,
+ "grad_norm": 30.2648983001709,
+ "learning_rate": 9.206349206349206e-07,
+ "loss": 0.1152,
+ "step": 8605
+ },
+ {
+ "epoch": 49.177142857142854,
+ "grad_norm": 19.633533477783203,
+ "learning_rate": 9.142857142857144e-07,
+ "loss": 0.1913,
+ "step": 8606
+ },
+ {
+ "epoch": 49.182857142857145,
+ "grad_norm": 24.708240509033203,
+ "learning_rate": 9.07936507936508e-07,
+ "loss": 0.1436,
+ "step": 8607
+ },
+ {
+ "epoch": 49.18857142857143,
+ "grad_norm": 63.116065979003906,
+ "learning_rate": 9.015873015873016e-07,
+ "loss": 0.163,
+ "step": 8608
+ },
+ {
+ "epoch": 49.19428571428571,
+ "grad_norm": 43.32490539550781,
+ "learning_rate": 8.952380952380954e-07,
+ "loss": 0.1218,
+ "step": 8609
+ },
+ {
+ "epoch": 49.2,
+ "grad_norm": 20.782461166381836,
+ "learning_rate": 8.88888888888889e-07,
+ "loss": 0.1004,
+ "step": 8610
+ },
+ {
+ "epoch": 49.205714285714286,
+ "grad_norm": 61.93737030029297,
+ "learning_rate": 8.825396825396826e-07,
+ "loss": 0.1395,
+ "step": 8611
+ },
+ {
+ "epoch": 49.21142857142857,
+ "grad_norm": 41.53925704956055,
+ "learning_rate": 8.761904761904763e-07,
+ "loss": 0.1455,
+ "step": 8612
+ },
+ {
+ "epoch": 49.21714285714286,
+ "grad_norm": 48.69253921508789,
+ "learning_rate": 8.698412698412699e-07,
+ "loss": 0.0971,
+ "step": 8613
+ },
+ {
+ "epoch": 49.222857142857144,
+ "grad_norm": 17.066259384155273,
+ "learning_rate": 8.634920634920634e-07,
+ "loss": 0.1644,
+ "step": 8614
+ },
+ {
+ "epoch": 49.22857142857143,
+ "grad_norm": 38.65094757080078,
+ "learning_rate": 8.571428571428572e-07,
+ "loss": 0.1795,
+ "step": 8615
+ },
+ {
+ "epoch": 49.23428571428571,
+ "grad_norm": 15.854557991027832,
+ "learning_rate": 8.507936507936508e-07,
+ "loss": 0.1332,
+ "step": 8616
+ },
+ {
+ "epoch": 49.24,
+ "grad_norm": 33.58119583129883,
+ "learning_rate": 8.444444444444444e-07,
+ "loss": 0.1107,
+ "step": 8617
+ },
+ {
+ "epoch": 49.245714285714286,
+ "grad_norm": 19.7595157623291,
+ "learning_rate": 8.380952380952382e-07,
+ "loss": 0.1144,
+ "step": 8618
+ },
+ {
+ "epoch": 49.25142857142857,
+ "grad_norm": 65.31874084472656,
+ "learning_rate": 8.317460317460318e-07,
+ "loss": 0.184,
+ "step": 8619
+ },
+ {
+ "epoch": 49.25714285714286,
+ "grad_norm": 77.79670715332031,
+ "learning_rate": 8.253968253968255e-07,
+ "loss": 0.148,
+ "step": 8620
+ },
+ {
+ "epoch": 49.26285714285714,
+ "grad_norm": 33.58202362060547,
+ "learning_rate": 8.190476190476191e-07,
+ "loss": 0.0955,
+ "step": 8621
+ },
+ {
+ "epoch": 49.26857142857143,
+ "grad_norm": 65.25545501708984,
+ "learning_rate": 8.126984126984127e-07,
+ "loss": 0.1292,
+ "step": 8622
+ },
+ {
+ "epoch": 49.27428571428572,
+ "grad_norm": 26.848678588867188,
+ "learning_rate": 8.063492063492065e-07,
+ "loss": 0.0972,
+ "step": 8623
+ },
+ {
+ "epoch": 49.28,
+ "grad_norm": 33.38542556762695,
+ "learning_rate": 8.000000000000001e-07,
+ "loss": 0.138,
+ "step": 8624
+ },
+ {
+ "epoch": 49.285714285714285,
+ "grad_norm": 36.43754577636719,
+ "learning_rate": 7.936507936507937e-07,
+ "loss": 0.1028,
+ "step": 8625
+ },
+ {
+ "epoch": 49.29142857142857,
+ "grad_norm": 45.26066970825195,
+ "learning_rate": 7.873015873015875e-07,
+ "loss": 0.1223,
+ "step": 8626
+ },
+ {
+ "epoch": 49.29714285714286,
+ "grad_norm": 30.72954750061035,
+ "learning_rate": 7.80952380952381e-07,
+ "loss": 0.0998,
+ "step": 8627
+ },
+ {
+ "epoch": 49.30285714285714,
+ "grad_norm": 21.003131866455078,
+ "learning_rate": 7.746031746031746e-07,
+ "loss": 0.1052,
+ "step": 8628
+ },
+ {
+ "epoch": 49.308571428571426,
+ "grad_norm": 26.676870346069336,
+ "learning_rate": 7.682539682539682e-07,
+ "loss": 0.1597,
+ "step": 8629
+ },
+ {
+ "epoch": 49.31428571428572,
+ "grad_norm": 52.057456970214844,
+ "learning_rate": 7.619047619047619e-07,
+ "loss": 0.1111,
+ "step": 8630
+ },
+ {
+ "epoch": 49.32,
+ "grad_norm": 465.9215393066406,
+ "learning_rate": 7.555555555555556e-07,
+ "loss": 0.1259,
+ "step": 8631
+ },
+ {
+ "epoch": 49.325714285714284,
+ "grad_norm": 63.335350036621094,
+ "learning_rate": 7.492063492063492e-07,
+ "loss": 0.1646,
+ "step": 8632
+ },
+ {
+ "epoch": 49.331428571428575,
+ "grad_norm": 25.190292358398438,
+ "learning_rate": 7.428571428571429e-07,
+ "loss": 0.1723,
+ "step": 8633
+ },
+ {
+ "epoch": 49.33714285714286,
+ "grad_norm": 82.61519622802734,
+ "learning_rate": 7.365079365079366e-07,
+ "loss": 0.1337,
+ "step": 8634
+ },
+ {
+ "epoch": 49.34285714285714,
+ "grad_norm": 55.032196044921875,
+ "learning_rate": 7.301587301587303e-07,
+ "loss": 0.1,
+ "step": 8635
+ },
+ {
+ "epoch": 49.348571428571425,
+ "grad_norm": 90.56881713867188,
+ "learning_rate": 7.238095238095239e-07,
+ "loss": 0.1015,
+ "step": 8636
+ },
+ {
+ "epoch": 49.354285714285716,
+ "grad_norm": 42.1295051574707,
+ "learning_rate": 7.174603174603175e-07,
+ "loss": 0.1947,
+ "step": 8637
+ },
+ {
+ "epoch": 49.36,
+ "grad_norm": 31.98049545288086,
+ "learning_rate": 7.111111111111112e-07,
+ "loss": 0.1481,
+ "step": 8638
+ },
+ {
+ "epoch": 49.36571428571428,
+ "grad_norm": 611.3924560546875,
+ "learning_rate": 7.047619047619048e-07,
+ "loss": 0.1554,
+ "step": 8639
+ },
+ {
+ "epoch": 49.371428571428574,
+ "grad_norm": 68.15882873535156,
+ "learning_rate": 6.984126984126984e-07,
+ "loss": 0.1957,
+ "step": 8640
+ },
+ {
+ "epoch": 49.37714285714286,
+ "grad_norm": 12.325106620788574,
+ "learning_rate": 6.920634920634921e-07,
+ "loss": 0.1073,
+ "step": 8641
+ },
+ {
+ "epoch": 49.38285714285714,
+ "grad_norm": 33.361045837402344,
+ "learning_rate": 6.857142857142857e-07,
+ "loss": 0.0915,
+ "step": 8642
+ },
+ {
+ "epoch": 49.38857142857143,
+ "grad_norm": 63.9436149597168,
+ "learning_rate": 6.793650793650794e-07,
+ "loss": 0.1205,
+ "step": 8643
+ },
+ {
+ "epoch": 49.394285714285715,
+ "grad_norm": 28.686870574951172,
+ "learning_rate": 6.73015873015873e-07,
+ "loss": 0.1347,
+ "step": 8644
+ },
+ {
+ "epoch": 49.4,
+ "grad_norm": 24.923826217651367,
+ "learning_rate": 6.666666666666667e-07,
+ "loss": 0.1315,
+ "step": 8645
+ },
+ {
+ "epoch": 49.40571428571428,
+ "grad_norm": 27.837583541870117,
+ "learning_rate": 6.603174603174603e-07,
+ "loss": 0.1144,
+ "step": 8646
+ },
+ {
+ "epoch": 49.41142857142857,
+ "grad_norm": 282.70849609375,
+ "learning_rate": 6.53968253968254e-07,
+ "loss": 0.1473,
+ "step": 8647
+ },
+ {
+ "epoch": 49.417142857142856,
+ "grad_norm": 277.87164306640625,
+ "learning_rate": 6.476190476190477e-07,
+ "loss": 0.1597,
+ "step": 8648
+ },
+ {
+ "epoch": 49.42285714285714,
+ "grad_norm": 13.175591468811035,
+ "learning_rate": 6.412698412698413e-07,
+ "loss": 0.1367,
+ "step": 8649
+ },
+ {
+ "epoch": 49.42857142857143,
+ "grad_norm": 40.8079719543457,
+ "learning_rate": 6.34920634920635e-07,
+ "loss": 0.1257,
+ "step": 8650
+ },
+ {
+ "epoch": 49.434285714285714,
+ "grad_norm": 100.99056243896484,
+ "learning_rate": 6.285714285714287e-07,
+ "loss": 0.0908,
+ "step": 8651
+ },
+ {
+ "epoch": 49.44,
+ "grad_norm": 85.10597229003906,
+ "learning_rate": 6.222222222222223e-07,
+ "loss": 0.1284,
+ "step": 8652
+ },
+ {
+ "epoch": 49.44571428571429,
+ "grad_norm": 54.369171142578125,
+ "learning_rate": 6.158730158730158e-07,
+ "loss": 0.0878,
+ "step": 8653
+ },
+ {
+ "epoch": 49.45142857142857,
+ "grad_norm": 15.557859420776367,
+ "learning_rate": 6.095238095238095e-07,
+ "loss": 0.1084,
+ "step": 8654
+ },
+ {
+ "epoch": 49.457142857142856,
+ "grad_norm": 28.819398880004883,
+ "learning_rate": 6.031746031746032e-07,
+ "loss": 0.5745,
+ "step": 8655
+ },
+ {
+ "epoch": 49.462857142857146,
+ "grad_norm": 54.07514190673828,
+ "learning_rate": 5.968253968253968e-07,
+ "loss": 0.1251,
+ "step": 8656
+ },
+ {
+ "epoch": 49.46857142857143,
+ "grad_norm": 41.79790115356445,
+ "learning_rate": 5.904761904761905e-07,
+ "loss": 0.1454,
+ "step": 8657
+ },
+ {
+ "epoch": 49.47428571428571,
+ "grad_norm": 52.691593170166016,
+ "learning_rate": 5.841269841269842e-07,
+ "loss": 0.1064,
+ "step": 8658
+ },
+ {
+ "epoch": 49.48,
+ "grad_norm": 58.90571594238281,
+ "learning_rate": 5.777777777777778e-07,
+ "loss": 0.2098,
+ "step": 8659
+ },
+ {
+ "epoch": 49.48571428571429,
+ "grad_norm": 24.718616485595703,
+ "learning_rate": 5.714285714285715e-07,
+ "loss": 0.1146,
+ "step": 8660
+ },
+ {
+ "epoch": 49.49142857142857,
+ "grad_norm": 31.180971145629883,
+ "learning_rate": 5.650793650793651e-07,
+ "loss": 0.1749,
+ "step": 8661
+ },
+ {
+ "epoch": 49.497142857142855,
+ "grad_norm": 30.386829376220703,
+ "learning_rate": 5.587301587301587e-07,
+ "loss": 0.1383,
+ "step": 8662
+ },
+ {
+ "epoch": 49.502857142857145,
+ "grad_norm": 32.07866668701172,
+ "learning_rate": 5.523809523809524e-07,
+ "loss": 0.1654,
+ "step": 8663
+ },
+ {
+ "epoch": 49.50857142857143,
+ "grad_norm": 21.287935256958008,
+ "learning_rate": 5.460317460317461e-07,
+ "loss": 0.1027,
+ "step": 8664
+ },
+ {
+ "epoch": 49.51428571428571,
+ "grad_norm": 69.31727600097656,
+ "learning_rate": 5.396825396825398e-07,
+ "loss": 0.134,
+ "step": 8665
+ },
+ {
+ "epoch": 49.52,
+ "grad_norm": 26.733562469482422,
+ "learning_rate": 5.333333333333333e-07,
+ "loss": 0.1535,
+ "step": 8666
+ },
+ {
+ "epoch": 49.52571428571429,
+ "grad_norm": 40.09874725341797,
+ "learning_rate": 5.26984126984127e-07,
+ "loss": 0.116,
+ "step": 8667
+ },
+ {
+ "epoch": 49.53142857142857,
+ "grad_norm": 38.64852523803711,
+ "learning_rate": 5.206349206349207e-07,
+ "loss": 0.1157,
+ "step": 8668
+ },
+ {
+ "epoch": 49.537142857142854,
+ "grad_norm": 41.49262619018555,
+ "learning_rate": 5.142857142857143e-07,
+ "loss": 0.1353,
+ "step": 8669
+ },
+ {
+ "epoch": 49.542857142857144,
+ "grad_norm": 24.964202880859375,
+ "learning_rate": 5.079365079365079e-07,
+ "loss": 0.1327,
+ "step": 8670
+ },
+ {
+ "epoch": 49.54857142857143,
+ "grad_norm": 51.50368118286133,
+ "learning_rate": 5.015873015873016e-07,
+ "loss": 0.0924,
+ "step": 8671
+ },
+ {
+ "epoch": 49.55428571428571,
+ "grad_norm": 25.396739959716797,
+ "learning_rate": 4.952380952380952e-07,
+ "loss": 0.1331,
+ "step": 8672
+ },
+ {
+ "epoch": 49.56,
+ "grad_norm": 55.02810287475586,
+ "learning_rate": 4.888888888888889e-07,
+ "loss": 0.0904,
+ "step": 8673
+ },
+ {
+ "epoch": 49.565714285714286,
+ "grad_norm": 68.27679443359375,
+ "learning_rate": 4.825396825396826e-07,
+ "loss": 0.2253,
+ "step": 8674
+ },
+ {
+ "epoch": 49.57142857142857,
+ "grad_norm": 46.043724060058594,
+ "learning_rate": 4.761904761904763e-07,
+ "loss": 0.1871,
+ "step": 8675
+ },
+ {
+ "epoch": 49.57714285714286,
+ "grad_norm": 38.0540771484375,
+ "learning_rate": 4.6984126984126987e-07,
+ "loss": 0.1149,
+ "step": 8676
+ },
+ {
+ "epoch": 49.582857142857144,
+ "grad_norm": 21.746551513671875,
+ "learning_rate": 4.634920634920635e-07,
+ "loss": 0.1411,
+ "step": 8677
+ },
+ {
+ "epoch": 49.58857142857143,
+ "grad_norm": 21.1721134185791,
+ "learning_rate": 4.571428571428572e-07,
+ "loss": 0.0846,
+ "step": 8678
+ },
+ {
+ "epoch": 49.59428571428572,
+ "grad_norm": 30.42035675048828,
+ "learning_rate": 4.507936507936508e-07,
+ "loss": 0.1854,
+ "step": 8679
+ },
+ {
+ "epoch": 49.6,
+ "grad_norm": 17.304428100585938,
+ "learning_rate": 4.444444444444445e-07,
+ "loss": 0.1107,
+ "step": 8680
+ },
+ {
+ "epoch": 49.605714285714285,
+ "grad_norm": 97.10359954833984,
+ "learning_rate": 4.3809523809523813e-07,
+ "loss": 0.1457,
+ "step": 8681
+ },
+ {
+ "epoch": 49.61142857142857,
+ "grad_norm": 20.74465560913086,
+ "learning_rate": 4.317460317460317e-07,
+ "loss": 0.1375,
+ "step": 8682
+ },
+ {
+ "epoch": 49.61714285714286,
+ "grad_norm": 28.685523986816406,
+ "learning_rate": 4.253968253968254e-07,
+ "loss": 0.1129,
+ "step": 8683
+ },
+ {
+ "epoch": 49.62285714285714,
+ "grad_norm": 50.773048400878906,
+ "learning_rate": 4.190476190476191e-07,
+ "loss": 0.1449,
+ "step": 8684
+ },
+ {
+ "epoch": 49.628571428571426,
+ "grad_norm": 19.267837524414062,
+ "learning_rate": 4.1269841269841275e-07,
+ "loss": 0.1278,
+ "step": 8685
+ },
+ {
+ "epoch": 49.63428571428572,
+ "grad_norm": 17.098880767822266,
+ "learning_rate": 4.0634920634920634e-07,
+ "loss": 0.1052,
+ "step": 8686
+ },
+ {
+ "epoch": 49.64,
+ "grad_norm": 39.21644973754883,
+ "learning_rate": 4.0000000000000003e-07,
+ "loss": 0.0809,
+ "step": 8687
+ },
+ {
+ "epoch": 49.645714285714284,
+ "grad_norm": 159.01382446289062,
+ "learning_rate": 3.9365079365079373e-07,
+ "loss": 0.1159,
+ "step": 8688
+ },
+ {
+ "epoch": 49.651428571428575,
+ "grad_norm": 44.169063568115234,
+ "learning_rate": 3.873015873015873e-07,
+ "loss": 0.2899,
+ "step": 8689
+ },
+ {
+ "epoch": 49.65714285714286,
+ "grad_norm": 55.33827590942383,
+ "learning_rate": 3.8095238095238096e-07,
+ "loss": 0.0921,
+ "step": 8690
+ },
+ {
+ "epoch": 49.66285714285714,
+ "grad_norm": 16.90172004699707,
+ "learning_rate": 3.746031746031746e-07,
+ "loss": 0.1106,
+ "step": 8691
+ },
+ {
+ "epoch": 49.668571428571425,
+ "grad_norm": 42.39299011230469,
+ "learning_rate": 3.682539682539683e-07,
+ "loss": 0.1854,
+ "step": 8692
+ },
+ {
+ "epoch": 49.674285714285716,
+ "grad_norm": 42.97679901123047,
+ "learning_rate": 3.6190476190476194e-07,
+ "loss": 0.1753,
+ "step": 8693
+ },
+ {
+ "epoch": 49.68,
+ "grad_norm": 32.868167877197266,
+ "learning_rate": 3.555555555555556e-07,
+ "loss": 0.1082,
+ "step": 8694
+ },
+ {
+ "epoch": 49.68571428571428,
+ "grad_norm": 74.34040069580078,
+ "learning_rate": 3.492063492063492e-07,
+ "loss": 0.1063,
+ "step": 8695
+ },
+ {
+ "epoch": 49.691428571428574,
+ "grad_norm": 21.42570686340332,
+ "learning_rate": 3.4285714285714286e-07,
+ "loss": 0.1261,
+ "step": 8696
+ },
+ {
+ "epoch": 49.69714285714286,
+ "grad_norm": 35.38351821899414,
+ "learning_rate": 3.365079365079365e-07,
+ "loss": 0.1202,
+ "step": 8697
+ },
+ {
+ "epoch": 49.70285714285714,
+ "grad_norm": 32.82429122924805,
+ "learning_rate": 3.3015873015873015e-07,
+ "loss": 0.1131,
+ "step": 8698
+ },
+ {
+ "epoch": 49.70857142857143,
+ "grad_norm": 36.85771560668945,
+ "learning_rate": 3.2380952380952384e-07,
+ "loss": 0.1317,
+ "step": 8699
+ },
+ {
+ "epoch": 49.714285714285715,
+ "grad_norm": 20.852977752685547,
+ "learning_rate": 3.174603174603175e-07,
+ "loss": 0.1724,
+ "step": 8700
+ },
+ {
+ "epoch": 49.72,
+ "grad_norm": 27.297931671142578,
+ "learning_rate": 3.111111111111111e-07,
+ "loss": 0.1426,
+ "step": 8701
+ },
+ {
+ "epoch": 49.72571428571428,
+ "grad_norm": 80.59913635253906,
+ "learning_rate": 3.0476190476190477e-07,
+ "loss": 0.1696,
+ "step": 8702
+ },
+ {
+ "epoch": 49.73142857142857,
+ "grad_norm": 52.69660949707031,
+ "learning_rate": 2.984126984126984e-07,
+ "loss": 0.1506,
+ "step": 8703
+ },
+ {
+ "epoch": 49.73714285714286,
+ "grad_norm": 44.189369201660156,
+ "learning_rate": 2.920634920634921e-07,
+ "loss": 0.1293,
+ "step": 8704
+ },
+ {
+ "epoch": 49.74285714285714,
+ "grad_norm": 83.07208251953125,
+ "learning_rate": 2.8571428571428575e-07,
+ "loss": 0.143,
+ "step": 8705
+ },
+ {
+ "epoch": 49.74857142857143,
+ "grad_norm": 98.30414581298828,
+ "learning_rate": 2.7936507936507934e-07,
+ "loss": 0.1133,
+ "step": 8706
+ },
+ {
+ "epoch": 49.754285714285714,
+ "grad_norm": 39.12610626220703,
+ "learning_rate": 2.7301587301587303e-07,
+ "loss": 0.1472,
+ "step": 8707
+ },
+ {
+ "epoch": 49.76,
+ "grad_norm": 25.01207160949707,
+ "learning_rate": 2.6666666666666667e-07,
+ "loss": 0.1268,
+ "step": 8708
+ },
+ {
+ "epoch": 49.76571428571429,
+ "grad_norm": 29.20207405090332,
+ "learning_rate": 2.6031746031746037e-07,
+ "loss": 0.1149,
+ "step": 8709
+ },
+ {
+ "epoch": 49.77142857142857,
+ "grad_norm": 49.14268493652344,
+ "learning_rate": 2.5396825396825396e-07,
+ "loss": 0.1132,
+ "step": 8710
+ },
+ {
+ "epoch": 49.777142857142856,
+ "grad_norm": 239.8989715576172,
+ "learning_rate": 2.476190476190476e-07,
+ "loss": 0.1319,
+ "step": 8711
+ },
+ {
+ "epoch": 49.78285714285714,
+ "grad_norm": 737.55810546875,
+ "learning_rate": 2.412698412698413e-07,
+ "loss": 0.1384,
+ "step": 8712
+ },
+ {
+ "epoch": 49.78857142857143,
+ "grad_norm": 53.101776123046875,
+ "learning_rate": 2.3492063492063493e-07,
+ "loss": 0.1095,
+ "step": 8713
+ },
+ {
+ "epoch": 49.794285714285714,
+ "grad_norm": 54.95819854736328,
+ "learning_rate": 2.285714285714286e-07,
+ "loss": 0.103,
+ "step": 8714
+ },
+ {
+ "epoch": 49.8,
+ "grad_norm": 23.40339469909668,
+ "learning_rate": 2.2222222222222224e-07,
+ "loss": 0.1043,
+ "step": 8715
+ },
+ {
+ "epoch": 49.80571428571429,
+ "grad_norm": 57.28781509399414,
+ "learning_rate": 2.1587301587301586e-07,
+ "loss": 0.1065,
+ "step": 8716
+ },
+ {
+ "epoch": 49.81142857142857,
+ "grad_norm": 57.101924896240234,
+ "learning_rate": 2.0952380952380955e-07,
+ "loss": 0.1305,
+ "step": 8717
+ },
+ {
+ "epoch": 49.817142857142855,
+ "grad_norm": 59.88277053833008,
+ "learning_rate": 2.0317460317460317e-07,
+ "loss": 0.1376,
+ "step": 8718
+ },
+ {
+ "epoch": 49.822857142857146,
+ "grad_norm": 39.137847900390625,
+ "learning_rate": 1.9682539682539686e-07,
+ "loss": 0.1243,
+ "step": 8719
+ },
+ {
+ "epoch": 49.82857142857143,
+ "grad_norm": 18.661094665527344,
+ "learning_rate": 1.9047619047619048e-07,
+ "loss": 0.1702,
+ "step": 8720
+ },
+ {
+ "epoch": 49.83428571428571,
+ "grad_norm": 55.55431365966797,
+ "learning_rate": 1.8412698412698415e-07,
+ "loss": 0.1566,
+ "step": 8721
+ },
+ {
+ "epoch": 49.84,
+ "grad_norm": 55.64157485961914,
+ "learning_rate": 1.777777777777778e-07,
+ "loss": 0.1292,
+ "step": 8722
+ },
+ {
+ "epoch": 49.84571428571429,
+ "grad_norm": 19.37632942199707,
+ "learning_rate": 1.7142857142857143e-07,
+ "loss": 0.1204,
+ "step": 8723
+ },
+ {
+ "epoch": 49.85142857142857,
+ "grad_norm": 37.731903076171875,
+ "learning_rate": 1.6507936507936507e-07,
+ "loss": 0.1444,
+ "step": 8724
+ },
+ {
+ "epoch": 49.857142857142854,
+ "grad_norm": 88.25274658203125,
+ "learning_rate": 1.5873015873015874e-07,
+ "loss": 0.1353,
+ "step": 8725
+ },
+ {
+ "epoch": 49.862857142857145,
+ "grad_norm": 97.2085952758789,
+ "learning_rate": 1.5238095238095238e-07,
+ "loss": 0.1177,
+ "step": 8726
+ },
+ {
+ "epoch": 49.86857142857143,
+ "grad_norm": 34.84482192993164,
+ "learning_rate": 1.4603174603174605e-07,
+ "loss": 0.1166,
+ "step": 8727
+ },
+ {
+ "epoch": 49.87428571428571,
+ "grad_norm": 57.15216064453125,
+ "learning_rate": 1.3968253968253967e-07,
+ "loss": 0.1416,
+ "step": 8728
+ },
+ {
+ "epoch": 49.88,
+ "grad_norm": 37.720645904541016,
+ "learning_rate": 1.3333333333333334e-07,
+ "loss": 0.1528,
+ "step": 8729
+ },
+ {
+ "epoch": 49.885714285714286,
+ "grad_norm": 48.101558685302734,
+ "learning_rate": 1.2698412698412698e-07,
+ "loss": 0.1142,
+ "step": 8730
+ },
+ {
+ "epoch": 49.89142857142857,
+ "grad_norm": 41.24326705932617,
+ "learning_rate": 1.2063492063492065e-07,
+ "loss": 0.1573,
+ "step": 8731
+ },
+ {
+ "epoch": 49.89714285714286,
+ "grad_norm": 70.49794006347656,
+ "learning_rate": 1.142857142857143e-07,
+ "loss": 0.1123,
+ "step": 8732
+ },
+ {
+ "epoch": 49.902857142857144,
+ "grad_norm": 26.48020362854004,
+ "learning_rate": 1.0793650793650793e-07,
+ "loss": 0.1771,
+ "step": 8733
+ },
+ {
+ "epoch": 49.90857142857143,
+ "grad_norm": 30.033111572265625,
+ "learning_rate": 1.0158730158730159e-07,
+ "loss": 0.1004,
+ "step": 8734
+ },
+ {
+ "epoch": 49.91428571428571,
+ "grad_norm": 31.556259155273438,
+ "learning_rate": 9.523809523809524e-08,
+ "loss": 0.1414,
+ "step": 8735
+ },
+ {
+ "epoch": 49.92,
+ "grad_norm": 24.438173294067383,
+ "learning_rate": 8.88888888888889e-08,
+ "loss": 0.133,
+ "step": 8736
+ },
+ {
+ "epoch": 49.925714285714285,
+ "grad_norm": 24.674129486083984,
+ "learning_rate": 8.253968253968254e-08,
+ "loss": 0.1231,
+ "step": 8737
+ },
+ {
+ "epoch": 49.93142857142857,
+ "grad_norm": 59.45757293701172,
+ "learning_rate": 7.619047619047619e-08,
+ "loss": 0.1327,
+ "step": 8738
+ },
+ {
+ "epoch": 49.93714285714286,
+ "grad_norm": 26.112653732299805,
+ "learning_rate": 6.984126984126983e-08,
+ "loss": 0.0904,
+ "step": 8739
+ },
+ {
+ "epoch": 49.94285714285714,
+ "grad_norm": 41.43610382080078,
+ "learning_rate": 6.349206349206349e-08,
+ "loss": 0.1086,
+ "step": 8740
+ },
+ {
+ "epoch": 49.94857142857143,
+ "grad_norm": 48.097923278808594,
+ "learning_rate": 5.714285714285715e-08,
+ "loss": 0.1219,
+ "step": 8741
+ },
+ {
+ "epoch": 49.95428571428572,
+ "grad_norm": 34.53264236450195,
+ "learning_rate": 5.079365079365079e-08,
+ "loss": 0.1583,
+ "step": 8742
+ },
+ {
+ "epoch": 49.96,
+ "grad_norm": 33.10739517211914,
+ "learning_rate": 4.444444444444445e-08,
+ "loss": 0.1107,
+ "step": 8743
+ },
+ {
+ "epoch": 49.965714285714284,
+ "grad_norm": 14.772244453430176,
+ "learning_rate": 3.8095238095238096e-08,
+ "loss": 0.1375,
+ "step": 8744
+ },
+ {
+ "epoch": 49.97142857142857,
+ "grad_norm": 29.29127311706543,
+ "learning_rate": 3.1746031746031744e-08,
+ "loss": 0.1135,
+ "step": 8745
+ },
+ {
+ "epoch": 49.97714285714286,
+ "grad_norm": 25.30208396911621,
+ "learning_rate": 2.5396825396825396e-08,
+ "loss": 0.0992,
+ "step": 8746
+ },
+ {
+ "epoch": 49.98285714285714,
+ "grad_norm": 50.606361389160156,
+ "learning_rate": 1.9047619047619048e-08,
+ "loss": 0.1279,
+ "step": 8747
+ },
+ {
+ "epoch": 49.988571428571426,
+ "grad_norm": 40.236900329589844,
+ "learning_rate": 1.2698412698412698e-08,
+ "loss": 0.1602,
+ "step": 8748
+ },
+ {
+ "epoch": 49.994285714285716,
+ "grad_norm": 18.14468765258789,
+ "learning_rate": 6.349206349206349e-09,
+ "loss": 0.1338,
+ "step": 8749
+ },
+ {
+ "epoch": 50.0,
+ "grad_norm": 48.93581771850586,
+ "learning_rate": 0.0,
+ "loss": 0.1094,
+ "step": 8750
+ },
+ {
+ "epoch": 50.0,
+ "eval_classes": 0,
+ "eval_loss": 0.5793861150741577,
+ "eval_map": 0.9388,
+ "eval_map_50": 0.9721,
+ "eval_map_75": 0.9641,
+ "eval_map_large": 0.9388,
+ "eval_map_medium": -1.0,
+ "eval_map_per_class": 0.9388,
+ "eval_map_small": -1.0,
+ "eval_mar_1": 0.7959,
+ "eval_mar_10": 0.9733,
+ "eval_mar_100": 0.9749,
+ "eval_mar_100_per_class": 0.9749,
+ "eval_mar_large": 0.9749,
+ "eval_mar_medium": -1.0,
+ "eval_mar_small": -1.0,
+ "eval_runtime": 13.5545,
+ "eval_samples_per_second": 21.69,
+ "eval_steps_per_second": 2.73,
+ "step": 8750
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 8750,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 50,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "EarlyStoppingCallback": {
+ "args": {
+ "early_stopping_patience": 15,
+ "early_stopping_threshold": 0.0
+ },
+ "attributes": {
+ "early_stopping_patience_counter": 14
+ }
+ },
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 2.2356489609216e+19,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/best_checkpoint/training_args.bin b/best_checkpoint/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b94cd4028ea19975be06e91a5eb60032a049eff1
--- /dev/null
+++ b/best_checkpoint/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3706f9f79f5744209c871ccf9fbee60fa5a8e284a17427199064284853941395
+size 5496
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f75103d2a8582229fe2ced88551ad02cbb27e1c
--- /dev/null
+++ b/config.json
@@ -0,0 +1,61 @@
+{
+ "_name_or_path": "microsoft/conditional-detr-resnet-50",
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "architectures": [
+ "ConditionalDetrForObjectDetection"
+ ],
+ "attention_dropout": 0.0,
+ "auxiliary_loss": false,
+ "backbone": "resnet50",
+ "backbone_config": null,
+ "backbone_kwargs": {
+ "in_chans": 3,
+ "out_indices": [
+ 1,
+ 2,
+ 3,
+ 4
+ ]
+ },
+ "bbox_cost": 5,
+ "bbox_loss_coefficient": 5,
+ "class_cost": 2,
+ "cls_loss_coefficient": 2,
+ "d_model": 256,
+ "decoder_attention_heads": 8,
+ "decoder_ffn_dim": 2048,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 6,
+ "dice_loss_coefficient": 1,
+ "dilation": false,
+ "dropout": 0.1,
+ "encoder_attention_heads": 8,
+ "encoder_ffn_dim": 2048,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 6,
+ "focal_alpha": 0.25,
+ "giou_cost": 2,
+ "giou_loss_coefficient": 2,
+ "id2label": {
+ "0": "signature"
+ },
+ "init_std": 0.02,
+ "init_xavier_std": 1.0,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "signature": 0
+ },
+ "mask_loss_coefficient": 1,
+ "max_position_embeddings": 1024,
+ "model_type": "conditional_detr",
+ "num_channels": 3,
+ "num_hidden_layers": 6,
+ "num_queries": 300,
+ "position_embedding_type": "sine",
+ "scale_embedding": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.46.3",
+ "use_pretrained_backbone": true,
+ "use_timm_backbone": true
+}
diff --git a/eval/cpu/confusion_matrix.png b/eval/cpu/confusion_matrix.png
new file mode 100644
index 0000000000000000000000000000000000000000..7f2f714632f724cef03630c5d2ca0fc496e25918
Binary files /dev/null and b/eval/cpu/confusion_matrix.png differ
diff --git a/eval/cpu/inference_grid_0.png b/eval/cpu/inference_grid_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..8de1fbc123b2cf18fc4698918b915bf5fb76aed7
--- /dev/null
+++ b/eval/cpu/inference_grid_0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:531b5f201a53888a78883489ebe2b4abcedb73829aca2838a925d4c003917e33
+size 115618
diff --git a/eval/cpu/inference_grid_1.png b/eval/cpu/inference_grid_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..1f20309c4fbef8a3c873cc0e05e54f5ce8021cb8
--- /dev/null
+++ b/eval/cpu/inference_grid_1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75a74c8f9e0be541121074e1146d26e64ff84b46b76fa673a2f23d5358babb65
+size 129712
diff --git a/eval/cpu/inference_grid_10.png b/eval/cpu/inference_grid_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..ffabf4f6d030bf38dad7bf336b4d774b9627c53a
--- /dev/null
+++ b/eval/cpu/inference_grid_10.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39e12511e3892731bfe17542eacd66cd59fe55860d88b0f5eeb027eb14b50fd9
+size 111497
diff --git a/eval/cpu/inference_grid_11.png b/eval/cpu/inference_grid_11.png
new file mode 100644
index 0000000000000000000000000000000000000000..15d67438388e1cdbe7baaea4cd1f032a0015a525
--- /dev/null
+++ b/eval/cpu/inference_grid_11.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10023be3aacc95f06ba3ad7eea15f11762dde10ce82728f23d76cc6b83df34b0
+size 139997
diff --git a/eval/cpu/inference_grid_12.png b/eval/cpu/inference_grid_12.png
new file mode 100644
index 0000000000000000000000000000000000000000..0cf10e1e259648a7720ed042a682b5a7e5c7fc6f
Binary files /dev/null and b/eval/cpu/inference_grid_12.png differ
diff --git a/eval/cpu/inference_grid_13.png b/eval/cpu/inference_grid_13.png
new file mode 100644
index 0000000000000000000000000000000000000000..469b50b0c4c52d1755f1c2a6b18774976726e13a
Binary files /dev/null and b/eval/cpu/inference_grid_13.png differ
diff --git a/eval/cpu/inference_grid_14.png b/eval/cpu/inference_grid_14.png
new file mode 100644
index 0000000000000000000000000000000000000000..a8d46a0c0aeba076c88d444a6e7fe96991d22fbd
Binary files /dev/null and b/eval/cpu/inference_grid_14.png differ
diff --git a/eval/cpu/inference_grid_15.png b/eval/cpu/inference_grid_15.png
new file mode 100644
index 0000000000000000000000000000000000000000..ac37870e39bbe5f3bd26a39492dc5bd2a810f248
Binary files /dev/null and b/eval/cpu/inference_grid_15.png differ
diff --git a/eval/cpu/inference_grid_16.png b/eval/cpu/inference_grid_16.png
new file mode 100644
index 0000000000000000000000000000000000000000..bb054abfdafb2630f136bb89dcc90e70320db49a
--- /dev/null
+++ b/eval/cpu/inference_grid_16.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c16506561145145a7c3d67d326c8fbecaa9c3db93b052e544a100a4a6f77289
+size 143964
diff --git a/eval/cpu/inference_grid_17.png b/eval/cpu/inference_grid_17.png
new file mode 100644
index 0000000000000000000000000000000000000000..d81f8f0e31bc2eab176a5a5f76adb609818ae00c
Binary files /dev/null and b/eval/cpu/inference_grid_17.png differ
diff --git a/eval/cpu/inference_grid_18.png b/eval/cpu/inference_grid_18.png
new file mode 100644
index 0000000000000000000000000000000000000000..68cea80454f4cc93429fd76cbadba46b5621b70e
Binary files /dev/null and b/eval/cpu/inference_grid_18.png differ
diff --git a/eval/cpu/inference_grid_19.png b/eval/cpu/inference_grid_19.png
new file mode 100644
index 0000000000000000000000000000000000000000..fbf0f7b127265413e7522923d7c5d9988796d0f1
--- /dev/null
+++ b/eval/cpu/inference_grid_19.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8db0a3330ad0555c48b7fa9e5653d96a6068cee7a61b803a85fe6baba0b887e3
+size 112716
diff --git a/eval/cpu/inference_grid_2.png b/eval/cpu/inference_grid_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..8e858e7deeec4e5293e8dbc44268f2c9e831ad77
--- /dev/null
+++ b/eval/cpu/inference_grid_2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08c04041c23c4290b35614cd463e5f3d94fbd1f4130bf1beec2e87a7a136cf38
+size 100960
diff --git a/eval/cpu/inference_grid_20.png b/eval/cpu/inference_grid_20.png
new file mode 100644
index 0000000000000000000000000000000000000000..977df1464d59ae55a0518f636216d7ea8f0cb92a
--- /dev/null
+++ b/eval/cpu/inference_grid_20.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9045dc42b8df8df426cd0544df2f6a287ad62fffa5d43fd47931419a57f6004e
+size 157969
diff --git a/eval/cpu/inference_grid_21.png b/eval/cpu/inference_grid_21.png
new file mode 100644
index 0000000000000000000000000000000000000000..f2fc6c16a0771d53c90f20fb4664ce7f372e2522
Binary files /dev/null and b/eval/cpu/inference_grid_21.png differ
diff --git a/eval/cpu/inference_grid_22.png b/eval/cpu/inference_grid_22.png
new file mode 100644
index 0000000000000000000000000000000000000000..728c960025c26d3be319a146fd9800d2290b82f9
--- /dev/null
+++ b/eval/cpu/inference_grid_22.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21de7020d60fd497f31ba954ff35fcced26fb75cf2224ad10049b1046bee202f
+size 116087
diff --git a/eval/cpu/inference_grid_23.png b/eval/cpu/inference_grid_23.png
new file mode 100644
index 0000000000000000000000000000000000000000..7ac776f83b7f6d34d612561efe5613e6c86bfc79
--- /dev/null
+++ b/eval/cpu/inference_grid_23.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84b4b553806a31b7fe66a330087d3a2dc1fb23a5037f15e4c35c30cbb15acdba
+size 145482
diff --git a/eval/cpu/inference_grid_24.png b/eval/cpu/inference_grid_24.png
new file mode 100644
index 0000000000000000000000000000000000000000..804ded4c2ed36ba80ab7f69cc96860ca4677de1f
Binary files /dev/null and b/eval/cpu/inference_grid_24.png differ
diff --git a/eval/cpu/inference_grid_3.png b/eval/cpu/inference_grid_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..86bdee4fb7c6c90e01fb8ab4f1cd262ef3d9dd4e
Binary files /dev/null and b/eval/cpu/inference_grid_3.png differ
diff --git a/eval/cpu/inference_grid_4.png b/eval/cpu/inference_grid_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..c87774fa5cf4a40cd3eb881d746557bc768a3dd8
Binary files /dev/null and b/eval/cpu/inference_grid_4.png differ
diff --git a/eval/cpu/inference_grid_5.png b/eval/cpu/inference_grid_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..71c10626e12b937cd8ad217dfc0e10c006fa7205
--- /dev/null
+++ b/eval/cpu/inference_grid_5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8263af9871e524c9745f2160e83e9eb8009349c8847c05b45f8a62f7e267b999
+size 110258
diff --git a/eval/cpu/inference_grid_6.png b/eval/cpu/inference_grid_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..e612fcedebee35ff0d51d24547d0247a0f6f4ba9
Binary files /dev/null and b/eval/cpu/inference_grid_6.png differ
diff --git a/eval/cpu/inference_grid_7.png b/eval/cpu/inference_grid_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a8ec3067ed29ebe36d32e9164939167f1dfd1d5
Binary files /dev/null and b/eval/cpu/inference_grid_7.png differ
diff --git a/eval/cpu/inference_grid_8.png b/eval/cpu/inference_grid_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..f682e85d9bc2a2a349fabb4d11c3b0fd5f6a6e78
--- /dev/null
+++ b/eval/cpu/inference_grid_8.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29b7bd53402eb2c8fdfae4bea951cb590cef2b16349f072024966417f83b55f7
+size 115151
diff --git a/eval/cpu/inference_grid_9.png b/eval/cpu/inference_grid_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..538550324be98fb6796080695c390d9aba6f8c2c
--- /dev/null
+++ b/eval/cpu/inference_grid_9.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fc49bd0f4cc40091408d4c03a317ec602e89bc72d614abb17cc44971d99973
+size 145357
diff --git a/eval/gpu/confusion_matrix.png b/eval/gpu/confusion_matrix.png
new file mode 100644
index 0000000000000000000000000000000000000000..7f2f714632f724cef03630c5d2ca0fc496e25918
Binary files /dev/null and b/eval/gpu/confusion_matrix.png differ
diff --git a/eval/gpu/inference_grid_0.png b/eval/gpu/inference_grid_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..8de1fbc123b2cf18fc4698918b915bf5fb76aed7
--- /dev/null
+++ b/eval/gpu/inference_grid_0.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:531b5f201a53888a78883489ebe2b4abcedb73829aca2838a925d4c003917e33
+size 115618
diff --git a/eval/gpu/inference_grid_1.png b/eval/gpu/inference_grid_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..1f20309c4fbef8a3c873cc0e05e54f5ce8021cb8
--- /dev/null
+++ b/eval/gpu/inference_grid_1.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75a74c8f9e0be541121074e1146d26e64ff84b46b76fa673a2f23d5358babb65
+size 129712
diff --git a/eval/gpu/inference_grid_10.png b/eval/gpu/inference_grid_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..ffabf4f6d030bf38dad7bf336b4d774b9627c53a
--- /dev/null
+++ b/eval/gpu/inference_grid_10.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39e12511e3892731bfe17542eacd66cd59fe55860d88b0f5eeb027eb14b50fd9
+size 111497
diff --git a/eval/gpu/inference_grid_11.png b/eval/gpu/inference_grid_11.png
new file mode 100644
index 0000000000000000000000000000000000000000..15d67438388e1cdbe7baaea4cd1f032a0015a525
--- /dev/null
+++ b/eval/gpu/inference_grid_11.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10023be3aacc95f06ba3ad7eea15f11762dde10ce82728f23d76cc6b83df34b0
+size 139997
diff --git a/eval/gpu/inference_grid_12.png b/eval/gpu/inference_grid_12.png
new file mode 100644
index 0000000000000000000000000000000000000000..0cf10e1e259648a7720ed042a682b5a7e5c7fc6f
Binary files /dev/null and b/eval/gpu/inference_grid_12.png differ
diff --git a/eval/gpu/inference_grid_13.png b/eval/gpu/inference_grid_13.png
new file mode 100644
index 0000000000000000000000000000000000000000..469b50b0c4c52d1755f1c2a6b18774976726e13a
Binary files /dev/null and b/eval/gpu/inference_grid_13.png differ
diff --git a/eval/gpu/inference_grid_14.png b/eval/gpu/inference_grid_14.png
new file mode 100644
index 0000000000000000000000000000000000000000..a8d46a0c0aeba076c88d444a6e7fe96991d22fbd
Binary files /dev/null and b/eval/gpu/inference_grid_14.png differ
diff --git a/eval/gpu/inference_grid_15.png b/eval/gpu/inference_grid_15.png
new file mode 100644
index 0000000000000000000000000000000000000000..ac37870e39bbe5f3bd26a39492dc5bd2a810f248
Binary files /dev/null and b/eval/gpu/inference_grid_15.png differ
diff --git a/eval/gpu/inference_grid_16.png b/eval/gpu/inference_grid_16.png
new file mode 100644
index 0000000000000000000000000000000000000000..bb054abfdafb2630f136bb89dcc90e70320db49a
--- /dev/null
+++ b/eval/gpu/inference_grid_16.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c16506561145145a7c3d67d326c8fbecaa9c3db93b052e544a100a4a6f77289
+size 143964
diff --git a/eval/gpu/inference_grid_17.png b/eval/gpu/inference_grid_17.png
new file mode 100644
index 0000000000000000000000000000000000000000..d81f8f0e31bc2eab176a5a5f76adb609818ae00c
Binary files /dev/null and b/eval/gpu/inference_grid_17.png differ
diff --git a/eval/gpu/inference_grid_18.png b/eval/gpu/inference_grid_18.png
new file mode 100644
index 0000000000000000000000000000000000000000..68cea80454f4cc93429fd76cbadba46b5621b70e
Binary files /dev/null and b/eval/gpu/inference_grid_18.png differ
diff --git a/eval/gpu/inference_grid_19.png b/eval/gpu/inference_grid_19.png
new file mode 100644
index 0000000000000000000000000000000000000000..fbf0f7b127265413e7522923d7c5d9988796d0f1
--- /dev/null
+++ b/eval/gpu/inference_grid_19.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8db0a3330ad0555c48b7fa9e5653d96a6068cee7a61b803a85fe6baba0b887e3
+size 112716
diff --git a/eval/gpu/inference_grid_2.png b/eval/gpu/inference_grid_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..8e858e7deeec4e5293e8dbc44268f2c9e831ad77
--- /dev/null
+++ b/eval/gpu/inference_grid_2.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08c04041c23c4290b35614cd463e5f3d94fbd1f4130bf1beec2e87a7a136cf38
+size 100960
diff --git a/eval/gpu/inference_grid_20.png b/eval/gpu/inference_grid_20.png
new file mode 100644
index 0000000000000000000000000000000000000000..977df1464d59ae55a0518f636216d7ea8f0cb92a
--- /dev/null
+++ b/eval/gpu/inference_grid_20.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9045dc42b8df8df426cd0544df2f6a287ad62fffa5d43fd47931419a57f6004e
+size 157969
diff --git a/eval/gpu/inference_grid_21.png b/eval/gpu/inference_grid_21.png
new file mode 100644
index 0000000000000000000000000000000000000000..f2fc6c16a0771d53c90f20fb4664ce7f372e2522
Binary files /dev/null and b/eval/gpu/inference_grid_21.png differ
diff --git a/eval/gpu/inference_grid_22.png b/eval/gpu/inference_grid_22.png
new file mode 100644
index 0000000000000000000000000000000000000000..728c960025c26d3be319a146fd9800d2290b82f9
--- /dev/null
+++ b/eval/gpu/inference_grid_22.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21de7020d60fd497f31ba954ff35fcced26fb75cf2224ad10049b1046bee202f
+size 116087
diff --git a/eval/gpu/inference_grid_23.png b/eval/gpu/inference_grid_23.png
new file mode 100644
index 0000000000000000000000000000000000000000..7ac776f83b7f6d34d612561efe5613e6c86bfc79
--- /dev/null
+++ b/eval/gpu/inference_grid_23.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84b4b553806a31b7fe66a330087d3a2dc1fb23a5037f15e4c35c30cbb15acdba
+size 145482
diff --git a/eval/gpu/inference_grid_24.png b/eval/gpu/inference_grid_24.png
new file mode 100644
index 0000000000000000000000000000000000000000..804ded4c2ed36ba80ab7f69cc96860ca4677de1f
Binary files /dev/null and b/eval/gpu/inference_grid_24.png differ
diff --git a/eval/gpu/inference_grid_3.png b/eval/gpu/inference_grid_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..86bdee4fb7c6c90e01fb8ab4f1cd262ef3d9dd4e
Binary files /dev/null and b/eval/gpu/inference_grid_3.png differ
diff --git a/eval/gpu/inference_grid_4.png b/eval/gpu/inference_grid_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..c87774fa5cf4a40cd3eb881d746557bc768a3dd8
Binary files /dev/null and b/eval/gpu/inference_grid_4.png differ
diff --git a/eval/gpu/inference_grid_5.png b/eval/gpu/inference_grid_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..71c10626e12b937cd8ad217dfc0e10c006fa7205
--- /dev/null
+++ b/eval/gpu/inference_grid_5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8263af9871e524c9745f2160e83e9eb8009349c8847c05b45f8a62f7e267b999
+size 110258
diff --git a/eval/gpu/inference_grid_6.png b/eval/gpu/inference_grid_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..e612fcedebee35ff0d51d24547d0247a0f6f4ba9
Binary files /dev/null and b/eval/gpu/inference_grid_6.png differ
diff --git a/eval/gpu/inference_grid_7.png b/eval/gpu/inference_grid_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..7a8ec3067ed29ebe36d32e9164939167f1dfd1d5
Binary files /dev/null and b/eval/gpu/inference_grid_7.png differ
diff --git a/eval/gpu/inference_grid_8.png b/eval/gpu/inference_grid_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..f682e85d9bc2a2a349fabb4d11c3b0fd5f6a6e78
--- /dev/null
+++ b/eval/gpu/inference_grid_8.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29b7bd53402eb2c8fdfae4bea951cb590cef2b16349f072024966417f83b55f7
+size 115151
diff --git a/eval/gpu/inference_grid_9.png b/eval/gpu/inference_grid_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..538550324be98fb6796080695c390d9aba6f8c2c
--- /dev/null
+++ b/eval/gpu/inference_grid_9.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fc49bd0f4cc40091408d4c03a317ec602e89bc72d614abb17cc44971d99973
+size 145357
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0a451517bf33441e9ca58d4a535f308e73a397cb
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d76af422bc714de4e13e585e25ed5cf64f2934c795f70450f642a3769c363bde
+size 174075684
diff --git a/preprocessor_config.json b/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8fa4bd2a1e0250a62cdd19c0597f2100eec51d15
--- /dev/null
+++ b/preprocessor_config.json
@@ -0,0 +1,26 @@
+{
+ "do_convert_annotations": true,
+ "do_normalize": true,
+ "do_pad": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "format": "coco_detection",
+ "image_mean": [
+ 0.485,
+ 0.456,
+ 0.406
+ ],
+ "image_processor_type": "ConditionalDetrImageProcessor",
+ "image_std": [
+ 0.229,
+ 0.224,
+ 0.225
+ ],
+ "pad_size": null,
+ "resample": 2,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "height": 640,
+ "width": 640
+ }
+}