diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..0aa05647e6c4345f2f71e53a1b7fb0683512b4b5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,29 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_0.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_1.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_10.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_11.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_16.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_19.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_2.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_20.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_22.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_23.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_5.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_8.png filter=lfs diff=lfs merge=lfs -text +eval/cpu/inference_grid_9.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_0.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_1.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_10.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_11.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_16.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_19.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_2.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_20.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_22.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_23.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_5.png filter=lfs diff=lfs 
merge=lfs -text +eval/gpu/inference_grid_8.png filter=lfs diff=lfs merge=lfs -text +eval/gpu/inference_grid_9.png filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c66352ce1383592cbb3825e78bdef8d89732f3c8 --- /dev/null +++ b/README.md @@ -0,0 +1,354 @@ +--- +license: apache-2.0 +base_model: +- microsoft/conditional-detr-resnet-50 +pipeline_tag: object-detection +datasets: +- tech4humans/signature-detection +metrics: +- f1 +- precision +- recall +library_name: transformers +inference: false +tags: +- object-detection +- signature-detection +- detr +- conditional-detr +- pytorch +model-index: +- name: tech4humans/conditional-detr-50-signature-detector + results: + - task: + type: object-detection + dataset: + type: tech4humans/signature-detection + name: tech4humans/signature-detection + split: test + metrics: + - type: precision + value: 0.936524 + name: mAP@0.5 + - type: precision + value: 0.653321 + name: mAP@0.5:0.95 +--- + +# **Conditional-DETR ResNet-50 - Handwritten Signature Detection** + +This repository presents a Conditional-DETR model with ResNet-50 backbone, fine-tuned to detect handwritten signatures in document images. This model achieved the **highest mAP@0.5 (93.65%)** among all tested architectures in our comprehensive evaluation. 
+ +| Resource | Links / Badges | Details | +|---------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **Article** | [![Paper page](https://huggingface.co/datasets/huggingface/badges/resolve/main/paper-page-md.svg)](https://huggingface.co/blog/samuellimabraz/signature-detection-model) | A detailed community article covering the full development process of the project | +| **Model Files (YOLOv8s)** | [![HF Model](https://huggingface.co/datasets/huggingface/badges/resolve/main/model-on-hf-md.svg)](https://huggingface.co/tech4humans/yolov8s-signature-detector) | **Available formats:** [![PyTorch](https://img.shields.io/badge/PyTorch-%23EE4C2C.svg?style=flat&logo=PyTorch&logoColor=white)](https://pytorch.org/) [![ONNX](https://img.shields.io/badge/ONNX-005CED.svg?style=flat&logo=ONNX&logoColor=white)](https://onnx.ai/) [![TensorRT](https://img.shields.io/badge/TensorRT-76B900.svg?style=flat&logo=NVIDIA&logoColor=white)](https://developer.nvidia.com/tensorrt) | +| **Dataset – Original** | [![Roboflow](https://app.roboflow.com/images/download-dataset-badge.svg)](https://universe.roboflow.com/tech-ysdkk/signature-detection-hlx8j) | 2,819 document images annotated with signature coordinates | +| **Dataset – Processed** | [![HF Dataset](https://huggingface.co/datasets/huggingface/badges/resolve/main/dataset-on-hf-md.svg)](https://huggingface.co/datasets/tech4humans/signature-detection) | Augmented and pre-processed version (640px) for model training | +| **Notebooks – Model Experiments** | 
[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1wSySw_zwyuv6XSaGmkngI4dwbj-hR4ix) [![W&B Training](https://img.shields.io/badge/W%26B_Training-FFBE00?style=flat&logo=WeightsAndBiases&logoColor=white)](https://api.wandb.ai/links/samuel-lima-tech4humans/30cmrkp8) | Complete training and evaluation pipeline with selection among different architectures (yolo, detr, rt-detr, conditional-detr, yolos) | +| **Notebooks – HP Tuning** | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1wSySw_zwyuv6XSaGmkngI4dwbj-hR4ix) [![W&B HP Tuning](https://img.shields.io/badge/W%26B_HP_Tuning-FFBE00?style=flat&logo=WeightsAndBiases&logoColor=white)](https://api.wandb.ai/links/samuel-lima-tech4humans/31a6zhb1) | Optuna trials for optimizing the precision/recall balance | +| **Inference Server** | [![GitHub](https://img.shields.io/badge/Deploy-ffffff?style=for-the-badge&logo=github&logoColor=black)](https://github.com/tech4ai/t4ai-signature-detect-server) | Complete deployment and inference pipeline with Triton Inference Server
[![OpenVINO](https://img.shields.io/badge/OpenVINO-00c7fd?style=flat&logo=intel&logoColor=white)](https://docs.openvino.ai/2025/index.html) [![Docker](https://img.shields.io/badge/Docker-2496ED?logo=docker&logoColor=fff)](https://www.docker.com/) [![Triton](https://img.shields.io/badge/Triton-Inference%20Server-76B900?labelColor=black&logo=nvidia)](https://developer.nvidia.com/triton-inference-server) | +| **Live Demo** | [![HF Space](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md.svg)](https://huggingface.co/spaces/tech4humans/signature-detection) | Graphical interface with real-time inference
[![Gradio](https://img.shields.io/badge/Gradio-FF5722?style=flat&logo=Gradio&logoColor=white)](https://www.gradio.app/) [![Plotly](https://img.shields.io/badge/Plotly-000000?style=flat&logo=plotly&logoColor=white)](https://plotly.com/python/) | + +--- + +## **Dataset** + + + + + + 
+ + + + + + Dataset on HF + +
+The training utilized a dataset built from two public datasets: [Tobacco800](https://paperswithcode.com/dataset/tobacco-800) and [signatures-xc8up](https://universe.roboflow.com/roboflow-100/signatures-xc8up), unified and processed in [Roboflow](https://roboflow.com/). + +**Dataset Summary:** +- Training: 1,980 images (70%) +- Validation: 420 images (15%) +- Testing: 419 images (15%) +- Format: COCO JSON +- Resolution: 640x640 pixels + +![Roboflow Dataset](./assets/roboflow_ds.png) + +--- + +## **Training Process** + +The training process involved the following steps: + +### 1. **Model Selection:** + +Various object detection models were evaluated to identify the best balance between precision, recall, and inference time. + + +| **Metric** | [rtdetr-l](https://github.com/ultralytics/assets/releases/download/v8.2.0/rtdetr-l.pt) | [yolos-base](https://huggingface.co/hustvl/yolos-base) | [yolos-tiny](https://huggingface.co/hustvl/yolos-tiny) | [conditional-detr-resnet-50](https://huggingface.co/microsoft/conditional-detr-resnet-50) | [detr-resnet-50](https://huggingface.co/facebook/detr-resnet-50) | [yolov8x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x.pt) | [yolov8l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8l.pt) | [yolov8m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt) | [yolov8s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt) | [yolov8n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt) | [yolo11x](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt) | [yolo11l](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11l.pt) | [yolo11m](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt) | [yolo11s](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11s.pt) | [yolo11n](https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt) | 
[yolov10x](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10x.pt) | [yolov10l](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10l.pt) | [yolov10b](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10b.pt) | [yolov10m](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10m.pt) | [yolov10s](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10s.pt) | [yolov10n](https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov10n.pt) | +|:---------------------|---------:|-----------:|-----------:|---------------------------:|---------------:|--------:|--------:|--------:|--------:|--------:|--------:|--------:|--------:|--------:|--------:|---------:|---------:|---------:|---------:|---------:|---------:| +| **Inference Time - CPU (ms)** | 583.608 | 1706.49 | 265.346 | 476.831 | 425.649 | 1259.47 | 871.329 | 401.183 | 216.6 | 110.442 | 1016.68 | 518.147 | 381.652 | 179.792 | 106.656 | 821.183 | 580.767 | 473.109 | 320.12 | 150.076 | **73.8596** | +| **mAP50** | 0.92709 | 0.901154 | 0.869814 | **0.936524** | 0.88885 | 0.794237| 0.800312| 0.875322| 0.874721| 0.816089| 0.667074| 0.707409| 0.809557| 0.835605| 0.813799| 0.681023| 0.726802| 0.789835| 0.787688| 0.663877| 0.734332 | +| **mAP50-95** | 0.622364 | 0.583569 | 0.469064 | 0.653321 | 0.579428 | 0.552919| 0.593976| **0.665495**| 0.65457 | 0.623963| 0.482289| 0.499126| 0.600797| 0.638849| 0.617496| 0.474535| 0.522654| 0.578874| 0.581259| 0.473857| 0.552704 | + + +![Model Selection](./assets/model_selection.png) + +#### Highlights: +- **Best mAP50:** `conditional-detr-resnet-50` (**0.936524**) +- **Best mAP50-95:** `yolov8m` (**0.665495**) +- **Fastest Inference Time:** `yolov10n` (**73.8596 ms**) + +Detailed experiments are available on [**Weights & Biases**](https://api.wandb.ai/links/samuel-lima-tech4humans/30cmrkp8). + +### 2. 
**Hyperparameter Tuning:** + +The YOLOv8s model, which demonstrated a good balance of inference time, precision, and recall, was selected for hyperparameter tuning. + +[Optuna](https://optuna.org/) was used for 20 optimization trials. +The hyperparameter tuning used the following parameter configuration: + +```python + dropout = trial.suggest_float("dropout", 0.0, 0.5, step=0.1) + lr0 = trial.suggest_float("lr0", 1e-5, 1e-1, log=True) + box = trial.suggest_float("box", 3.0, 7.0, step=1.0) + cls = trial.suggest_float("cls", 0.5, 1.5, step=0.2) + opt = trial.suggest_categorical("optimizer", ["AdamW", "RMSProp"]) +``` +Results can be visualized here: [**Hypertuning Experiment**](https://api.wandb.ai/links/samuel-lima-tech4humans/31a6zhb1). + +![Hypertuning Sweep](./assets/sweep.png) + +### 3. **Evaluation:** + +The models were evaluated on the test set at the end of training in ONNX (CPU) and TensorRT (GPU - T4) formats. Performance metrics included precision, recall, mAP50, and mAP50-95. + +![Trials](./assets/trials.png) + +#### Results Comparison: + +| Metric | Base Model | Best Trial (#10) | Difference | +|------------|------------|-------------------|-------------| +| mAP50 | 87.47% | **95.75%** | +8.28% | +| mAP50-95 | 65.46% | **66.26%** | +0.81% | +| Precision | **97.23%** | 95.61% | -1.63% | +| Recall | 76.16% | **91.21%** | +15.05% | +| F1-score | 85.42% | **93.36%** | +7.94% | + +--- + +## **Results** + +After hyperparameter tuning of the YOLOv8s model, the best model achieved the following results on the test set: + +- **Precision:** 94.74% +- **Recall:** 89.72% +- **mAP@50:** 94.50% +- **mAP@50-95:** 67.35% +- **Inference Time:** + - **ONNX Runtime (CPU):** 171.56 ms + - **TensorRT (GPU - T4):** 7.657 ms + +--- + +## **How to Use** + +### **Installation** + +```bash +pip install transformers torch torchvision pillow +``` + +### **Inference** + +```python +from transformers import AutoImageProcessor, AutoModelForObjectDetection +from PIL import Image 
+import torch + +# Load model and processor +model_name = "tech4humans/conditional-detr-50-signature-detector" +processor = AutoImageProcessor.from_pretrained(model_name) +model = AutoModelForObjectDetection.from_pretrained(model_name) + +# Load and process image +image = Image.open("path/to/your/document.jpg") +inputs = processor(images=image, return_tensors="pt") + +# Run inference +with torch.no_grad(): + outputs = model(**inputs) + +# Post-process results +target_sizes = torch.tensor([image.size[::-1]]) +results = processor.post_process_object_detection( + outputs, target_sizes=target_sizes, threshold=0.5 +)[0] + +# Extract detections +for score, label, box in zip(results["scores"], results["labels"], results["boxes"]): + box = [round(i, 2) for i in box.tolist()] + print(f"Detected signature with confidence {round(score.item(), 3)} at location {box}") +``` + +### **Visualization** + +```python +import matplotlib.pyplot as plt +import matplotlib.patches as patches +from PIL import Image + +def visualize_predictions(image_path, results, threshold=0.5): + image = Image.open(image_path) + fig, ax = plt.subplots(1, figsize=(12, 9)) + ax.imshow(image) + + for score, label, box in zip(results["scores"], results["labels"], results["boxes"]): + if score > threshold: + x, y, x2, y2 = box.tolist() + width, height = x2 - x, y2 - y + + rect = patches.Rectangle( + (x, y), width, height, + linewidth=2, edgecolor='red', facecolor='none' + ) + ax.add_patch(rect) + ax.text(x, y-10, f'Signature: {score:.3f}', + bbox=dict(boxstyle="round,pad=0.3", facecolor="yellow", alpha=0.7)) + + ax.set_title("Signature Detection Results") + plt.axis('off') + plt.show() + +# Use the visualization +visualize_predictions("path/to/your/document.jpg", results) +``` + +--- + +## **Demo** + +You can explore the model and test real-time inference in the Hugging Face Spaces demo, built with Gradio and ONNXRuntime. 
+ +[![Open in Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md.svg)](https://huggingface.co/spaces/tech4humans/signature-detection) + +--- + +## 🔗 **Inference with Triton Server** + +If you want to deploy this signature detection model in a production environment, check out our inference server repository based on the NVIDIA Triton Inference Server. + + + + + + +
+ Triton Badge + + GitHub Badge +
+--- + +## **Infrastructure** + +### Software + +The model was trained and tuned using a Jupyter Notebook environment. + +- **Operating System:** Ubuntu 22.04 +- **Python:** 3.10.12 +- **PyTorch:** 2.5.1+cu121 +- **Ultralytics:** 8.3.58 +- **Roboflow:** 1.1.50 +- **Optuna:** 4.1.0 +- **ONNX Runtime:** 1.20.1 +- **TensorRT:** 10.7.0 + +### Hardware + +Training was performed on a Google Cloud Platform n1-standard-8 instance with the following specifications: + +- **CPU:** 8 vCPUs +- **GPU:** NVIDIA Tesla T4 + +--- + +## **License** + +### Model Weights, Code and Training Materials – **Apache 2.0** +- **License:** Apache License 2.0 +- **Usage:** All training scripts, deployment code, and usage instructions are licensed under the Apache 2.0 license. + +--- + +## **Citation** + +If you use this model in your research, please cite: + +```bibtex +@misc{lima2024conditional-detr-signature-detection, + title={Conditional-DETR for Handwritten Signature Detection}, + author={Lima, Samuel and Tech4Humans Team}, + year={2024}, + publisher={Hugging Face}, + url={https://huggingface.co/tech4humans/conditional-detr-50-signature-detector} +} +``` + +--- + +## **Contact and Information** + +For further information, questions, or contributions, contact us at **iag@tech4h.com.br**. + +
+

+ 📧 Email: iag@tech4h.com.br
+ 🌐 Website: www.tech4.ai
+ 💼 LinkedIn: Tech4Humans +

+
+ +## **Author** + +
+ + + + + +
+ + Samuel Lima +

Samuel Lima

+
+

AI Research Engineer

+

+ + HuggingFace + +

+
+

Responsibilities in this Project

+
    +
  • 🔬 Model development and training
  • +
  • 📊 Dataset analysis and processing
  • +
  • ⚙️ Architecture selection and performance evaluation
  • +
  • 📝 Technical documentation and model card
  • +
+
+
+ +--- + +
+

Developed with 💜 by Tech4Humans

+
diff --git a/best_checkpoint/config.json b/best_checkpoint/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5f75103d2a8582229fe2ced88551ad02cbb27e1c --- /dev/null +++ b/best_checkpoint/config.json @@ -0,0 +1,61 @@ +{ + "_name_or_path": "microsoft/conditional-detr-resnet-50", + "activation_dropout": 0.0, + "activation_function": "relu", + "architectures": [ + "ConditionalDetrForObjectDetection" + ], + "attention_dropout": 0.0, + "auxiliary_loss": false, + "backbone": "resnet50", + "backbone_config": null, + "backbone_kwargs": { + "in_chans": 3, + "out_indices": [ + 1, + 2, + 3, + 4 + ] + }, + "bbox_cost": 5, + "bbox_loss_coefficient": 5, + "class_cost": 2, + "cls_loss_coefficient": 2, + "d_model": 256, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "dice_loss_coefficient": 1, + "dilation": false, + "dropout": 0.1, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "focal_alpha": 0.25, + "giou_cost": 2, + "giou_loss_coefficient": 2, + "id2label": { + "0": "signature" + }, + "init_std": 0.02, + "init_xavier_std": 1.0, + "is_encoder_decoder": true, + "label2id": { + "signature": 0 + }, + "mask_loss_coefficient": 1, + "max_position_embeddings": 1024, + "model_type": "conditional_detr", + "num_channels": 3, + "num_hidden_layers": 6, + "num_queries": 300, + "position_embedding_type": "sine", + "scale_embedding": false, + "torch_dtype": "float32", + "transformers_version": "4.46.3", + "use_pretrained_backbone": true, + "use_timm_backbone": true +} diff --git a/best_checkpoint/model.safetensors b/best_checkpoint/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..623115fef6b4192304696ca5a8166762ae27d387 --- /dev/null +++ b/best_checkpoint/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1b804b3797a81dbaa7f803c93ddff884acb321b10f3ad2520861b378e72cb3ef +size 174075684 diff --git a/best_checkpoint/optimizer.pt b/best_checkpoint/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c6e449fa83e8cc884ca80e57658d9f08e3a2c1e --- /dev/null +++ b/best_checkpoint/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60667f62d23d0156209d0db0cd48fc1bf1aaaabf2f564a2cf22aa304543eecd0 +size 345689625 diff --git a/best_checkpoint/preprocessor_config.json b/best_checkpoint/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fa4bd2a1e0250a62cdd19c0597f2100eec51d15 --- /dev/null +++ b/best_checkpoint/preprocessor_config.json @@ -0,0 +1,26 @@ +{ + "do_convert_annotations": true, + "do_normalize": true, + "do_pad": true, + "do_rescale": true, + "do_resize": true, + "format": "coco_detection", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "ConditionalDetrImageProcessor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "pad_size": null, + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 640, + "width": 640 + } +} diff --git a/best_checkpoint/rng_state.pth b/best_checkpoint/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a0e58d3ec4cda441b7ad0baa595c06d8bc4ccc5 --- /dev/null +++ b/best_checkpoint/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:672f61b91e1dc0ec0cfc7cc6bea9c0630fa1b53fe3a606869eead6061469864c +size 14244 diff --git a/best_checkpoint/scheduler.pt b/best_checkpoint/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..91019efb8414ccac74569365310475adec2ab102 --- /dev/null +++ b/best_checkpoint/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73201c99891272e8d20ef63730f93b9b956d012d7aefe414a361a43f9b574909 +size 1064 diff --git 
a/best_checkpoint/trainer_state.json b/best_checkpoint/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..389daeebfecd1109bea96701ecf998b5a0fc3402 --- /dev/null +++ b/best_checkpoint/trainer_state.json @@ -0,0 +1,62442 @@ +{ + "best_metric": 0.9409, + "best_model_checkpoint": "/content/gcs/iag-training/models/image/signature-detection/detr/conditional-detr-resnet-50/checkpoint-6300", + "epoch": 50.0, + "eval_steps": 500, + "global_step": 8750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005714285714285714, + "grad_norm": 860.2239990234375, + "learning_rate": 5.714285714285715e-08, + "loss": 48.7852, + "step": 1 + }, + { + "epoch": 0.011428571428571429, + "grad_norm": 1068.1214599609375, + "learning_rate": 1.142857142857143e-07, + "loss": 59.7628, + "step": 2 + }, + { + "epoch": 0.017142857142857144, + "grad_norm": 629.9717407226562, + "learning_rate": 1.7142857142857143e-07, + "loss": 35.0063, + "step": 3 + }, + { + "epoch": 0.022857142857142857, + "grad_norm": 969.9238891601562, + "learning_rate": 2.285714285714286e-07, + "loss": 55.0153, + "step": 4 + }, + { + "epoch": 0.02857142857142857, + "grad_norm": 976.5315551757812, + "learning_rate": 2.8571428571428575e-07, + "loss": 54.7871, + "step": 5 + }, + { + "epoch": 0.03428571428571429, + "grad_norm": 848.9593505859375, + "learning_rate": 3.4285714285714286e-07, + "loss": 48.7854, + "step": 6 + }, + { + "epoch": 0.04, + "grad_norm": 1419.771728515625, + "learning_rate": 4.0000000000000003e-07, + "loss": 78.9038, + "step": 7 + }, + { + "epoch": 0.045714285714285714, + "grad_norm": 863.019287109375, + "learning_rate": 4.571428571428572e-07, + "loss": 47.4685, + "step": 8 + }, + { + "epoch": 0.05142857142857143, + "grad_norm": 1193.3538818359375, + "learning_rate": 5.142857142857143e-07, + "loss": 67.3228, + "step": 9 + }, + { + "epoch": 0.05714285714285714, + "grad_norm": 
939.5175170898438, + "learning_rate": 5.714285714285715e-07, + "loss": 53.061, + "step": 10 + }, + { + "epoch": 0.06285714285714286, + "grad_norm": 775.0874633789062, + "learning_rate": 6.285714285714287e-07, + "loss": 43.3721, + "step": 11 + }, + { + "epoch": 0.06857142857142857, + "grad_norm": 1446.0272216796875, + "learning_rate": 6.857142857142857e-07, + "loss": 80.2565, + "step": 12 + }, + { + "epoch": 0.07428571428571429, + "grad_norm": 1051.2706298828125, + "learning_rate": 7.428571428571429e-07, + "loss": 58.7238, + "step": 13 + }, + { + "epoch": 0.08, + "grad_norm": 925.4149780273438, + "learning_rate": 8.000000000000001e-07, + "loss": 52.5781, + "step": 14 + }, + { + "epoch": 0.08571428571428572, + "grad_norm": 1029.443115234375, + "learning_rate": 8.571428571428572e-07, + "loss": 57.8017, + "step": 15 + }, + { + "epoch": 0.09142857142857143, + "grad_norm": 568.0119018554688, + "learning_rate": 9.142857142857144e-07, + "loss": 32.4704, + "step": 16 + }, + { + "epoch": 0.09714285714285714, + "grad_norm": 1057.5506591796875, + "learning_rate": 9.714285714285715e-07, + "loss": 59.1879, + "step": 17 + }, + { + "epoch": 0.10285714285714286, + "grad_norm": 1026.0074462890625, + "learning_rate": 1.0285714285714286e-06, + "loss": 55.431, + "step": 18 + }, + { + "epoch": 0.10857142857142857, + "grad_norm": 1320.9364013671875, + "learning_rate": 1.0857142857142858e-06, + "loss": 73.9857, + "step": 19 + }, + { + "epoch": 0.11428571428571428, + "grad_norm": 1451.337646484375, + "learning_rate": 1.142857142857143e-06, + "loss": 64.1714, + "step": 20 + }, + { + "epoch": 0.12, + "grad_norm": 963.4343872070312, + "learning_rate": 1.2000000000000002e-06, + "loss": 54.32, + "step": 21 + }, + { + "epoch": 0.12571428571428572, + "grad_norm": 1317.2686767578125, + "learning_rate": 1.2571428571428573e-06, + "loss": 72.2047, + "step": 22 + }, + { + "epoch": 0.13142857142857142, + "grad_norm": 653.3263549804688, + "learning_rate": 1.3142857142857145e-06, + "loss": 36.5625, + 
"step": 23 + }, + { + "epoch": 0.13714285714285715, + "grad_norm": 1563.2149658203125, + "learning_rate": 1.3714285714285715e-06, + "loss": 86.5436, + "step": 24 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 910.78564453125, + "learning_rate": 1.4285714285714286e-06, + "loss": 46.524, + "step": 25 + }, + { + "epoch": 0.14857142857142858, + "grad_norm": 850.3976440429688, + "learning_rate": 1.4857142857142858e-06, + "loss": 45.7479, + "step": 26 + }, + { + "epoch": 0.15428571428571428, + "grad_norm": 874.3585815429688, + "learning_rate": 1.542857142857143e-06, + "loss": 47.2899, + "step": 27 + }, + { + "epoch": 0.16, + "grad_norm": 1307.404541015625, + "learning_rate": 1.6000000000000001e-06, + "loss": 69.136, + "step": 28 + }, + { + "epoch": 0.1657142857142857, + "grad_norm": 1136.086669921875, + "learning_rate": 1.657142857142857e-06, + "loss": 58.2475, + "step": 29 + }, + { + "epoch": 0.17142857142857143, + "grad_norm": 1073.1068115234375, + "learning_rate": 1.7142857142857145e-06, + "loss": 59.151, + "step": 30 + }, + { + "epoch": 0.17714285714285713, + "grad_norm": 1063.392822265625, + "learning_rate": 1.7714285714285714e-06, + "loss": 56.9664, + "step": 31 + }, + { + "epoch": 0.18285714285714286, + "grad_norm": 1098.790283203125, + "learning_rate": 1.8285714285714288e-06, + "loss": 55.6005, + "step": 32 + }, + { + "epoch": 0.18857142857142858, + "grad_norm": 1341.4580078125, + "learning_rate": 1.8857142857142858e-06, + "loss": 65.0587, + "step": 33 + }, + { + "epoch": 0.19428571428571428, + "grad_norm": 637.0850830078125, + "learning_rate": 1.942857142857143e-06, + "loss": 34.4329, + "step": 34 + }, + { + "epoch": 0.2, + "grad_norm": 838.4291381835938, + "learning_rate": 2.0000000000000003e-06, + "loss": 41.3785, + "step": 35 + }, + { + "epoch": 0.2057142857142857, + "grad_norm": 1155.2122802734375, + "learning_rate": 2.0571428571428573e-06, + "loss": 53.1862, + "step": 36 + }, + { + "epoch": 0.21142857142857144, + "grad_norm": 519.1825561523438, + 
"learning_rate": 2.1142857142857147e-06, + "loss": 27.9733, + "step": 37 + }, + { + "epoch": 0.21714285714285714, + "grad_norm": 1609.19482421875, + "learning_rate": 2.1714285714285716e-06, + "loss": 43.9349, + "step": 38 + }, + { + "epoch": 0.22285714285714286, + "grad_norm": 666.6470336914062, + "learning_rate": 2.228571428571429e-06, + "loss": 32.3454, + "step": 39 + }, + { + "epoch": 0.22857142857142856, + "grad_norm": 676.74267578125, + "learning_rate": 2.285714285714286e-06, + "loss": 32.8595, + "step": 40 + }, + { + "epoch": 0.2342857142857143, + "grad_norm": 883.8102416992188, + "learning_rate": 2.342857142857143e-06, + "loss": 43.0263, + "step": 41 + }, + { + "epoch": 0.24, + "grad_norm": 982.6002197265625, + "learning_rate": 2.4000000000000003e-06, + "loss": 46.6085, + "step": 42 + }, + { + "epoch": 0.24571428571428572, + "grad_norm": 697.6786499023438, + "learning_rate": 2.4571428571428573e-06, + "loss": 33.0379, + "step": 43 + }, + { + "epoch": 0.25142857142857145, + "grad_norm": 691.048828125, + "learning_rate": 2.5142857142857147e-06, + "loss": 33.1235, + "step": 44 + }, + { + "epoch": 0.2571428571428571, + "grad_norm": 841.7835693359375, + "learning_rate": 2.5714285714285716e-06, + "loss": 27.9442, + "step": 45 + }, + { + "epoch": 0.26285714285714284, + "grad_norm": 812.8052368164062, + "learning_rate": 2.628571428571429e-06, + "loss": 36.1373, + "step": 46 + }, + { + "epoch": 0.26857142857142857, + "grad_norm": 758.7867431640625, + "learning_rate": 2.685714285714286e-06, + "loss": 36.4701, + "step": 47 + }, + { + "epoch": 0.2742857142857143, + "grad_norm": 675.4127197265625, + "learning_rate": 2.742857142857143e-06, + "loss": 30.4991, + "step": 48 + }, + { + "epoch": 0.28, + "grad_norm": 858.139892578125, + "learning_rate": 2.8000000000000003e-06, + "loss": 37.4854, + "step": 49 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 512.81689453125, + "learning_rate": 2.8571428571428573e-06, + "loss": 25.0081, + "step": 50 + }, + { + "epoch": 
0.2914285714285714, + "grad_norm": 619.5103149414062, + "learning_rate": 2.9142857142857146e-06, + "loss": 28.0793, + "step": 51 + }, + { + "epoch": 0.29714285714285715, + "grad_norm": 621.6701049804688, + "learning_rate": 2.9714285714285716e-06, + "loss": 27.6905, + "step": 52 + }, + { + "epoch": 0.3028571428571429, + "grad_norm": 482.0356750488281, + "learning_rate": 3.028571428571429e-06, + "loss": 20.7056, + "step": 53 + }, + { + "epoch": 0.30857142857142855, + "grad_norm": 869.6558837890625, + "learning_rate": 3.085714285714286e-06, + "loss": 36.8075, + "step": 54 + }, + { + "epoch": 0.3142857142857143, + "grad_norm": 627.7383422851562, + "learning_rate": 3.1428571428571433e-06, + "loss": 29.0973, + "step": 55 + }, + { + "epoch": 0.32, + "grad_norm": 428.7781677246094, + "learning_rate": 3.2000000000000003e-06, + "loss": 19.1738, + "step": 56 + }, + { + "epoch": 0.32571428571428573, + "grad_norm": 576.77099609375, + "learning_rate": 3.2571428571428572e-06, + "loss": 24.746, + "step": 57 + }, + { + "epoch": 0.3314285714285714, + "grad_norm": 581.8759765625, + "learning_rate": 3.314285714285714e-06, + "loss": 26.0945, + "step": 58 + }, + { + "epoch": 0.33714285714285713, + "grad_norm": 518.2973022460938, + "learning_rate": 3.371428571428572e-06, + "loss": 23.3643, + "step": 59 + }, + { + "epoch": 0.34285714285714286, + "grad_norm": 566.5599975585938, + "learning_rate": 3.428571428571429e-06, + "loss": 25.4305, + "step": 60 + }, + { + "epoch": 0.3485714285714286, + "grad_norm": 423.3687744140625, + "learning_rate": 3.485714285714286e-06, + "loss": 17.0948, + "step": 61 + }, + { + "epoch": 0.35428571428571426, + "grad_norm": 610.7925415039062, + "learning_rate": 3.542857142857143e-06, + "loss": 25.4729, + "step": 62 + }, + { + "epoch": 0.36, + "grad_norm": 289.5078430175781, + "learning_rate": 3.6e-06, + "loss": 14.4792, + "step": 63 + }, + { + "epoch": 0.3657142857142857, + "grad_norm": 375.97882080078125, + "learning_rate": 3.6571428571428576e-06, + "loss": 
15.283, + "step": 64 + }, + { + "epoch": 0.37142857142857144, + "grad_norm": 556.4803466796875, + "learning_rate": 3.7142857142857146e-06, + "loss": 23.1576, + "step": 65 + }, + { + "epoch": 0.37714285714285717, + "grad_norm": 400.8939208984375, + "learning_rate": 3.7714285714285716e-06, + "loss": 17.8219, + "step": 66 + }, + { + "epoch": 0.38285714285714284, + "grad_norm": 473.13897705078125, + "learning_rate": 3.828571428571429e-06, + "loss": 18.0179, + "step": 67 + }, + { + "epoch": 0.38857142857142857, + "grad_norm": 369.0384521484375, + "learning_rate": 3.885714285714286e-06, + "loss": 16.7251, + "step": 68 + }, + { + "epoch": 0.3942857142857143, + "grad_norm": 444.827392578125, + "learning_rate": 3.942857142857143e-06, + "loss": 20.251, + "step": 69 + }, + { + "epoch": 0.4, + "grad_norm": 482.73333740234375, + "learning_rate": 4.000000000000001e-06, + "loss": 21.5673, + "step": 70 + }, + { + "epoch": 0.4057142857142857, + "grad_norm": 285.9597473144531, + "learning_rate": 4.057142857142858e-06, + "loss": 13.1687, + "step": 71 + }, + { + "epoch": 0.4114285714285714, + "grad_norm": 351.0760803222656, + "learning_rate": 4.114285714285715e-06, + "loss": 15.4936, + "step": 72 + }, + { + "epoch": 0.41714285714285715, + "grad_norm": 395.3171081542969, + "learning_rate": 4.1714285714285715e-06, + "loss": 16.7497, + "step": 73 + }, + { + "epoch": 0.4228571428571429, + "grad_norm": 310.77935791015625, + "learning_rate": 4.228571428571429e-06, + "loss": 13.1282, + "step": 74 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 333.78497314453125, + "learning_rate": 4.285714285714286e-06, + "loss": 13.1536, + "step": 75 + }, + { + "epoch": 0.4342857142857143, + "grad_norm": 378.2083435058594, + "learning_rate": 4.342857142857143e-06, + "loss": 15.6583, + "step": 76 + }, + { + "epoch": 0.44, + "grad_norm": 238.56402587890625, + "learning_rate": 4.4e-06, + "loss": 10.2499, + "step": 77 + }, + { + "epoch": 0.44571428571428573, + "grad_norm": 255.11715698242188, + 
"learning_rate": 4.457142857142858e-06, + "loss": 7.5773, + "step": 78 + }, + { + "epoch": 0.4514285714285714, + "grad_norm": 354.782470703125, + "learning_rate": 4.514285714285715e-06, + "loss": 14.8135, + "step": 79 + }, + { + "epoch": 0.45714285714285713, + "grad_norm": 307.1529541015625, + "learning_rate": 4.571428571428572e-06, + "loss": 12.8466, + "step": 80 + }, + { + "epoch": 0.46285714285714286, + "grad_norm": 238.98980712890625, + "learning_rate": 4.628571428571429e-06, + "loss": 9.0211, + "step": 81 + }, + { + "epoch": 0.4685714285714286, + "grad_norm": 291.2945251464844, + "learning_rate": 4.685714285714286e-06, + "loss": 12.0581, + "step": 82 + }, + { + "epoch": 0.4742857142857143, + "grad_norm": 184.88775634765625, + "learning_rate": 4.742857142857144e-06, + "loss": 8.1703, + "step": 83 + }, + { + "epoch": 0.48, + "grad_norm": 213.38546752929688, + "learning_rate": 4.800000000000001e-06, + "loss": 8.8526, + "step": 84 + }, + { + "epoch": 0.4857142857142857, + "grad_norm": 291.2813415527344, + "learning_rate": 4.857142857142858e-06, + "loss": 9.452, + "step": 85 + }, + { + "epoch": 0.49142857142857144, + "grad_norm": 318.67938232421875, + "learning_rate": 4.9142857142857145e-06, + "loss": 12.6986, + "step": 86 + }, + { + "epoch": 0.49714285714285716, + "grad_norm": 143.00746154785156, + "learning_rate": 4.9714285714285715e-06, + "loss": 6.0976, + "step": 87 + }, + { + "epoch": 0.5028571428571429, + "grad_norm": 232.49122619628906, + "learning_rate": 5.028571428571429e-06, + "loss": 8.8368, + "step": 88 + }, + { + "epoch": 0.5085714285714286, + "grad_norm": 183.7664794921875, + "learning_rate": 5.085714285714286e-06, + "loss": 7.6702, + "step": 89 + }, + { + "epoch": 0.5142857142857142, + "grad_norm": 162.1573944091797, + "learning_rate": 5.142857142857143e-06, + "loss": 7.0566, + "step": 90 + }, + { + "epoch": 0.52, + "grad_norm": 202.04483032226562, + "learning_rate": 5.2e-06, + "loss": 8.3381, + "step": 91 + }, + { + "epoch": 0.5257142857142857, + 
"grad_norm": 131.73065185546875, + "learning_rate": 5.257142857142858e-06, + "loss": 4.88, + "step": 92 + }, + { + "epoch": 0.5314285714285715, + "grad_norm": 379.7463684082031, + "learning_rate": 5.314285714285715e-06, + "loss": 8.5365, + "step": 93 + }, + { + "epoch": 0.5371428571428571, + "grad_norm": 192.61703491210938, + "learning_rate": 5.371428571428572e-06, + "loss": 7.4655, + "step": 94 + }, + { + "epoch": 0.5428571428571428, + "grad_norm": 190.39840698242188, + "learning_rate": 5.428571428571429e-06, + "loss": 7.7574, + "step": 95 + }, + { + "epoch": 0.5485714285714286, + "grad_norm": 182.56820678710938, + "learning_rate": 5.485714285714286e-06, + "loss": 6.4847, + "step": 96 + }, + { + "epoch": 0.5542857142857143, + "grad_norm": 138.87295532226562, + "learning_rate": 5.542857142857144e-06, + "loss": 5.575, + "step": 97 + }, + { + "epoch": 0.56, + "grad_norm": 528.1649780273438, + "learning_rate": 5.600000000000001e-06, + "loss": 4.9471, + "step": 98 + }, + { + "epoch": 0.5657142857142857, + "grad_norm": 234.5634002685547, + "learning_rate": 5.6571428571428576e-06, + "loss": 7.5448, + "step": 99 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 280.67047119140625, + "learning_rate": 5.7142857142857145e-06, + "loss": 4.4288, + "step": 100 + }, + { + "epoch": 0.5771428571428572, + "grad_norm": 121.8084945678711, + "learning_rate": 5.7714285714285715e-06, + "loss": 4.4681, + "step": 101 + }, + { + "epoch": 0.5828571428571429, + "grad_norm": 155.22755432128906, + "learning_rate": 5.828571428571429e-06, + "loss": 4.7771, + "step": 102 + }, + { + "epoch": 0.5885714285714285, + "grad_norm": 108.56673431396484, + "learning_rate": 5.885714285714286e-06, + "loss": 4.6154, + "step": 103 + }, + { + "epoch": 0.5942857142857143, + "grad_norm": 118.11555480957031, + "learning_rate": 5.942857142857143e-06, + "loss": 3.9309, + "step": 104 + }, + { + "epoch": 0.6, + "grad_norm": 184.95895385742188, + "learning_rate": 6e-06, + "loss": 4.5836, + "step": 105 + }, + { + 
"epoch": 0.6057142857142858, + "grad_norm": 100.24383544921875, + "learning_rate": 6.057142857142858e-06, + "loss": 4.0985, + "step": 106 + }, + { + "epoch": 0.6114285714285714, + "grad_norm": 69.76212310791016, + "learning_rate": 6.114285714285715e-06, + "loss": 3.3107, + "step": 107 + }, + { + "epoch": 0.6171428571428571, + "grad_norm": 70.37541961669922, + "learning_rate": 6.171428571428572e-06, + "loss": 3.4681, + "step": 108 + }, + { + "epoch": 0.6228571428571429, + "grad_norm": 83.5008544921875, + "learning_rate": 6.228571428571429e-06, + "loss": 3.7449, + "step": 109 + }, + { + "epoch": 0.6285714285714286, + "grad_norm": 74.19607543945312, + "learning_rate": 6.285714285714287e-06, + "loss": 3.7265, + "step": 110 + }, + { + "epoch": 0.6342857142857142, + "grad_norm": 89.18614196777344, + "learning_rate": 6.342857142857144e-06, + "loss": 3.2003, + "step": 111 + }, + { + "epoch": 0.64, + "grad_norm": 77.59644317626953, + "learning_rate": 6.4000000000000006e-06, + "loss": 3.7784, + "step": 112 + }, + { + "epoch": 0.6457142857142857, + "grad_norm": 76.62641143798828, + "learning_rate": 6.4571428571428575e-06, + "loss": 3.7134, + "step": 113 + }, + { + "epoch": 0.6514285714285715, + "grad_norm": 102.94229888916016, + "learning_rate": 6.5142857142857145e-06, + "loss": 2.9531, + "step": 114 + }, + { + "epoch": 0.6571428571428571, + "grad_norm": 195.8087615966797, + "learning_rate": 6.5714285714285714e-06, + "loss": 3.368, + "step": 115 + }, + { + "epoch": 0.6628571428571428, + "grad_norm": 45.65556716918945, + "learning_rate": 6.628571428571428e-06, + "loss": 2.9719, + "step": 116 + }, + { + "epoch": 0.6685714285714286, + "grad_norm": 46.55626678466797, + "learning_rate": 6.685714285714285e-06, + "loss": 2.5911, + "step": 117 + }, + { + "epoch": 0.6742857142857143, + "grad_norm": 50.443519592285156, + "learning_rate": 6.742857142857144e-06, + "loss": 2.4823, + "step": 118 + }, + { + "epoch": 0.68, + "grad_norm": 63.44559860229492, + "learning_rate": 
6.800000000000001e-06, + "loss": 3.1693, + "step": 119 + }, + { + "epoch": 0.6857142857142857, + "grad_norm": 52.527462005615234, + "learning_rate": 6.857142857142858e-06, + "loss": 2.763, + "step": 120 + }, + { + "epoch": 0.6914285714285714, + "grad_norm": 56.55500030517578, + "learning_rate": 6.914285714285715e-06, + "loss": 2.291, + "step": 121 + }, + { + "epoch": 0.6971428571428572, + "grad_norm": 44.16722106933594, + "learning_rate": 6.971428571428572e-06, + "loss": 2.318, + "step": 122 + }, + { + "epoch": 0.7028571428571428, + "grad_norm": 340.8834228515625, + "learning_rate": 7.028571428571429e-06, + "loss": 2.3768, + "step": 123 + }, + { + "epoch": 0.7085714285714285, + "grad_norm": 49.526893615722656, + "learning_rate": 7.085714285714286e-06, + "loss": 2.0165, + "step": 124 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 63.88526916503906, + "learning_rate": 7.142857142857143e-06, + "loss": 2.222, + "step": 125 + }, + { + "epoch": 0.72, + "grad_norm": 54.57352828979492, + "learning_rate": 7.2e-06, + "loss": 2.4017, + "step": 126 + }, + { + "epoch": 0.7257142857142858, + "grad_norm": 65.19429779052734, + "learning_rate": 7.257142857142857e-06, + "loss": 1.9304, + "step": 127 + }, + { + "epoch": 0.7314285714285714, + "grad_norm": 57.65582275390625, + "learning_rate": 7.314285714285715e-06, + "loss": 2.3272, + "step": 128 + }, + { + "epoch": 0.7371428571428571, + "grad_norm": 40.34184646606445, + "learning_rate": 7.371428571428572e-06, + "loss": 1.9784, + "step": 129 + }, + { + "epoch": 0.7428571428571429, + "grad_norm": 44.75320816040039, + "learning_rate": 7.428571428571429e-06, + "loss": 2.0642, + "step": 130 + }, + { + "epoch": 0.7485714285714286, + "grad_norm": 97.42229461669922, + "learning_rate": 7.485714285714286e-06, + "loss": 1.7851, + "step": 131 + }, + { + "epoch": 0.7542857142857143, + "grad_norm": 48.73323059082031, + "learning_rate": 7.542857142857143e-06, + "loss": 1.8948, + "step": 132 + }, + { + "epoch": 0.76, + "grad_norm": 
68.79501342773438, + "learning_rate": 7.6e-06, + "loss": 1.6354, + "step": 133 + }, + { + "epoch": 0.7657142857142857, + "grad_norm": 42.33122634887695, + "learning_rate": 7.657142857142858e-06, + "loss": 1.6211, + "step": 134 + }, + { + "epoch": 0.7714285714285715, + "grad_norm": 95.96932220458984, + "learning_rate": 7.714285714285714e-06, + "loss": 1.8455, + "step": 135 + }, + { + "epoch": 0.7771428571428571, + "grad_norm": 42.68363571166992, + "learning_rate": 7.771428571428572e-06, + "loss": 1.7399, + "step": 136 + }, + { + "epoch": 0.7828571428571428, + "grad_norm": 55.536964416503906, + "learning_rate": 7.82857142857143e-06, + "loss": 1.6986, + "step": 137 + }, + { + "epoch": 0.7885714285714286, + "grad_norm": 128.13711547851562, + "learning_rate": 7.885714285714286e-06, + "loss": 1.7396, + "step": 138 + }, + { + "epoch": 0.7942857142857143, + "grad_norm": 192.2489776611328, + "learning_rate": 7.942857142857144e-06, + "loss": 1.501, + "step": 139 + }, + { + "epoch": 0.8, + "grad_norm": 282.86810302734375, + "learning_rate": 8.000000000000001e-06, + "loss": 2.0364, + "step": 140 + }, + { + "epoch": 0.8057142857142857, + "grad_norm": 44.54533386230469, + "learning_rate": 8.057142857142857e-06, + "loss": 1.7826, + "step": 141 + }, + { + "epoch": 0.8114285714285714, + "grad_norm": 56.85557556152344, + "learning_rate": 8.114285714285715e-06, + "loss": 1.5573, + "step": 142 + }, + { + "epoch": 0.8171428571428572, + "grad_norm": 56.26758575439453, + "learning_rate": 8.171428571428571e-06, + "loss": 1.612, + "step": 143 + }, + { + "epoch": 0.8228571428571428, + "grad_norm": 52.04902648925781, + "learning_rate": 8.22857142857143e-06, + "loss": 1.5511, + "step": 144 + }, + { + "epoch": 0.8285714285714286, + "grad_norm": 41.186100006103516, + "learning_rate": 8.285714285714285e-06, + "loss": 1.5916, + "step": 145 + }, + { + "epoch": 0.8342857142857143, + "grad_norm": 67.39685821533203, + "learning_rate": 8.342857142857143e-06, + "loss": 1.4581, + "step": 146 + }, + { + 
"epoch": 0.84, + "grad_norm": 31.806520462036133, + "learning_rate": 8.400000000000001e-06, + "loss": 1.4401, + "step": 147 + }, + { + "epoch": 0.8457142857142858, + "grad_norm": 60.504905700683594, + "learning_rate": 8.457142857142859e-06, + "loss": 1.4034, + "step": 148 + }, + { + "epoch": 0.8514285714285714, + "grad_norm": 70.5359115600586, + "learning_rate": 8.514285714285715e-06, + "loss": 1.3912, + "step": 149 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 91.03093719482422, + "learning_rate": 8.571428571428573e-06, + "loss": 1.5493, + "step": 150 + }, + { + "epoch": 0.8628571428571429, + "grad_norm": 56.56180953979492, + "learning_rate": 8.628571428571429e-06, + "loss": 1.2104, + "step": 151 + }, + { + "epoch": 0.8685714285714285, + "grad_norm": 38.104461669921875, + "learning_rate": 8.685714285714287e-06, + "loss": 1.2611, + "step": 152 + }, + { + "epoch": 0.8742857142857143, + "grad_norm": 45.80816650390625, + "learning_rate": 8.742857142857143e-06, + "loss": 1.3031, + "step": 153 + }, + { + "epoch": 0.88, + "grad_norm": 69.43998718261719, + "learning_rate": 8.8e-06, + "loss": 1.4006, + "step": 154 + }, + { + "epoch": 0.8857142857142857, + "grad_norm": 50.02825927734375, + "learning_rate": 8.857142857142857e-06, + "loss": 1.3128, + "step": 155 + }, + { + "epoch": 0.8914285714285715, + "grad_norm": 49.2396125793457, + "learning_rate": 8.914285714285716e-06, + "loss": 1.6515, + "step": 156 + }, + { + "epoch": 0.8971428571428571, + "grad_norm": 58.82089614868164, + "learning_rate": 8.971428571428572e-06, + "loss": 1.6539, + "step": 157 + }, + { + "epoch": 0.9028571428571428, + "grad_norm": 41.613197326660156, + "learning_rate": 9.02857142857143e-06, + "loss": 1.3638, + "step": 158 + }, + { + "epoch": 0.9085714285714286, + "grad_norm": 56.6411018371582, + "learning_rate": 9.085714285714286e-06, + "loss": 1.6067, + "step": 159 + }, + { + "epoch": 0.9142857142857143, + "grad_norm": 25.510644912719727, + "learning_rate": 9.142857142857144e-06, + "loss": 
1.5412, + "step": 160 + }, + { + "epoch": 0.92, + "grad_norm": 47.901302337646484, + "learning_rate": 9.2e-06, + "loss": 1.3587, + "step": 161 + }, + { + "epoch": 0.9257142857142857, + "grad_norm": 52.8482551574707, + "learning_rate": 9.257142857142858e-06, + "loss": 1.6951, + "step": 162 + }, + { + "epoch": 0.9314285714285714, + "grad_norm": 49.34379577636719, + "learning_rate": 9.314285714285714e-06, + "loss": 1.5581, + "step": 163 + }, + { + "epoch": 0.9371428571428572, + "grad_norm": 33.59023666381836, + "learning_rate": 9.371428571428572e-06, + "loss": 1.5744, + "step": 164 + }, + { + "epoch": 0.9428571428571428, + "grad_norm": 41.23167419433594, + "learning_rate": 9.42857142857143e-06, + "loss": 1.2757, + "step": 165 + }, + { + "epoch": 0.9485714285714286, + "grad_norm": 94.31331634521484, + "learning_rate": 9.485714285714287e-06, + "loss": 1.1797, + "step": 166 + }, + { + "epoch": 0.9542857142857143, + "grad_norm": 107.66983795166016, + "learning_rate": 9.542857142857143e-06, + "loss": 1.8364, + "step": 167 + }, + { + "epoch": 0.96, + "grad_norm": 119.86046600341797, + "learning_rate": 9.600000000000001e-06, + "loss": 1.2014, + "step": 168 + }, + { + "epoch": 0.9657142857142857, + "grad_norm": 59.50279235839844, + "learning_rate": 9.657142857142857e-06, + "loss": 1.3769, + "step": 169 + }, + { + "epoch": 0.9714285714285714, + "grad_norm": 52.51278305053711, + "learning_rate": 9.714285714285715e-06, + "loss": 1.5001, + "step": 170 + }, + { + "epoch": 0.9771428571428571, + "grad_norm": 66.36750030517578, + "learning_rate": 9.771428571428571e-06, + "loss": 1.3174, + "step": 171 + }, + { + "epoch": 0.9828571428571429, + "grad_norm": 148.5225067138672, + "learning_rate": 9.828571428571429e-06, + "loss": 1.2301, + "step": 172 + }, + { + "epoch": 0.9885714285714285, + "grad_norm": 57.223838806152344, + "learning_rate": 9.885714285714285e-06, + "loss": 1.3186, + "step": 173 + }, + { + "epoch": 0.9942857142857143, + "grad_norm": 45.917572021484375, + "learning_rate": 
9.942857142857143e-06, + "loss": 1.4557, + "step": 174 + }, + { + "epoch": 1.0, + "grad_norm": 52.964290618896484, + "learning_rate": 1e-05, + "loss": 1.2294, + "step": 175 + }, + { + "epoch": 1.0, + "eval_classes": 0, + "eval_loss": 1.1637780666351318, + "eval_map": 0.1074, + "eval_map_50": 0.1346, + "eval_map_75": 0.1134, + "eval_map_large": 0.1146, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.1074, + "eval_map_small": -1.0, + "eval_mar_1": 0.5098, + "eval_mar_10": 0.8324, + "eval_mar_100": 0.9521, + "eval_mar_100_per_class": 0.9521, + "eval_mar_large": 0.9521, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 17.5957, + "eval_samples_per_second": 16.709, + "eval_steps_per_second": 2.103, + "step": 175 + }, + { + "epoch": 1.0057142857142858, + "grad_norm": 25.71605682373047, + "learning_rate": 1.0057142857142859e-05, + "loss": 1.3411, + "step": 176 + }, + { + "epoch": 1.0114285714285713, + "grad_norm": 59.9210090637207, + "learning_rate": 1.0114285714285715e-05, + "loss": 1.2487, + "step": 177 + }, + { + "epoch": 1.0171428571428571, + "grad_norm": 64.52934265136719, + "learning_rate": 1.0171428571428573e-05, + "loss": 1.266, + "step": 178 + }, + { + "epoch": 1.022857142857143, + "grad_norm": 41.21025085449219, + "learning_rate": 1.0228571428571429e-05, + "loss": 1.2221, + "step": 179 + }, + { + "epoch": 1.0285714285714285, + "grad_norm": 56.34316635131836, + "learning_rate": 1.0285714285714286e-05, + "loss": 1.2948, + "step": 180 + }, + { + "epoch": 1.0342857142857143, + "grad_norm": 52.73511505126953, + "learning_rate": 1.0342857142857143e-05, + "loss": 1.1771, + "step": 181 + }, + { + "epoch": 1.04, + "grad_norm": 76.45502471923828, + "learning_rate": 1.04e-05, + "loss": 1.2123, + "step": 182 + }, + { + "epoch": 1.0457142857142858, + "grad_norm": 83.90774536132812, + "learning_rate": 1.0457142857142856e-05, + "loss": 1.1788, + "step": 183 + }, + { + "epoch": 1.0514285714285714, + "grad_norm": 45.44466781616211, + "learning_rate": 
1.0514285714285716e-05, + "loss": 1.3411, + "step": 184 + }, + { + "epoch": 1.0571428571428572, + "grad_norm": 73.0859146118164, + "learning_rate": 1.0571428571428572e-05, + "loss": 1.3006, + "step": 185 + }, + { + "epoch": 1.062857142857143, + "grad_norm": 39.06739807128906, + "learning_rate": 1.062857142857143e-05, + "loss": 1.1095, + "step": 186 + }, + { + "epoch": 1.0685714285714285, + "grad_norm": 84.93657684326172, + "learning_rate": 1.0685714285714286e-05, + "loss": 1.4484, + "step": 187 + }, + { + "epoch": 1.0742857142857143, + "grad_norm": 48.28471755981445, + "learning_rate": 1.0742857142857144e-05, + "loss": 1.4836, + "step": 188 + }, + { + "epoch": 1.08, + "grad_norm": 76.1847152709961, + "learning_rate": 1.08e-05, + "loss": 1.1036, + "step": 189 + }, + { + "epoch": 1.0857142857142856, + "grad_norm": 74.36713409423828, + "learning_rate": 1.0857142857142858e-05, + "loss": 1.1358, + "step": 190 + }, + { + "epoch": 1.0914285714285714, + "grad_norm": 40.03973388671875, + "learning_rate": 1.0914285714285714e-05, + "loss": 1.4495, + "step": 191 + }, + { + "epoch": 1.0971428571428572, + "grad_norm": 32.62919616699219, + "learning_rate": 1.0971428571428572e-05, + "loss": 1.1748, + "step": 192 + }, + { + "epoch": 1.1028571428571428, + "grad_norm": 116.62271118164062, + "learning_rate": 1.102857142857143e-05, + "loss": 1.3018, + "step": 193 + }, + { + "epoch": 1.1085714285714285, + "grad_norm": 49.567359924316406, + "learning_rate": 1.1085714285714287e-05, + "loss": 1.2993, + "step": 194 + }, + { + "epoch": 1.1142857142857143, + "grad_norm": 89.49858093261719, + "learning_rate": 1.1142857142857143e-05, + "loss": 1.1842, + "step": 195 + }, + { + "epoch": 1.12, + "grad_norm": 86.89800262451172, + "learning_rate": 1.1200000000000001e-05, + "loss": 1.547, + "step": 196 + }, + { + "epoch": 1.1257142857142857, + "grad_norm": 52.09599304199219, + "learning_rate": 1.1257142857142857e-05, + "loss": 1.0948, + "step": 197 + }, + { + "epoch": 1.1314285714285715, + 
"grad_norm": 55.586875915527344, + "learning_rate": 1.1314285714285715e-05, + "loss": 1.1103, + "step": 198 + }, + { + "epoch": 1.1371428571428572, + "grad_norm": 32.5805549621582, + "learning_rate": 1.1371428571428571e-05, + "loss": 1.1465, + "step": 199 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 31.688655853271484, + "learning_rate": 1.1428571428571429e-05, + "loss": 1.3233, + "step": 200 + }, + { + "epoch": 1.1485714285714286, + "grad_norm": 37.952674865722656, + "learning_rate": 1.1485714285714285e-05, + "loss": 1.1838, + "step": 201 + }, + { + "epoch": 1.1542857142857144, + "grad_norm": 74.23160552978516, + "learning_rate": 1.1542857142857143e-05, + "loss": 1.3673, + "step": 202 + }, + { + "epoch": 1.16, + "grad_norm": 106.44403076171875, + "learning_rate": 1.16e-05, + "loss": 1.3237, + "step": 203 + }, + { + "epoch": 1.1657142857142857, + "grad_norm": 39.78948211669922, + "learning_rate": 1.1657142857142859e-05, + "loss": 0.9842, + "step": 204 + }, + { + "epoch": 1.1714285714285715, + "grad_norm": 35.71757507324219, + "learning_rate": 1.1714285714285715e-05, + "loss": 1.315, + "step": 205 + }, + { + "epoch": 1.177142857142857, + "grad_norm": 72.02671813964844, + "learning_rate": 1.1771428571428572e-05, + "loss": 1.0231, + "step": 206 + }, + { + "epoch": 1.1828571428571428, + "grad_norm": 47.552490234375, + "learning_rate": 1.1828571428571429e-05, + "loss": 1.5534, + "step": 207 + }, + { + "epoch": 1.1885714285714286, + "grad_norm": 71.50056457519531, + "learning_rate": 1.1885714285714286e-05, + "loss": 1.2347, + "step": 208 + }, + { + "epoch": 1.1942857142857144, + "grad_norm": 31.451601028442383, + "learning_rate": 1.1942857142857142e-05, + "loss": 0.9793, + "step": 209 + }, + { + "epoch": 1.2, + "grad_norm": 61.02602005004883, + "learning_rate": 1.2e-05, + "loss": 1.1618, + "step": 210 + }, + { + "epoch": 1.2057142857142857, + "grad_norm": 54.90080642700195, + "learning_rate": 1.2057142857142856e-05, + "loss": 1.3242, + "step": 211 + }, + { + 
"epoch": 1.2114285714285715, + "grad_norm": 29.097959518432617, + "learning_rate": 1.2114285714285716e-05, + "loss": 1.0527, + "step": 212 + }, + { + "epoch": 1.217142857142857, + "grad_norm": 34.94865036010742, + "learning_rate": 1.2171428571428572e-05, + "loss": 1.5236, + "step": 213 + }, + { + "epoch": 1.2228571428571429, + "grad_norm": 60.06378173828125, + "learning_rate": 1.222857142857143e-05, + "loss": 1.6229, + "step": 214 + }, + { + "epoch": 1.2285714285714286, + "grad_norm": 55.50552749633789, + "learning_rate": 1.2285714285714286e-05, + "loss": 1.1756, + "step": 215 + }, + { + "epoch": 1.2342857142857142, + "grad_norm": 96.09048461914062, + "learning_rate": 1.2342857142857144e-05, + "loss": 1.3275, + "step": 216 + }, + { + "epoch": 1.24, + "grad_norm": 28.802642822265625, + "learning_rate": 1.24e-05, + "loss": 1.2808, + "step": 217 + }, + { + "epoch": 1.2457142857142858, + "grad_norm": 43.80913543701172, + "learning_rate": 1.2457142857142858e-05, + "loss": 1.176, + "step": 218 + }, + { + "epoch": 1.2514285714285713, + "grad_norm": 78.38436126708984, + "learning_rate": 1.2514285714285714e-05, + "loss": 1.2733, + "step": 219 + }, + { + "epoch": 1.2571428571428571, + "grad_norm": 77.23725891113281, + "learning_rate": 1.2571428571428573e-05, + "loss": 1.0301, + "step": 220 + }, + { + "epoch": 1.262857142857143, + "grad_norm": 29.865652084350586, + "learning_rate": 1.2628571428571428e-05, + "loss": 1.156, + "step": 221 + }, + { + "epoch": 1.2685714285714287, + "grad_norm": 49.52701950073242, + "learning_rate": 1.2685714285714287e-05, + "loss": 1.4922, + "step": 222 + }, + { + "epoch": 1.2742857142857142, + "grad_norm": 71.39379119873047, + "learning_rate": 1.2742857142857143e-05, + "loss": 1.4941, + "step": 223 + }, + { + "epoch": 1.28, + "grad_norm": 73.40324401855469, + "learning_rate": 1.2800000000000001e-05, + "loss": 1.4157, + "step": 224 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 51.31359100341797, + "learning_rate": 1.2857142857142857e-05, 
+ "loss": 1.3963, + "step": 225 + }, + { + "epoch": 1.2914285714285714, + "grad_norm": 335.7010803222656, + "learning_rate": 1.2914285714285715e-05, + "loss": 1.4926, + "step": 226 + }, + { + "epoch": 1.2971428571428572, + "grad_norm": 42.918251037597656, + "learning_rate": 1.2971428571428573e-05, + "loss": 1.2107, + "step": 227 + }, + { + "epoch": 1.302857142857143, + "grad_norm": 62.98908233642578, + "learning_rate": 1.3028571428571429e-05, + "loss": 1.5262, + "step": 228 + }, + { + "epoch": 1.3085714285714285, + "grad_norm": 42.34180450439453, + "learning_rate": 1.3085714285714287e-05, + "loss": 1.3537, + "step": 229 + }, + { + "epoch": 1.3142857142857143, + "grad_norm": 32.696128845214844, + "learning_rate": 1.3142857142857143e-05, + "loss": 1.2539, + "step": 230 + }, + { + "epoch": 1.32, + "grad_norm": 64.67933654785156, + "learning_rate": 1.32e-05, + "loss": 1.435, + "step": 231 + }, + { + "epoch": 1.3257142857142856, + "grad_norm": 53.249977111816406, + "learning_rate": 1.3257142857142857e-05, + "loss": 1.2543, + "step": 232 + }, + { + "epoch": 1.3314285714285714, + "grad_norm": 45.25177764892578, + "learning_rate": 1.3314285714285715e-05, + "loss": 1.6551, + "step": 233 + }, + { + "epoch": 1.3371428571428572, + "grad_norm": 39.65488815307617, + "learning_rate": 1.337142857142857e-05, + "loss": 1.3281, + "step": 234 + }, + { + "epoch": 1.342857142857143, + "grad_norm": 48.21430206298828, + "learning_rate": 1.3428571428571429e-05, + "loss": 1.1968, + "step": 235 + }, + { + "epoch": 1.3485714285714285, + "grad_norm": 72.81331634521484, + "learning_rate": 1.3485714285714288e-05, + "loss": 1.1244, + "step": 236 + }, + { + "epoch": 1.3542857142857143, + "grad_norm": 66.70149230957031, + "learning_rate": 1.3542857142857142e-05, + "loss": 1.0649, + "step": 237 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 40.40673828125, + "learning_rate": 1.3600000000000002e-05, + "loss": 1.2182, + "step": 238 + }, + { + "epoch": 1.3657142857142857, + "grad_norm": 
50.67107391357422, + "learning_rate": 1.3657142857142858e-05, + "loss": 1.2684, + "step": 239 + }, + { + "epoch": 1.3714285714285714, + "grad_norm": 56.261566162109375, + "learning_rate": 1.3714285714285716e-05, + "loss": 1.1621, + "step": 240 + }, + { + "epoch": 1.3771428571428572, + "grad_norm": 43.4561882019043, + "learning_rate": 1.3771428571428572e-05, + "loss": 1.236, + "step": 241 + }, + { + "epoch": 1.3828571428571428, + "grad_norm": 58.3783073425293, + "learning_rate": 1.382857142857143e-05, + "loss": 1.1642, + "step": 242 + }, + { + "epoch": 1.3885714285714286, + "grad_norm": 86.39718627929688, + "learning_rate": 1.3885714285714286e-05, + "loss": 1.2724, + "step": 243 + }, + { + "epoch": 1.3942857142857144, + "grad_norm": 44.4029541015625, + "learning_rate": 1.3942857142857144e-05, + "loss": 1.323, + "step": 244 + }, + { + "epoch": 1.4, + "grad_norm": 68.77522277832031, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.8696, + "step": 245 + }, + { + "epoch": 1.4057142857142857, + "grad_norm": 40.915870666503906, + "learning_rate": 1.4057142857142858e-05, + "loss": 1.1746, + "step": 246 + }, + { + "epoch": 1.4114285714285715, + "grad_norm": 41.75663375854492, + "learning_rate": 1.4114285714285715e-05, + "loss": 1.1253, + "step": 247 + }, + { + "epoch": 1.4171428571428573, + "grad_norm": 41.473060607910156, + "learning_rate": 1.4171428571428572e-05, + "loss": 1.4069, + "step": 248 + }, + { + "epoch": 1.4228571428571428, + "grad_norm": 33.94078063964844, + "learning_rate": 1.422857142857143e-05, + "loss": 1.2038, + "step": 249 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 25.82473373413086, + "learning_rate": 1.4285714285714285e-05, + "loss": 0.9384, + "step": 250 + }, + { + "epoch": 1.4342857142857142, + "grad_norm": 49.748172760009766, + "learning_rate": 1.4342857142857143e-05, + "loss": 1.1221, + "step": 251 + }, + { + "epoch": 1.44, + "grad_norm": 50.44265365600586, + "learning_rate": 1.44e-05, + "loss": 1.0359, + "step": 252 + }, + { + 
"epoch": 1.4457142857142857, + "grad_norm": 77.93119812011719, + "learning_rate": 1.4457142857142857e-05, + "loss": 1.1011, + "step": 253 + }, + { + "epoch": 1.4514285714285715, + "grad_norm": 103.56857299804688, + "learning_rate": 1.4514285714285713e-05, + "loss": 1.1609, + "step": 254 + }, + { + "epoch": 1.457142857142857, + "grad_norm": 57.082916259765625, + "learning_rate": 1.4571428571428573e-05, + "loss": 1.153, + "step": 255 + }, + { + "epoch": 1.4628571428571429, + "grad_norm": 29.88153839111328, + "learning_rate": 1.462857142857143e-05, + "loss": 1.0068, + "step": 256 + }, + { + "epoch": 1.4685714285714286, + "grad_norm": 48.73033142089844, + "learning_rate": 1.4685714285714287e-05, + "loss": 1.0846, + "step": 257 + }, + { + "epoch": 1.4742857142857142, + "grad_norm": 50.5568733215332, + "learning_rate": 1.4742857142857144e-05, + "loss": 1.1191, + "step": 258 + }, + { + "epoch": 1.48, + "grad_norm": 36.18242645263672, + "learning_rate": 1.48e-05, + "loss": 0.8898, + "step": 259 + }, + { + "epoch": 1.4857142857142858, + "grad_norm": 38.34716033935547, + "learning_rate": 1.4857142857142858e-05, + "loss": 1.1931, + "step": 260 + }, + { + "epoch": 1.4914285714285715, + "grad_norm": 34.09955596923828, + "learning_rate": 1.4914285714285715e-05, + "loss": 1.2114, + "step": 261 + }, + { + "epoch": 1.497142857142857, + "grad_norm": 90.42156982421875, + "learning_rate": 1.4971428571428572e-05, + "loss": 1.0864, + "step": 262 + }, + { + "epoch": 1.502857142857143, + "grad_norm": 67.16676330566406, + "learning_rate": 1.5028571428571428e-05, + "loss": 0.845, + "step": 263 + }, + { + "epoch": 1.5085714285714285, + "grad_norm": 64.06490325927734, + "learning_rate": 1.5085714285714286e-05, + "loss": 1.164, + "step": 264 + }, + { + "epoch": 1.5142857142857142, + "grad_norm": 60.20040512084961, + "learning_rate": 1.5142857142857144e-05, + "loss": 0.8942, + "step": 265 + }, + { + "epoch": 1.52, + "grad_norm": 41.62589645385742, + "learning_rate": 1.52e-05, + "loss": 1.2033, 
+ "step": 266 + }, + { + "epoch": 1.5257142857142858, + "grad_norm": 56.95344543457031, + "learning_rate": 1.5257142857142858e-05, + "loss": 0.8938, + "step": 267 + }, + { + "epoch": 1.5314285714285716, + "grad_norm": 37.76027297973633, + "learning_rate": 1.5314285714285716e-05, + "loss": 1.2119, + "step": 268 + }, + { + "epoch": 1.5371428571428571, + "grad_norm": 43.52880096435547, + "learning_rate": 1.5371428571428572e-05, + "loss": 1.2131, + "step": 269 + }, + { + "epoch": 1.5428571428571427, + "grad_norm": 65.82935333251953, + "learning_rate": 1.5428571428571428e-05, + "loss": 1.2843, + "step": 270 + }, + { + "epoch": 1.5485714285714285, + "grad_norm": 49.85045623779297, + "learning_rate": 1.5485714285714287e-05, + "loss": 1.2297, + "step": 271 + }, + { + "epoch": 1.5542857142857143, + "grad_norm": 43.79518508911133, + "learning_rate": 1.5542857142857144e-05, + "loss": 0.9763, + "step": 272 + }, + { + "epoch": 1.56, + "grad_norm": 55.906646728515625, + "learning_rate": 1.56e-05, + "loss": 1.4565, + "step": 273 + }, + { + "epoch": 1.5657142857142858, + "grad_norm": 43.23767852783203, + "learning_rate": 1.565714285714286e-05, + "loss": 1.0054, + "step": 274 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 29.514699935913086, + "learning_rate": 1.5714285714285715e-05, + "loss": 1.0612, + "step": 275 + }, + { + "epoch": 1.5771428571428572, + "grad_norm": 47.55738067626953, + "learning_rate": 1.577142857142857e-05, + "loss": 1.1649, + "step": 276 + }, + { + "epoch": 1.5828571428571427, + "grad_norm": 60.16688537597656, + "learning_rate": 1.5828571428571428e-05, + "loss": 1.1763, + "step": 277 + }, + { + "epoch": 1.5885714285714285, + "grad_norm": 31.548669815063477, + "learning_rate": 1.5885714285714287e-05, + "loss": 1.0998, + "step": 278 + }, + { + "epoch": 1.5942857142857143, + "grad_norm": 84.46231842041016, + "learning_rate": 1.5942857142857143e-05, + "loss": 1.1437, + "step": 279 + }, + { + "epoch": 1.6, + "grad_norm": 24.36756706237793, + 
"learning_rate": 1.6000000000000003e-05, + "loss": 1.1319, + "step": 280 + }, + { + "epoch": 1.6057142857142859, + "grad_norm": 41.27078628540039, + "learning_rate": 1.6057142857142855e-05, + "loss": 1.2245, + "step": 281 + }, + { + "epoch": 1.6114285714285714, + "grad_norm": 47.624202728271484, + "learning_rate": 1.6114285714285715e-05, + "loss": 1.0325, + "step": 282 + }, + { + "epoch": 1.617142857142857, + "grad_norm": 34.220977783203125, + "learning_rate": 1.6171428571428574e-05, + "loss": 1.1388, + "step": 283 + }, + { + "epoch": 1.6228571428571428, + "grad_norm": 80.00936889648438, + "learning_rate": 1.622857142857143e-05, + "loss": 1.4803, + "step": 284 + }, + { + "epoch": 1.6285714285714286, + "grad_norm": 55.12397384643555, + "learning_rate": 1.6285714285714287e-05, + "loss": 1.0528, + "step": 285 + }, + { + "epoch": 1.6342857142857143, + "grad_norm": 58.02750015258789, + "learning_rate": 1.6342857142857143e-05, + "loss": 0.9903, + "step": 286 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 56.112327575683594, + "learning_rate": 1.6400000000000002e-05, + "loss": 0.8939, + "step": 287 + }, + { + "epoch": 1.6457142857142857, + "grad_norm": 32.75306701660156, + "learning_rate": 1.645714285714286e-05, + "loss": 1.0878, + "step": 288 + }, + { + "epoch": 1.6514285714285715, + "grad_norm": 32.49684524536133, + "learning_rate": 1.6514285714285714e-05, + "loss": 1.1555, + "step": 289 + }, + { + "epoch": 1.657142857142857, + "grad_norm": 28.5819034576416, + "learning_rate": 1.657142857142857e-05, + "loss": 1.4024, + "step": 290 + }, + { + "epoch": 1.6628571428571428, + "grad_norm": 77.95793151855469, + "learning_rate": 1.662857142857143e-05, + "loss": 1.4714, + "step": 291 + }, + { + "epoch": 1.6685714285714286, + "grad_norm": 81.19375610351562, + "learning_rate": 1.6685714285714286e-05, + "loss": 1.686, + "step": 292 + }, + { + "epoch": 1.6742857142857144, + "grad_norm": 42.35756301879883, + "learning_rate": 1.6742857142857142e-05, + "loss": 1.4978, + 
"step": 293 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 37.717281341552734, + "learning_rate": 1.6800000000000002e-05, + "loss": 1.2754, + "step": 294 + }, + { + "epoch": 1.6857142857142857, + "grad_norm": 37.30710983276367, + "learning_rate": 1.6857142857142858e-05, + "loss": 1.0861, + "step": 295 + }, + { + "epoch": 1.6914285714285713, + "grad_norm": 47.3027458190918, + "learning_rate": 1.6914285714285717e-05, + "loss": 1.171, + "step": 296 + }, + { + "epoch": 1.697142857142857, + "grad_norm": 55.43088912963867, + "learning_rate": 1.697142857142857e-05, + "loss": 0.9904, + "step": 297 + }, + { + "epoch": 1.7028571428571428, + "grad_norm": 51.3643684387207, + "learning_rate": 1.702857142857143e-05, + "loss": 1.1707, + "step": 298 + }, + { + "epoch": 1.7085714285714286, + "grad_norm": 44.561859130859375, + "learning_rate": 1.7085714285714286e-05, + "loss": 1.096, + "step": 299 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 45.064903259277344, + "learning_rate": 1.7142857142857145e-05, + "loss": 1.259, + "step": 300 + }, + { + "epoch": 1.72, + "grad_norm": 54.00167465209961, + "learning_rate": 1.7199999999999998e-05, + "loss": 1.1983, + "step": 301 + }, + { + "epoch": 1.7257142857142858, + "grad_norm": 61.48951721191406, + "learning_rate": 1.7257142857142857e-05, + "loss": 1.0426, + "step": 302 + }, + { + "epoch": 1.7314285714285713, + "grad_norm": 49.40690612792969, + "learning_rate": 1.7314285714285717e-05, + "loss": 0.9505, + "step": 303 + }, + { + "epoch": 1.737142857142857, + "grad_norm": 41.197330474853516, + "learning_rate": 1.7371428571428573e-05, + "loss": 1.4243, + "step": 304 + }, + { + "epoch": 1.7428571428571429, + "grad_norm": 49.21907424926758, + "learning_rate": 1.742857142857143e-05, + "loss": 1.2452, + "step": 305 + }, + { + "epoch": 1.7485714285714287, + "grad_norm": 48.3714599609375, + "learning_rate": 1.7485714285714285e-05, + "loss": 0.9797, + "step": 306 + }, + { + "epoch": 1.7542857142857144, + "grad_norm": 
59.02680969238281, + "learning_rate": 1.7542857142857145e-05, + "loss": 1.0165, + "step": 307 + }, + { + "epoch": 1.76, + "grad_norm": 43.11337661743164, + "learning_rate": 1.76e-05, + "loss": 1.0995, + "step": 308 + }, + { + "epoch": 1.7657142857142856, + "grad_norm": 55.11835479736328, + "learning_rate": 1.7657142857142857e-05, + "loss": 1.1308, + "step": 309 + }, + { + "epoch": 1.7714285714285714, + "grad_norm": 41.109554290771484, + "learning_rate": 1.7714285714285713e-05, + "loss": 1.2723, + "step": 310 + }, + { + "epoch": 1.7771428571428571, + "grad_norm": 79.188720703125, + "learning_rate": 1.7771428571428573e-05, + "loss": 1.4638, + "step": 311 + }, + { + "epoch": 1.782857142857143, + "grad_norm": 57.13640213012695, + "learning_rate": 1.7828571428571432e-05, + "loss": 1.3124, + "step": 312 + }, + { + "epoch": 1.7885714285714287, + "grad_norm": 36.40741729736328, + "learning_rate": 1.7885714285714285e-05, + "loss": 1.0631, + "step": 313 + }, + { + "epoch": 1.7942857142857143, + "grad_norm": 48.303768157958984, + "learning_rate": 1.7942857142857144e-05, + "loss": 0.8791, + "step": 314 + }, + { + "epoch": 1.8, + "grad_norm": 29.768587112426758, + "learning_rate": 1.8e-05, + "loss": 1.0898, + "step": 315 + }, + { + "epoch": 1.8057142857142856, + "grad_norm": 39.185028076171875, + "learning_rate": 1.805714285714286e-05, + "loss": 1.1098, + "step": 316 + }, + { + "epoch": 1.8114285714285714, + "grad_norm": 32.381385803222656, + "learning_rate": 1.8114285714285713e-05, + "loss": 1.0213, + "step": 317 + }, + { + "epoch": 1.8171428571428572, + "grad_norm": 52.5543098449707, + "learning_rate": 1.8171428571428572e-05, + "loss": 0.9562, + "step": 318 + }, + { + "epoch": 1.822857142857143, + "grad_norm": 35.95527648925781, + "learning_rate": 1.8228571428571428e-05, + "loss": 0.9972, + "step": 319 + }, + { + "epoch": 1.8285714285714287, + "grad_norm": 42.66740036010742, + "learning_rate": 1.8285714285714288e-05, + "loss": 0.9987, + "step": 320 + }, + { + "epoch": 
1.8342857142857143, + "grad_norm": 76.55131530761719, + "learning_rate": 1.8342857142857144e-05, + "loss": 1.0302, + "step": 321 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 45.564605712890625, + "learning_rate": 1.84e-05, + "loss": 1.0857, + "step": 322 + }, + { + "epoch": 1.8457142857142856, + "grad_norm": 28.313241958618164, + "learning_rate": 1.845714285714286e-05, + "loss": 1.0884, + "step": 323 + }, + { + "epoch": 1.8514285714285714, + "grad_norm": 38.595279693603516, + "learning_rate": 1.8514285714285716e-05, + "loss": 1.1742, + "step": 324 + }, + { + "epoch": 1.8571428571428572, + "grad_norm": 32.34958267211914, + "learning_rate": 1.8571428571428572e-05, + "loss": 0.9849, + "step": 325 + }, + { + "epoch": 1.862857142857143, + "grad_norm": 51.1898307800293, + "learning_rate": 1.8628571428571428e-05, + "loss": 1.5177, + "step": 326 + }, + { + "epoch": 1.8685714285714285, + "grad_norm": 80.85442352294922, + "learning_rate": 1.8685714285714287e-05, + "loss": 0.8764, + "step": 327 + }, + { + "epoch": 1.8742857142857143, + "grad_norm": 43.80781173706055, + "learning_rate": 1.8742857142857143e-05, + "loss": 1.2376, + "step": 328 + }, + { + "epoch": 1.88, + "grad_norm": 83.95499420166016, + "learning_rate": 1.88e-05, + "loss": 0.981, + "step": 329 + }, + { + "epoch": 1.8857142857142857, + "grad_norm": 42.65687942504883, + "learning_rate": 1.885714285714286e-05, + "loss": 1.0849, + "step": 330 + }, + { + "epoch": 1.8914285714285715, + "grad_norm": 46.72631072998047, + "learning_rate": 1.8914285714285715e-05, + "loss": 1.2969, + "step": 331 + }, + { + "epoch": 1.8971428571428572, + "grad_norm": 76.61483001708984, + "learning_rate": 1.8971428571428575e-05, + "loss": 1.0192, + "step": 332 + }, + { + "epoch": 1.9028571428571428, + "grad_norm": 34.24771499633789, + "learning_rate": 1.9028571428571427e-05, + "loss": 1.0717, + "step": 333 + }, + { + "epoch": 1.9085714285714286, + "grad_norm": 124.33210754394531, + "learning_rate": 1.9085714285714287e-05, + 
"loss": 1.0499, + "step": 334 + }, + { + "epoch": 1.9142857142857141, + "grad_norm": 45.05997085571289, + "learning_rate": 1.9142857142857143e-05, + "loss": 1.1427, + "step": 335 + }, + { + "epoch": 1.92, + "grad_norm": 43.917236328125, + "learning_rate": 1.9200000000000003e-05, + "loss": 1.0491, + "step": 336 + }, + { + "epoch": 1.9257142857142857, + "grad_norm": 74.866455078125, + "learning_rate": 1.9257142857142855e-05, + "loss": 0.9418, + "step": 337 + }, + { + "epoch": 1.9314285714285715, + "grad_norm": 46.80175018310547, + "learning_rate": 1.9314285714285715e-05, + "loss": 1.3776, + "step": 338 + }, + { + "epoch": 1.9371428571428573, + "grad_norm": 69.39533233642578, + "learning_rate": 1.9371428571428574e-05, + "loss": 1.035, + "step": 339 + }, + { + "epoch": 1.9428571428571428, + "grad_norm": 79.26314544677734, + "learning_rate": 1.942857142857143e-05, + "loss": 1.0309, + "step": 340 + }, + { + "epoch": 1.9485714285714286, + "grad_norm": 91.3028793334961, + "learning_rate": 1.9485714285714286e-05, + "loss": 1.2112, + "step": 341 + }, + { + "epoch": 1.9542857142857142, + "grad_norm": 67.27446746826172, + "learning_rate": 1.9542857142857143e-05, + "loss": 1.0539, + "step": 342 + }, + { + "epoch": 1.96, + "grad_norm": 117.01478576660156, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.8803, + "step": 343 + }, + { + "epoch": 1.9657142857142857, + "grad_norm": 49.12966537475586, + "learning_rate": 1.9657142857142858e-05, + "loss": 1.305, + "step": 344 + }, + { + "epoch": 1.9714285714285715, + "grad_norm": 36.83738708496094, + "learning_rate": 1.9714285714285714e-05, + "loss": 1.0059, + "step": 345 + }, + { + "epoch": 1.977142857142857, + "grad_norm": 55.849609375, + "learning_rate": 1.977142857142857e-05, + "loss": 1.0178, + "step": 346 + }, + { + "epoch": 1.9828571428571429, + "grad_norm": 47.24936294555664, + "learning_rate": 1.982857142857143e-05, + "loss": 1.1549, + "step": 347 + }, + { + "epoch": 1.9885714285714284, + "grad_norm": 38.20663070678711, + 
"learning_rate": 1.9885714285714286e-05, + "loss": 0.8695, + "step": 348 + }, + { + "epoch": 1.9942857142857142, + "grad_norm": 37.89916229248047, + "learning_rate": 1.9942857142857142e-05, + "loss": 1.0985, + "step": 349 + }, + { + "epoch": 2.0, + "grad_norm": 26.7008113861084, + "learning_rate": 2e-05, + "loss": 0.9978, + "step": 350 + }, + { + "epoch": 2.0, + "eval_classes": 0, + "eval_loss": 0.9191280007362366, + "eval_map": 0.287, + "eval_map_50": 0.3324, + "eval_map_75": 0.3026, + "eval_map_large": 0.2872, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.287, + "eval_map_small": -1.0, + "eval_mar_1": 0.6952, + "eval_mar_10": 0.9121, + "eval_mar_100": 0.9737, + "eval_mar_100_per_class": 0.9737, + "eval_mar_large": 0.9737, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 18.6378, + "eval_samples_per_second": 15.774, + "eval_steps_per_second": 1.985, + "step": 350 + }, + { + "epoch": 2.005714285714286, + "grad_norm": 47.30525207519531, + "learning_rate": 2.0057142857142858e-05, + "loss": 0.9073, + "step": 351 + }, + { + "epoch": 2.0114285714285716, + "grad_norm": 54.44076919555664, + "learning_rate": 2.0114285714285717e-05, + "loss": 1.0188, + "step": 352 + }, + { + "epoch": 2.0171428571428573, + "grad_norm": 61.43099594116211, + "learning_rate": 2.0171428571428573e-05, + "loss": 0.8953, + "step": 353 + }, + { + "epoch": 2.0228571428571427, + "grad_norm": 52.021724700927734, + "learning_rate": 2.022857142857143e-05, + "loss": 1.2142, + "step": 354 + }, + { + "epoch": 2.0285714285714285, + "grad_norm": 96.9588394165039, + "learning_rate": 2.0285714285714286e-05, + "loss": 1.1424, + "step": 355 + }, + { + "epoch": 2.0342857142857143, + "grad_norm": 43.736534118652344, + "learning_rate": 2.0342857142857145e-05, + "loss": 1.0734, + "step": 356 + }, + { + "epoch": 2.04, + "grad_norm": 26.348413467407227, + "learning_rate": 2.04e-05, + "loss": 1.2237, + "step": 357 + }, + { + "epoch": 2.045714285714286, + "grad_norm": 92.93049621582031, + 
"learning_rate": 2.0457142857142857e-05, + "loss": 1.0244, + "step": 358 + }, + { + "epoch": 2.0514285714285716, + "grad_norm": 43.89330291748047, + "learning_rate": 2.0514285714285717e-05, + "loss": 1.1088, + "step": 359 + }, + { + "epoch": 2.057142857142857, + "grad_norm": 31.635217666625977, + "learning_rate": 2.0571428571428573e-05, + "loss": 0.9592, + "step": 360 + }, + { + "epoch": 2.0628571428571427, + "grad_norm": 47.123844146728516, + "learning_rate": 2.062857142857143e-05, + "loss": 0.9273, + "step": 361 + }, + { + "epoch": 2.0685714285714285, + "grad_norm": 70.71419525146484, + "learning_rate": 2.0685714285714285e-05, + "loss": 1.1692, + "step": 362 + }, + { + "epoch": 2.0742857142857143, + "grad_norm": 54.28295135498047, + "learning_rate": 2.0742857142857145e-05, + "loss": 0.9669, + "step": 363 + }, + { + "epoch": 2.08, + "grad_norm": 41.889286041259766, + "learning_rate": 2.08e-05, + "loss": 0.9807, + "step": 364 + }, + { + "epoch": 2.085714285714286, + "grad_norm": 88.92764282226562, + "learning_rate": 2.0857142857142857e-05, + "loss": 1.18, + "step": 365 + }, + { + "epoch": 2.0914285714285716, + "grad_norm": 43.853431701660156, + "learning_rate": 2.0914285714285713e-05, + "loss": 1.0387, + "step": 366 + }, + { + "epoch": 2.097142857142857, + "grad_norm": 42.877838134765625, + "learning_rate": 2.0971428571428572e-05, + "loss": 1.043, + "step": 367 + }, + { + "epoch": 2.1028571428571428, + "grad_norm": 44.86766052246094, + "learning_rate": 2.1028571428571432e-05, + "loss": 0.9026, + "step": 368 + }, + { + "epoch": 2.1085714285714285, + "grad_norm": 36.03720474243164, + "learning_rate": 2.1085714285714288e-05, + "loss": 0.9818, + "step": 369 + }, + { + "epoch": 2.1142857142857143, + "grad_norm": 60.320594787597656, + "learning_rate": 2.1142857142857144e-05, + "loss": 1.2608, + "step": 370 + }, + { + "epoch": 2.12, + "grad_norm": 81.33612060546875, + "learning_rate": 2.12e-05, + "loss": 1.1488, + "step": 371 + }, + { + "epoch": 2.125714285714286, + 
"grad_norm": 66.61579895019531, + "learning_rate": 2.125714285714286e-05, + "loss": 1.0263, + "step": 372 + }, + { + "epoch": 2.1314285714285712, + "grad_norm": 48.11174392700195, + "learning_rate": 2.1314285714285716e-05, + "loss": 0.8535, + "step": 373 + }, + { + "epoch": 2.137142857142857, + "grad_norm": 61.046600341796875, + "learning_rate": 2.1371428571428572e-05, + "loss": 1.0907, + "step": 374 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 30.193220138549805, + "learning_rate": 2.1428571428571428e-05, + "loss": 1.1635, + "step": 375 + }, + { + "epoch": 2.1485714285714286, + "grad_norm": 48.01976776123047, + "learning_rate": 2.1485714285714288e-05, + "loss": 1.0505, + "step": 376 + }, + { + "epoch": 2.1542857142857144, + "grad_norm": 70.48137664794922, + "learning_rate": 2.1542857142857144e-05, + "loss": 0.9531, + "step": 377 + }, + { + "epoch": 2.16, + "grad_norm": 45.929988861083984, + "learning_rate": 2.16e-05, + "loss": 0.9404, + "step": 378 + }, + { + "epoch": 2.1657142857142855, + "grad_norm": 98.45175170898438, + "learning_rate": 2.165714285714286e-05, + "loss": 1.0931, + "step": 379 + }, + { + "epoch": 2.1714285714285713, + "grad_norm": 50.332740783691406, + "learning_rate": 2.1714285714285715e-05, + "loss": 1.4033, + "step": 380 + }, + { + "epoch": 2.177142857142857, + "grad_norm": 78.17147064208984, + "learning_rate": 2.177142857142857e-05, + "loss": 1.0196, + "step": 381 + }, + { + "epoch": 2.182857142857143, + "grad_norm": 45.67463684082031, + "learning_rate": 2.1828571428571428e-05, + "loss": 1.133, + "step": 382 + }, + { + "epoch": 2.1885714285714286, + "grad_norm": 57.02043533325195, + "learning_rate": 2.1885714285714287e-05, + "loss": 1.1489, + "step": 383 + }, + { + "epoch": 2.1942857142857144, + "grad_norm": 72.92861938476562, + "learning_rate": 2.1942857142857143e-05, + "loss": 1.1011, + "step": 384 + }, + { + "epoch": 2.2, + "grad_norm": 46.322547912597656, + "learning_rate": 2.2000000000000003e-05, + "loss": 1.2764, + "step": 385 + 
}, + { + "epoch": 2.2057142857142855, + "grad_norm": 50.237022399902344, + "learning_rate": 2.205714285714286e-05, + "loss": 1.1354, + "step": 386 + }, + { + "epoch": 2.2114285714285713, + "grad_norm": 143.770751953125, + "learning_rate": 2.2114285714285715e-05, + "loss": 1.147, + "step": 387 + }, + { + "epoch": 2.217142857142857, + "grad_norm": 58.19064712524414, + "learning_rate": 2.2171428571428575e-05, + "loss": 1.2218, + "step": 388 + }, + { + "epoch": 2.222857142857143, + "grad_norm": 133.52745056152344, + "learning_rate": 2.222857142857143e-05, + "loss": 1.3217, + "step": 389 + }, + { + "epoch": 2.2285714285714286, + "grad_norm": 55.836387634277344, + "learning_rate": 2.2285714285714287e-05, + "loss": 1.2342, + "step": 390 + }, + { + "epoch": 2.2342857142857144, + "grad_norm": 46.55270004272461, + "learning_rate": 2.2342857142857143e-05, + "loss": 1.1928, + "step": 391 + }, + { + "epoch": 2.24, + "grad_norm": 69.13031768798828, + "learning_rate": 2.2400000000000002e-05, + "loss": 1.3276, + "step": 392 + }, + { + "epoch": 2.2457142857142856, + "grad_norm": 94.66584777832031, + "learning_rate": 2.245714285714286e-05, + "loss": 1.3648, + "step": 393 + }, + { + "epoch": 2.2514285714285713, + "grad_norm": 132.82162475585938, + "learning_rate": 2.2514285714285715e-05, + "loss": 1.0906, + "step": 394 + }, + { + "epoch": 2.257142857142857, + "grad_norm": 166.76165771484375, + "learning_rate": 2.257142857142857e-05, + "loss": 1.1635, + "step": 395 + }, + { + "epoch": 2.262857142857143, + "grad_norm": 49.48722457885742, + "learning_rate": 2.262857142857143e-05, + "loss": 1.457, + "step": 396 + }, + { + "epoch": 2.2685714285714287, + "grad_norm": 51.98416519165039, + "learning_rate": 2.2685714285714286e-05, + "loss": 1.397, + "step": 397 + }, + { + "epoch": 2.2742857142857145, + "grad_norm": 268.32562255859375, + "learning_rate": 2.2742857142857142e-05, + "loss": 1.1811, + "step": 398 + }, + { + "epoch": 2.2800000000000002, + "grad_norm": 113.3601303100586, + 
"learning_rate": 2.2800000000000002e-05, + "loss": 1.6457, + "step": 399 + }, + { + "epoch": 2.2857142857142856, + "grad_norm": 42.322349548339844, + "learning_rate": 2.2857142857142858e-05, + "loss": 1.529, + "step": 400 + }, + { + "epoch": 2.2914285714285714, + "grad_norm": 37.9848518371582, + "learning_rate": 2.2914285714285718e-05, + "loss": 1.1678, + "step": 401 + }, + { + "epoch": 2.297142857142857, + "grad_norm": 58.746028900146484, + "learning_rate": 2.297142857142857e-05, + "loss": 1.0461, + "step": 402 + }, + { + "epoch": 2.302857142857143, + "grad_norm": 33.83110427856445, + "learning_rate": 2.302857142857143e-05, + "loss": 1.1699, + "step": 403 + }, + { + "epoch": 2.3085714285714287, + "grad_norm": 37.950626373291016, + "learning_rate": 2.3085714285714286e-05, + "loss": 0.8945, + "step": 404 + }, + { + "epoch": 2.314285714285714, + "grad_norm": 36.12480926513672, + "learning_rate": 2.3142857142857145e-05, + "loss": 0.9123, + "step": 405 + }, + { + "epoch": 2.32, + "grad_norm": 44.77686309814453, + "learning_rate": 2.32e-05, + "loss": 1.0462, + "step": 406 + }, + { + "epoch": 2.3257142857142856, + "grad_norm": 91.76225280761719, + "learning_rate": 2.3257142857142858e-05, + "loss": 1.1602, + "step": 407 + }, + { + "epoch": 2.3314285714285714, + "grad_norm": 46.81283950805664, + "learning_rate": 2.3314285714285717e-05, + "loss": 1.0097, + "step": 408 + }, + { + "epoch": 2.337142857142857, + "grad_norm": 78.25968170166016, + "learning_rate": 2.3371428571428573e-05, + "loss": 0.9829, + "step": 409 + }, + { + "epoch": 2.342857142857143, + "grad_norm": 38.0760498046875, + "learning_rate": 2.342857142857143e-05, + "loss": 1.1976, + "step": 410 + }, + { + "epoch": 2.3485714285714288, + "grad_norm": 118.48062896728516, + "learning_rate": 2.3485714285714285e-05, + "loss": 1.0831, + "step": 411 + }, + { + "epoch": 2.354285714285714, + "grad_norm": 63.731868743896484, + "learning_rate": 2.3542857142857145e-05, + "loss": 1.0006, + "step": 412 + }, + { + "epoch": 
2.36, + "grad_norm": 57.7193603515625, + "learning_rate": 2.36e-05, + "loss": 0.8601, + "step": 413 + }, + { + "epoch": 2.3657142857142857, + "grad_norm": 65.94424438476562, + "learning_rate": 2.3657142857142857e-05, + "loss": 1.0073, + "step": 414 + }, + { + "epoch": 2.3714285714285714, + "grad_norm": 38.24591064453125, + "learning_rate": 2.3714285714285717e-05, + "loss": 1.0621, + "step": 415 + }, + { + "epoch": 2.3771428571428572, + "grad_norm": 53.88074493408203, + "learning_rate": 2.3771428571428573e-05, + "loss": 0.8198, + "step": 416 + }, + { + "epoch": 2.382857142857143, + "grad_norm": 45.4550895690918, + "learning_rate": 2.3828571428571432e-05, + "loss": 1.3931, + "step": 417 + }, + { + "epoch": 2.388571428571429, + "grad_norm": 42.56758117675781, + "learning_rate": 2.3885714285714285e-05, + "loss": 1.0437, + "step": 418 + }, + { + "epoch": 2.394285714285714, + "grad_norm": 54.02827072143555, + "learning_rate": 2.3942857142857144e-05, + "loss": 1.1002, + "step": 419 + }, + { + "epoch": 2.4, + "grad_norm": 65.14449310302734, + "learning_rate": 2.4e-05, + "loss": 1.2548, + "step": 420 + }, + { + "epoch": 2.4057142857142857, + "grad_norm": 60.62049102783203, + "learning_rate": 2.405714285714286e-05, + "loss": 1.0646, + "step": 421 + }, + { + "epoch": 2.4114285714285715, + "grad_norm": 47.858726501464844, + "learning_rate": 2.4114285714285713e-05, + "loss": 1.0466, + "step": 422 + }, + { + "epoch": 2.4171428571428573, + "grad_norm": 50.54081726074219, + "learning_rate": 2.4171428571428572e-05, + "loss": 1.0147, + "step": 423 + }, + { + "epoch": 2.422857142857143, + "grad_norm": 40.64912033081055, + "learning_rate": 2.4228571428571432e-05, + "loss": 0.9584, + "step": 424 + }, + { + "epoch": 2.4285714285714284, + "grad_norm": 26.813034057617188, + "learning_rate": 2.4285714285714288e-05, + "loss": 0.8747, + "step": 425 + }, + { + "epoch": 2.434285714285714, + "grad_norm": 55.92356491088867, + "learning_rate": 2.4342857142857144e-05, + "loss": 1.3426, + "step": 
426 + }, + { + "epoch": 2.44, + "grad_norm": 102.78366088867188, + "learning_rate": 2.44e-05, + "loss": 0.8812, + "step": 427 + }, + { + "epoch": 2.4457142857142857, + "grad_norm": 34.32600021362305, + "learning_rate": 2.445714285714286e-05, + "loss": 1.0611, + "step": 428 + }, + { + "epoch": 2.4514285714285715, + "grad_norm": 58.62373733520508, + "learning_rate": 2.4514285714285716e-05, + "loss": 1.2773, + "step": 429 + }, + { + "epoch": 2.4571428571428573, + "grad_norm": 44.461082458496094, + "learning_rate": 2.4571428571428572e-05, + "loss": 1.0687, + "step": 430 + }, + { + "epoch": 2.4628571428571426, + "grad_norm": 34.10036087036133, + "learning_rate": 2.4628571428571428e-05, + "loss": 1.1176, + "step": 431 + }, + { + "epoch": 2.4685714285714284, + "grad_norm": 72.3791732788086, + "learning_rate": 2.4685714285714288e-05, + "loss": 0.8924, + "step": 432 + }, + { + "epoch": 2.474285714285714, + "grad_norm": 46.446556091308594, + "learning_rate": 2.4742857142857147e-05, + "loss": 1.203, + "step": 433 + }, + { + "epoch": 2.48, + "grad_norm": 32.066680908203125, + "learning_rate": 2.48e-05, + "loss": 1.2535, + "step": 434 + }, + { + "epoch": 2.4857142857142858, + "grad_norm": 46.31652069091797, + "learning_rate": 2.485714285714286e-05, + "loss": 1.5904, + "step": 435 + }, + { + "epoch": 2.4914285714285715, + "grad_norm": 42.79632568359375, + "learning_rate": 2.4914285714285715e-05, + "loss": 1.2958, + "step": 436 + }, + { + "epoch": 2.4971428571428573, + "grad_norm": 30.193653106689453, + "learning_rate": 2.4971428571428575e-05, + "loss": 1.0666, + "step": 437 + }, + { + "epoch": 2.5028571428571427, + "grad_norm": 78.6703872680664, + "learning_rate": 2.5028571428571428e-05, + "loss": 1.0003, + "step": 438 + }, + { + "epoch": 2.5085714285714285, + "grad_norm": 49.08283615112305, + "learning_rate": 2.5085714285714284e-05, + "loss": 0.9589, + "step": 439 + }, + { + "epoch": 2.5142857142857142, + "grad_norm": 27.450288772583008, + "learning_rate": 
2.5142857142857147e-05, + "loss": 1.1667, + "step": 440 + }, + { + "epoch": 2.52, + "grad_norm": 120.01811981201172, + "learning_rate": 2.5200000000000003e-05, + "loss": 1.3021, + "step": 441 + }, + { + "epoch": 2.525714285714286, + "grad_norm": 54.62894058227539, + "learning_rate": 2.5257142857142855e-05, + "loss": 1.0864, + "step": 442 + }, + { + "epoch": 2.5314285714285716, + "grad_norm": 135.79263305664062, + "learning_rate": 2.5314285714285718e-05, + "loss": 1.1455, + "step": 443 + }, + { + "epoch": 2.5371428571428574, + "grad_norm": 32.32933807373047, + "learning_rate": 2.5371428571428574e-05, + "loss": 1.2702, + "step": 444 + }, + { + "epoch": 2.5428571428571427, + "grad_norm": 67.88202667236328, + "learning_rate": 2.542857142857143e-05, + "loss": 1.12, + "step": 445 + }, + { + "epoch": 2.5485714285714285, + "grad_norm": 54.43214416503906, + "learning_rate": 2.5485714285714287e-05, + "loss": 0.9457, + "step": 446 + }, + { + "epoch": 2.5542857142857143, + "grad_norm": 57.02019500732422, + "learning_rate": 2.5542857142857146e-05, + "loss": 1.0577, + "step": 447 + }, + { + "epoch": 2.56, + "grad_norm": 91.19580078125, + "learning_rate": 2.5600000000000002e-05, + "loss": 0.9295, + "step": 448 + }, + { + "epoch": 2.565714285714286, + "grad_norm": 70.9061279296875, + "learning_rate": 2.565714285714286e-05, + "loss": 1.1014, + "step": 449 + }, + { + "epoch": 2.571428571428571, + "grad_norm": 62.63069152832031, + "learning_rate": 2.5714285714285714e-05, + "loss": 0.9374, + "step": 450 + }, + { + "epoch": 2.5771428571428574, + "grad_norm": 54.828643798828125, + "learning_rate": 2.5771428571428574e-05, + "loss": 0.9032, + "step": 451 + }, + { + "epoch": 2.5828571428571427, + "grad_norm": 64.81256866455078, + "learning_rate": 2.582857142857143e-05, + "loss": 1.3023, + "step": 452 + }, + { + "epoch": 2.5885714285714285, + "grad_norm": 79.58660125732422, + "learning_rate": 2.5885714285714286e-05, + "loss": 0.849, + "step": 453 + }, + { + "epoch": 2.5942857142857143, + 
"grad_norm": 39.68455505371094, + "learning_rate": 2.5942857142857146e-05, + "loss": 1.2345, + "step": 454 + }, + { + "epoch": 2.6, + "grad_norm": 39.04762649536133, + "learning_rate": 2.6000000000000002e-05, + "loss": 1.0372, + "step": 455 + }, + { + "epoch": 2.605714285714286, + "grad_norm": 25.01093864440918, + "learning_rate": 2.6057142857142858e-05, + "loss": 0.9171, + "step": 456 + }, + { + "epoch": 2.611428571428571, + "grad_norm": 35.115135192871094, + "learning_rate": 2.6114285714285714e-05, + "loss": 1.1131, + "step": 457 + }, + { + "epoch": 2.617142857142857, + "grad_norm": 73.82764434814453, + "learning_rate": 2.6171428571428574e-05, + "loss": 1.02, + "step": 458 + }, + { + "epoch": 2.6228571428571428, + "grad_norm": 34.32424545288086, + "learning_rate": 2.622857142857143e-05, + "loss": 0.8844, + "step": 459 + }, + { + "epoch": 2.6285714285714286, + "grad_norm": 28.010997772216797, + "learning_rate": 2.6285714285714286e-05, + "loss": 1.1304, + "step": 460 + }, + { + "epoch": 2.6342857142857143, + "grad_norm": 49.68282699584961, + "learning_rate": 2.6342857142857142e-05, + "loss": 0.8364, + "step": 461 + }, + { + "epoch": 2.64, + "grad_norm": 30.49330711364746, + "learning_rate": 2.64e-05, + "loss": 1.0332, + "step": 462 + }, + { + "epoch": 2.645714285714286, + "grad_norm": 54.5880012512207, + "learning_rate": 2.6457142857142857e-05, + "loss": 1.0662, + "step": 463 + }, + { + "epoch": 2.6514285714285712, + "grad_norm": 33.37671661376953, + "learning_rate": 2.6514285714285714e-05, + "loss": 0.9994, + "step": 464 + }, + { + "epoch": 2.657142857142857, + "grad_norm": 83.88538360595703, + "learning_rate": 2.6571428571428576e-05, + "loss": 1.0517, + "step": 465 + }, + { + "epoch": 2.662857142857143, + "grad_norm": 43.34347152709961, + "learning_rate": 2.662857142857143e-05, + "loss": 1.0674, + "step": 466 + }, + { + "epoch": 2.6685714285714286, + "grad_norm": 60.542274475097656, + "learning_rate": 2.6685714285714285e-05, + "loss": 1.036, + "step": 467 + }, + 
{ + "epoch": 2.6742857142857144, + "grad_norm": 25.915237426757812, + "learning_rate": 2.674285714285714e-05, + "loss": 1.033, + "step": 468 + }, + { + "epoch": 2.68, + "grad_norm": 25.848262786865234, + "learning_rate": 2.6800000000000004e-05, + "loss": 0.7942, + "step": 469 + }, + { + "epoch": 2.685714285714286, + "grad_norm": 36.423561096191406, + "learning_rate": 2.6857142857142857e-05, + "loss": 1.1182, + "step": 470 + }, + { + "epoch": 2.6914285714285713, + "grad_norm": 35.71501922607422, + "learning_rate": 2.6914285714285713e-05, + "loss": 0.9274, + "step": 471 + }, + { + "epoch": 2.697142857142857, + "grad_norm": 39.506500244140625, + "learning_rate": 2.6971428571428576e-05, + "loss": 0.7635, + "step": 472 + }, + { + "epoch": 2.702857142857143, + "grad_norm": 56.73497772216797, + "learning_rate": 2.7028571428571432e-05, + "loss": 1.04, + "step": 473 + }, + { + "epoch": 2.7085714285714286, + "grad_norm": 37.4494743347168, + "learning_rate": 2.7085714285714285e-05, + "loss": 0.956, + "step": 474 + }, + { + "epoch": 2.7142857142857144, + "grad_norm": 42.21010971069336, + "learning_rate": 2.714285714285714e-05, + "loss": 0.9411, + "step": 475 + }, + { + "epoch": 2.7199999999999998, + "grad_norm": 34.31499099731445, + "learning_rate": 2.7200000000000004e-05, + "loss": 1.1114, + "step": 476 + }, + { + "epoch": 2.725714285714286, + "grad_norm": 53.104976654052734, + "learning_rate": 2.725714285714286e-05, + "loss": 1.0545, + "step": 477 + }, + { + "epoch": 2.7314285714285713, + "grad_norm": 40.905887603759766, + "learning_rate": 2.7314285714285716e-05, + "loss": 1.1411, + "step": 478 + }, + { + "epoch": 2.737142857142857, + "grad_norm": 29.45627212524414, + "learning_rate": 2.737142857142857e-05, + "loss": 1.0638, + "step": 479 + }, + { + "epoch": 2.742857142857143, + "grad_norm": 41.70409393310547, + "learning_rate": 2.742857142857143e-05, + "loss": 0.9043, + "step": 480 + }, + { + "epoch": 2.7485714285714287, + "grad_norm": 46.97590637207031, + "learning_rate": 
2.7485714285714288e-05, + "loss": 0.9821, + "step": 481 + }, + { + "epoch": 2.7542857142857144, + "grad_norm": 64.2392807006836, + "learning_rate": 2.7542857142857144e-05, + "loss": 0.9294, + "step": 482 + }, + { + "epoch": 2.76, + "grad_norm": 30.05347442626953, + "learning_rate": 2.7600000000000003e-05, + "loss": 1.1204, + "step": 483 + }, + { + "epoch": 2.7657142857142856, + "grad_norm": 55.71738815307617, + "learning_rate": 2.765714285714286e-05, + "loss": 0.9528, + "step": 484 + }, + { + "epoch": 2.7714285714285714, + "grad_norm": 73.06256866455078, + "learning_rate": 2.7714285714285716e-05, + "loss": 0.9858, + "step": 485 + }, + { + "epoch": 2.777142857142857, + "grad_norm": 91.91522979736328, + "learning_rate": 2.7771428571428572e-05, + "loss": 1.0217, + "step": 486 + }, + { + "epoch": 2.782857142857143, + "grad_norm": 38.642330169677734, + "learning_rate": 2.782857142857143e-05, + "loss": 0.7838, + "step": 487 + }, + { + "epoch": 2.7885714285714287, + "grad_norm": 44.415470123291016, + "learning_rate": 2.7885714285714287e-05, + "loss": 0.8804, + "step": 488 + }, + { + "epoch": 2.7942857142857145, + "grad_norm": 45.02664566040039, + "learning_rate": 2.7942857142857143e-05, + "loss": 1.4952, + "step": 489 + }, + { + "epoch": 2.8, + "grad_norm": 66.58822631835938, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.8409, + "step": 490 + }, + { + "epoch": 2.8057142857142856, + "grad_norm": 35.23710632324219, + "learning_rate": 2.805714285714286e-05, + "loss": 1.3371, + "step": 491 + }, + { + "epoch": 2.8114285714285714, + "grad_norm": 41.119258880615234, + "learning_rate": 2.8114285714285715e-05, + "loss": 0.9477, + "step": 492 + }, + { + "epoch": 2.817142857142857, + "grad_norm": 223.302734375, + "learning_rate": 2.817142857142857e-05, + "loss": 0.8615, + "step": 493 + }, + { + "epoch": 2.822857142857143, + "grad_norm": 59.39470672607422, + "learning_rate": 2.822857142857143e-05, + "loss": 1.1558, + "step": 494 + }, + { + "epoch": 2.8285714285714287, + 
"grad_norm": 45.34219741821289, + "learning_rate": 2.8285714285714287e-05, + "loss": 0.7759, + "step": 495 + }, + { + "epoch": 2.8342857142857145, + "grad_norm": 36.19575119018555, + "learning_rate": 2.8342857142857143e-05, + "loss": 0.9873, + "step": 496 + }, + { + "epoch": 2.84, + "grad_norm": 77.39373779296875, + "learning_rate": 2.84e-05, + "loss": 0.8189, + "step": 497 + }, + { + "epoch": 2.8457142857142856, + "grad_norm": 60.195552825927734, + "learning_rate": 2.845714285714286e-05, + "loss": 0.9145, + "step": 498 + }, + { + "epoch": 2.8514285714285714, + "grad_norm": 64.12262725830078, + "learning_rate": 2.8514285714285715e-05, + "loss": 0.8064, + "step": 499 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 33.90835952758789, + "learning_rate": 2.857142857142857e-05, + "loss": 0.9596, + "step": 500 + }, + { + "epoch": 2.862857142857143, + "grad_norm": 317.7658996582031, + "learning_rate": 2.8628571428571434e-05, + "loss": 1.1806, + "step": 501 + }, + { + "epoch": 2.8685714285714283, + "grad_norm": 51.08092498779297, + "learning_rate": 2.8685714285714286e-05, + "loss": 1.0974, + "step": 502 + }, + { + "epoch": 2.8742857142857146, + "grad_norm": 59.075836181640625, + "learning_rate": 2.8742857142857143e-05, + "loss": 1.0331, + "step": 503 + }, + { + "epoch": 2.88, + "grad_norm": 52.50455093383789, + "learning_rate": 2.88e-05, + "loss": 0.9406, + "step": 504 + }, + { + "epoch": 2.8857142857142857, + "grad_norm": 60.88606643676758, + "learning_rate": 2.885714285714286e-05, + "loss": 0.9156, + "step": 505 + }, + { + "epoch": 2.8914285714285715, + "grad_norm": 87.92547607421875, + "learning_rate": 2.8914285714285714e-05, + "loss": 1.2719, + "step": 506 + }, + { + "epoch": 2.8971428571428572, + "grad_norm": 39.06029510498047, + "learning_rate": 2.897142857142857e-05, + "loss": 1.0777, + "step": 507 + }, + { + "epoch": 2.902857142857143, + "grad_norm": 194.26075744628906, + "learning_rate": 2.9028571428571427e-05, + "loss": 0.9401, + "step": 508 + }, + { + 
"epoch": 2.9085714285714284, + "grad_norm": 56.89970016479492, + "learning_rate": 2.908571428571429e-05, + "loss": 0.9576, + "step": 509 + }, + { + "epoch": 2.914285714285714, + "grad_norm": 38.246788024902344, + "learning_rate": 2.9142857142857146e-05, + "loss": 1.0757, + "step": 510 + }, + { + "epoch": 2.92, + "grad_norm": 39.16490936279297, + "learning_rate": 2.9199999999999998e-05, + "loss": 1.2789, + "step": 511 + }, + { + "epoch": 2.9257142857142857, + "grad_norm": 68.37799835205078, + "learning_rate": 2.925714285714286e-05, + "loss": 0.7949, + "step": 512 + }, + { + "epoch": 2.9314285714285715, + "grad_norm": 46.202537536621094, + "learning_rate": 2.9314285714285717e-05, + "loss": 0.9752, + "step": 513 + }, + { + "epoch": 2.9371428571428573, + "grad_norm": 25.075580596923828, + "learning_rate": 2.9371428571428573e-05, + "loss": 0.9919, + "step": 514 + }, + { + "epoch": 2.942857142857143, + "grad_norm": 45.28491973876953, + "learning_rate": 2.9428571428571426e-05, + "loss": 1.0573, + "step": 515 + }, + { + "epoch": 2.9485714285714284, + "grad_norm": 69.61450958251953, + "learning_rate": 2.948571428571429e-05, + "loss": 1.1779, + "step": 516 + }, + { + "epoch": 2.954285714285714, + "grad_norm": 32.18259048461914, + "learning_rate": 2.9542857142857145e-05, + "loss": 0.8433, + "step": 517 + }, + { + "epoch": 2.96, + "grad_norm": 46.77888488769531, + "learning_rate": 2.96e-05, + "loss": 1.1483, + "step": 518 + }, + { + "epoch": 2.9657142857142857, + "grad_norm": 42.754432678222656, + "learning_rate": 2.965714285714286e-05, + "loss": 0.9295, + "step": 519 + }, + { + "epoch": 2.9714285714285715, + "grad_norm": 48.782291412353516, + "learning_rate": 2.9714285714285717e-05, + "loss": 1.3811, + "step": 520 + }, + { + "epoch": 2.977142857142857, + "grad_norm": 76.11039733886719, + "learning_rate": 2.9771428571428573e-05, + "loss": 1.3558, + "step": 521 + }, + { + "epoch": 2.982857142857143, + "grad_norm": 63.38977813720703, + "learning_rate": 2.982857142857143e-05, + 
"loss": 1.1242, + "step": 522 + }, + { + "epoch": 2.9885714285714284, + "grad_norm": 37.63509750366211, + "learning_rate": 2.988571428571429e-05, + "loss": 1.2458, + "step": 523 + }, + { + "epoch": 2.994285714285714, + "grad_norm": 68.86089324951172, + "learning_rate": 2.9942857142857145e-05, + "loss": 0.8439, + "step": 524 + }, + { + "epoch": 3.0, + "grad_norm": 50.35411834716797, + "learning_rate": 3e-05, + "loss": 0.8578, + "step": 525 + }, + { + "epoch": 3.0, + "eval_classes": 0, + "eval_loss": 0.8842275142669678, + "eval_map": 0.5188, + "eval_map_50": 0.5846, + "eval_map_75": 0.5479, + "eval_map_large": 0.5189, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.5188, + "eval_map_small": -1.0, + "eval_mar_1": 0.6949, + "eval_mar_10": 0.9317, + "eval_mar_100": 0.9768, + "eval_mar_100_per_class": 0.9768, + "eval_mar_large": 0.9768, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 19.1252, + "eval_samples_per_second": 15.372, + "eval_steps_per_second": 1.935, + "step": 525 + }, + { + "epoch": 3.005714285714286, + "grad_norm": 47.68229293823242, + "learning_rate": 3.0057142857142857e-05, + "loss": 0.7792, + "step": 526 + }, + { + "epoch": 3.0114285714285716, + "grad_norm": 59.04511260986328, + "learning_rate": 3.0114285714285716e-05, + "loss": 0.85, + "step": 527 + }, + { + "epoch": 3.0171428571428573, + "grad_norm": 61.92051315307617, + "learning_rate": 3.0171428571428572e-05, + "loss": 0.9446, + "step": 528 + }, + { + "epoch": 3.0228571428571427, + "grad_norm": 30.297792434692383, + "learning_rate": 3.022857142857143e-05, + "loss": 1.1596, + "step": 529 + }, + { + "epoch": 3.0285714285714285, + "grad_norm": 54.8390007019043, + "learning_rate": 3.0285714285714288e-05, + "loss": 0.852, + "step": 530 + }, + { + "epoch": 3.0342857142857143, + "grad_norm": 45.49747848510742, + "learning_rate": 3.0342857142857144e-05, + "loss": 0.8763, + "step": 531 + }, + { + "epoch": 3.04, + "grad_norm": 52.076026916503906, + "learning_rate": 3.04e-05, + 
"loss": 1.164, + "step": 532 + }, + { + "epoch": 3.045714285714286, + "grad_norm": 28.280960083007812, + "learning_rate": 3.0457142857142856e-05, + "loss": 0.8567, + "step": 533 + }, + { + "epoch": 3.0514285714285716, + "grad_norm": 43.7817268371582, + "learning_rate": 3.0514285714285716e-05, + "loss": 1.1253, + "step": 534 + }, + { + "epoch": 3.057142857142857, + "grad_norm": 24.932104110717773, + "learning_rate": 3.057142857142857e-05, + "loss": 1.0088, + "step": 535 + }, + { + "epoch": 3.0628571428571427, + "grad_norm": 74.39136505126953, + "learning_rate": 3.062857142857143e-05, + "loss": 0.7345, + "step": 536 + }, + { + "epoch": 3.0685714285714285, + "grad_norm": 30.405521392822266, + "learning_rate": 3.068571428571429e-05, + "loss": 0.7931, + "step": 537 + }, + { + "epoch": 3.0742857142857143, + "grad_norm": 65.0182876586914, + "learning_rate": 3.0742857142857144e-05, + "loss": 0.7928, + "step": 538 + }, + { + "epoch": 3.08, + "grad_norm": 98.8231201171875, + "learning_rate": 3.08e-05, + "loss": 0.9738, + "step": 539 + }, + { + "epoch": 3.085714285714286, + "grad_norm": 54.99164581298828, + "learning_rate": 3.0857142857142856e-05, + "loss": 1.0894, + "step": 540 + }, + { + "epoch": 3.0914285714285716, + "grad_norm": 44.222469329833984, + "learning_rate": 3.0914285714285715e-05, + "loss": 0.867, + "step": 541 + }, + { + "epoch": 3.097142857142857, + "grad_norm": 73.86441040039062, + "learning_rate": 3.0971428571428575e-05, + "loss": 0.9246, + "step": 542 + }, + { + "epoch": 3.1028571428571428, + "grad_norm": 28.863567352294922, + "learning_rate": 3.102857142857143e-05, + "loss": 0.9498, + "step": 543 + }, + { + "epoch": 3.1085714285714285, + "grad_norm": 66.83555603027344, + "learning_rate": 3.108571428571429e-05, + "loss": 1.0356, + "step": 544 + }, + { + "epoch": 3.1142857142857143, + "grad_norm": 41.72322463989258, + "learning_rate": 3.114285714285715e-05, + "loss": 0.9656, + "step": 545 + }, + { + "epoch": 3.12, + "grad_norm": 49.59086608886719, + 
"learning_rate": 3.12e-05, + "loss": 0.8414, + "step": 546 + }, + { + "epoch": 3.125714285714286, + "grad_norm": 45.67478942871094, + "learning_rate": 3.125714285714286e-05, + "loss": 1.1142, + "step": 547 + }, + { + "epoch": 3.1314285714285712, + "grad_norm": 57.42824172973633, + "learning_rate": 3.131428571428572e-05, + "loss": 1.0558, + "step": 548 + }, + { + "epoch": 3.137142857142857, + "grad_norm": 51.68975830078125, + "learning_rate": 3.137142857142857e-05, + "loss": 0.7937, + "step": 549 + }, + { + "epoch": 3.142857142857143, + "grad_norm": 46.31576919555664, + "learning_rate": 3.142857142857143e-05, + "loss": 0.9352, + "step": 550 + }, + { + "epoch": 3.1485714285714286, + "grad_norm": 38.15770721435547, + "learning_rate": 3.148571428571428e-05, + "loss": 0.8738, + "step": 551 + }, + { + "epoch": 3.1542857142857144, + "grad_norm": 74.9398193359375, + "learning_rate": 3.154285714285714e-05, + "loss": 1.0197, + "step": 552 + }, + { + "epoch": 3.16, + "grad_norm": 92.85323333740234, + "learning_rate": 3.16e-05, + "loss": 0.857, + "step": 553 + }, + { + "epoch": 3.1657142857142855, + "grad_norm": 55.174171447753906, + "learning_rate": 3.1657142857142855e-05, + "loss": 0.8328, + "step": 554 + }, + { + "epoch": 3.1714285714285713, + "grad_norm": 41.23771286010742, + "learning_rate": 3.1714285714285715e-05, + "loss": 1.0083, + "step": 555 + }, + { + "epoch": 3.177142857142857, + "grad_norm": 37.808570861816406, + "learning_rate": 3.1771428571428574e-05, + "loss": 0.9961, + "step": 556 + }, + { + "epoch": 3.182857142857143, + "grad_norm": 119.2252426147461, + "learning_rate": 3.182857142857143e-05, + "loss": 0.9984, + "step": 557 + }, + { + "epoch": 3.1885714285714286, + "grad_norm": 52.03147888183594, + "learning_rate": 3.1885714285714286e-05, + "loss": 1.1348, + "step": 558 + }, + { + "epoch": 3.1942857142857144, + "grad_norm": 43.718875885009766, + "learning_rate": 3.1942857142857146e-05, + "loss": 0.9218, + "step": 559 + }, + { + "epoch": 3.2, + "grad_norm": 
33.92399978637695, + "learning_rate": 3.2000000000000005e-05, + "loss": 1.4163, + "step": 560 + }, + { + "epoch": 3.2057142857142855, + "grad_norm": 63.245704650878906, + "learning_rate": 3.205714285714286e-05, + "loss": 0.9589, + "step": 561 + }, + { + "epoch": 3.2114285714285713, + "grad_norm": 26.542739868164062, + "learning_rate": 3.211428571428571e-05, + "loss": 0.9147, + "step": 562 + }, + { + "epoch": 3.217142857142857, + "grad_norm": 116.9144287109375, + "learning_rate": 3.217142857142858e-05, + "loss": 1.251, + "step": 563 + }, + { + "epoch": 3.222857142857143, + "grad_norm": 45.26572036743164, + "learning_rate": 3.222857142857143e-05, + "loss": 0.7846, + "step": 564 + }, + { + "epoch": 3.2285714285714286, + "grad_norm": 40.648685455322266, + "learning_rate": 3.228571428571428e-05, + "loss": 0.778, + "step": 565 + }, + { + "epoch": 3.2342857142857144, + "grad_norm": 79.68229675292969, + "learning_rate": 3.234285714285715e-05, + "loss": 1.1479, + "step": 566 + }, + { + "epoch": 3.24, + "grad_norm": 62.468353271484375, + "learning_rate": 3.24e-05, + "loss": 0.873, + "step": 567 + }, + { + "epoch": 3.2457142857142856, + "grad_norm": 46.446678161621094, + "learning_rate": 3.245714285714286e-05, + "loss": 0.7665, + "step": 568 + }, + { + "epoch": 3.2514285714285713, + "grad_norm": 30.566055297851562, + "learning_rate": 3.2514285714285714e-05, + "loss": 0.9141, + "step": 569 + }, + { + "epoch": 3.257142857142857, + "grad_norm": 43.58727264404297, + "learning_rate": 3.257142857142857e-05, + "loss": 0.8457, + "step": 570 + }, + { + "epoch": 3.262857142857143, + "grad_norm": 38.891563415527344, + "learning_rate": 3.262857142857143e-05, + "loss": 0.7681, + "step": 571 + }, + { + "epoch": 3.2685714285714287, + "grad_norm": 73.75027465820312, + "learning_rate": 3.2685714285714285e-05, + "loss": 1.3875, + "step": 572 + }, + { + "epoch": 3.2742857142857145, + "grad_norm": 21.43327522277832, + "learning_rate": 3.2742857142857145e-05, + "loss": 0.948, + "step": 573 + }, + 
{ + "epoch": 3.2800000000000002, + "grad_norm": 47.069461822509766, + "learning_rate": 3.2800000000000004e-05, + "loss": 0.8676, + "step": 574 + }, + { + "epoch": 3.2857142857142856, + "grad_norm": 36.93059158325195, + "learning_rate": 3.285714285714286e-05, + "loss": 1.0315, + "step": 575 + }, + { + "epoch": 3.2914285714285714, + "grad_norm": 38.3972282409668, + "learning_rate": 3.291428571428572e-05, + "loss": 0.92, + "step": 576 + }, + { + "epoch": 3.297142857142857, + "grad_norm": 32.330604553222656, + "learning_rate": 3.2971428571428576e-05, + "loss": 1.0177, + "step": 577 + }, + { + "epoch": 3.302857142857143, + "grad_norm": 60.15842819213867, + "learning_rate": 3.302857142857143e-05, + "loss": 0.856, + "step": 578 + }, + { + "epoch": 3.3085714285714287, + "grad_norm": 29.508712768554688, + "learning_rate": 3.308571428571429e-05, + "loss": 0.9265, + "step": 579 + }, + { + "epoch": 3.314285714285714, + "grad_norm": 26.6241512298584, + "learning_rate": 3.314285714285714e-05, + "loss": 1.0629, + "step": 580 + }, + { + "epoch": 3.32, + "grad_norm": 55.71906280517578, + "learning_rate": 3.32e-05, + "loss": 0.8574, + "step": 581 + }, + { + "epoch": 3.3257142857142856, + "grad_norm": 32.77925109863281, + "learning_rate": 3.325714285714286e-05, + "loss": 0.913, + "step": 582 + }, + { + "epoch": 3.3314285714285714, + "grad_norm": 391.8411560058594, + "learning_rate": 3.331428571428571e-05, + "loss": 0.8752, + "step": 583 + }, + { + "epoch": 3.337142857142857, + "grad_norm": 88.31011199951172, + "learning_rate": 3.337142857142857e-05, + "loss": 0.9255, + "step": 584 + }, + { + "epoch": 3.342857142857143, + "grad_norm": 45.067344665527344, + "learning_rate": 3.342857142857143e-05, + "loss": 0.989, + "step": 585 + }, + { + "epoch": 3.3485714285714288, + "grad_norm": 26.812660217285156, + "learning_rate": 3.3485714285714285e-05, + "loss": 0.7538, + "step": 586 + }, + { + "epoch": 3.354285714285714, + "grad_norm": 62.75189208984375, + "learning_rate": 
3.3542857142857144e-05, + "loss": 0.9422, + "step": 587 + }, + { + "epoch": 3.36, + "grad_norm": 47.160892486572266, + "learning_rate": 3.3600000000000004e-05, + "loss": 1.0516, + "step": 588 + }, + { + "epoch": 3.3657142857142857, + "grad_norm": 45.98322677612305, + "learning_rate": 3.3657142857142856e-05, + "loss": 1.0043, + "step": 589 + }, + { + "epoch": 3.3714285714285714, + "grad_norm": 49.24179458618164, + "learning_rate": 3.3714285714285716e-05, + "loss": 0.8443, + "step": 590 + }, + { + "epoch": 3.3771428571428572, + "grad_norm": 28.958084106445312, + "learning_rate": 3.377142857142857e-05, + "loss": 1.1926, + "step": 591 + }, + { + "epoch": 3.382857142857143, + "grad_norm": 47.162452697753906, + "learning_rate": 3.3828571428571435e-05, + "loss": 0.796, + "step": 592 + }, + { + "epoch": 3.388571428571429, + "grad_norm": 98.75343322753906, + "learning_rate": 3.388571428571429e-05, + "loss": 0.8616, + "step": 593 + }, + { + "epoch": 3.394285714285714, + "grad_norm": 40.074527740478516, + "learning_rate": 3.394285714285714e-05, + "loss": 0.9336, + "step": 594 + }, + { + "epoch": 3.4, + "grad_norm": 37.29640197753906, + "learning_rate": 3.4000000000000007e-05, + "loss": 1.0572, + "step": 595 + }, + { + "epoch": 3.4057142857142857, + "grad_norm": 27.140880584716797, + "learning_rate": 3.405714285714286e-05, + "loss": 1.0782, + "step": 596 + }, + { + "epoch": 3.4114285714285715, + "grad_norm": 40.091896057128906, + "learning_rate": 3.411428571428571e-05, + "loss": 1.0635, + "step": 597 + }, + { + "epoch": 3.4171428571428573, + "grad_norm": 24.463085174560547, + "learning_rate": 3.417142857142857e-05, + "loss": 0.9266, + "step": 598 + }, + { + "epoch": 3.422857142857143, + "grad_norm": 30.32758903503418, + "learning_rate": 3.422857142857143e-05, + "loss": 0.942, + "step": 599 + }, + { + "epoch": 3.4285714285714284, + "grad_norm": 39.49159622192383, + "learning_rate": 3.428571428571429e-05, + "loss": 0.8498, + "step": 600 + }, + { + "epoch": 3.434285714285714, + 
"grad_norm": 39.12627410888672, + "learning_rate": 3.434285714285714e-05, + "loss": 0.6839, + "step": 601 + }, + { + "epoch": 3.44, + "grad_norm": 38.3946647644043, + "learning_rate": 3.4399999999999996e-05, + "loss": 0.971, + "step": 602 + }, + { + "epoch": 3.4457142857142857, + "grad_norm": 73.4950180053711, + "learning_rate": 3.445714285714286e-05, + "loss": 1.1149, + "step": 603 + }, + { + "epoch": 3.4514285714285715, + "grad_norm": 84.41697692871094, + "learning_rate": 3.4514285714285715e-05, + "loss": 0.8855, + "step": 604 + }, + { + "epoch": 3.4571428571428573, + "grad_norm": 32.518768310546875, + "learning_rate": 3.4571428571428574e-05, + "loss": 1.0337, + "step": 605 + }, + { + "epoch": 3.4628571428571426, + "grad_norm": 45.78841781616211, + "learning_rate": 3.4628571428571434e-05, + "loss": 0.9791, + "step": 606 + }, + { + "epoch": 3.4685714285714284, + "grad_norm": 45.05651092529297, + "learning_rate": 3.468571428571429e-05, + "loss": 1.1856, + "step": 607 + }, + { + "epoch": 3.474285714285714, + "grad_norm": 129.0909881591797, + "learning_rate": 3.4742857142857146e-05, + "loss": 0.809, + "step": 608 + }, + { + "epoch": 3.48, + "grad_norm": 49.62913131713867, + "learning_rate": 3.48e-05, + "loss": 0.9248, + "step": 609 + }, + { + "epoch": 3.4857142857142858, + "grad_norm": 96.46662139892578, + "learning_rate": 3.485714285714286e-05, + "loss": 0.9165, + "step": 610 + }, + { + "epoch": 3.4914285714285715, + "grad_norm": 66.09587097167969, + "learning_rate": 3.491428571428572e-05, + "loss": 0.7754, + "step": 611 + }, + { + "epoch": 3.4971428571428573, + "grad_norm": 54.851680755615234, + "learning_rate": 3.497142857142857e-05, + "loss": 0.9728, + "step": 612 + }, + { + "epoch": 3.5028571428571427, + "grad_norm": 44.88764953613281, + "learning_rate": 3.502857142857143e-05, + "loss": 1.0519, + "step": 613 + }, + { + "epoch": 3.5085714285714285, + "grad_norm": 40.02257537841797, + "learning_rate": 3.508571428571429e-05, + "loss": 0.9248, + "step": 614 + }, + { 
+ "epoch": 3.5142857142857142, + "grad_norm": 78.39165496826172, + "learning_rate": 3.514285714285714e-05, + "loss": 1.0941, + "step": 615 + }, + { + "epoch": 3.52, + "grad_norm": 41.34626770019531, + "learning_rate": 3.52e-05, + "loss": 0.9319, + "step": 616 + }, + { + "epoch": 3.525714285714286, + "grad_norm": 41.48112869262695, + "learning_rate": 3.525714285714286e-05, + "loss": 0.9119, + "step": 617 + }, + { + "epoch": 3.5314285714285716, + "grad_norm": 117.76349639892578, + "learning_rate": 3.5314285714285714e-05, + "loss": 0.6957, + "step": 618 + }, + { + "epoch": 3.5371428571428574, + "grad_norm": 55.89582061767578, + "learning_rate": 3.5371428571428574e-05, + "loss": 1.2014, + "step": 619 + }, + { + "epoch": 3.5428571428571427, + "grad_norm": 47.29049301147461, + "learning_rate": 3.5428571428571426e-05, + "loss": 0.9072, + "step": 620 + }, + { + "epoch": 3.5485714285714285, + "grad_norm": 52.945316314697266, + "learning_rate": 3.5485714285714286e-05, + "loss": 1.0195, + "step": 621 + }, + { + "epoch": 3.5542857142857143, + "grad_norm": 65.0621109008789, + "learning_rate": 3.5542857142857145e-05, + "loss": 0.8933, + "step": 622 + }, + { + "epoch": 3.56, + "grad_norm": 47.37184524536133, + "learning_rate": 3.56e-05, + "loss": 0.7727, + "step": 623 + }, + { + "epoch": 3.565714285714286, + "grad_norm": 46.98302459716797, + "learning_rate": 3.5657142857142864e-05, + "loss": 0.9441, + "step": 624 + }, + { + "epoch": 3.571428571428571, + "grad_norm": 41.932762145996094, + "learning_rate": 3.571428571428572e-05, + "loss": 1.0913, + "step": 625 + }, + { + "epoch": 3.5771428571428574, + "grad_norm": 80.59725189208984, + "learning_rate": 3.577142857142857e-05, + "loss": 0.7176, + "step": 626 + }, + { + "epoch": 3.5828571428571427, + "grad_norm": 33.14458465576172, + "learning_rate": 3.582857142857143e-05, + "loss": 1.0436, + "step": 627 + }, + { + "epoch": 3.5885714285714285, + "grad_norm": 52.75685119628906, + "learning_rate": 3.588571428571429e-05, + "loss": 0.9163, 
+ "step": 628 + }, + { + "epoch": 3.5942857142857143, + "grad_norm": 31.633190155029297, + "learning_rate": 3.594285714285714e-05, + "loss": 1.1692, + "step": 629 + }, + { + "epoch": 3.6, + "grad_norm": 29.593358993530273, + "learning_rate": 3.6e-05, + "loss": 0.8321, + "step": 630 + }, + { + "epoch": 3.605714285714286, + "grad_norm": 88.72626495361328, + "learning_rate": 3.605714285714286e-05, + "loss": 1.0763, + "step": 631 + }, + { + "epoch": 3.611428571428571, + "grad_norm": 45.46866989135742, + "learning_rate": 3.611428571428572e-05, + "loss": 0.9867, + "step": 632 + }, + { + "epoch": 3.617142857142857, + "grad_norm": 39.636932373046875, + "learning_rate": 3.617142857142857e-05, + "loss": 0.8277, + "step": 633 + }, + { + "epoch": 3.6228571428571428, + "grad_norm": 48.63605880737305, + "learning_rate": 3.6228571428571425e-05, + "loss": 0.9193, + "step": 634 + }, + { + "epoch": 3.6285714285714286, + "grad_norm": 36.28878402709961, + "learning_rate": 3.628571428571429e-05, + "loss": 1.3838, + "step": 635 + }, + { + "epoch": 3.6342857142857143, + "grad_norm": 25.930580139160156, + "learning_rate": 3.6342857142857144e-05, + "loss": 0.814, + "step": 636 + }, + { + "epoch": 3.64, + "grad_norm": 56.88616943359375, + "learning_rate": 3.6400000000000004e-05, + "loss": 1.0445, + "step": 637 + }, + { + "epoch": 3.645714285714286, + "grad_norm": 38.71080017089844, + "learning_rate": 3.6457142857142857e-05, + "loss": 0.9755, + "step": 638 + }, + { + "epoch": 3.6514285714285712, + "grad_norm": 47.88688659667969, + "learning_rate": 3.6514285714285716e-05, + "loss": 1.0582, + "step": 639 + }, + { + "epoch": 3.657142857142857, + "grad_norm": 29.28702735900879, + "learning_rate": 3.6571428571428576e-05, + "loss": 0.845, + "step": 640 + }, + { + "epoch": 3.662857142857143, + "grad_norm": 19.560808181762695, + "learning_rate": 3.662857142857143e-05, + "loss": 0.9432, + "step": 641 + }, + { + "epoch": 3.6685714285714286, + "grad_norm": 28.74838638305664, + "learning_rate": 
3.668571428571429e-05, + "loss": 1.2524, + "step": 642 + }, + { + "epoch": 3.6742857142857144, + "grad_norm": 100.5932388305664, + "learning_rate": 3.674285714285715e-05, + "loss": 0.9939, + "step": 643 + }, + { + "epoch": 3.68, + "grad_norm": 48.24791717529297, + "learning_rate": 3.68e-05, + "loss": 0.8666, + "step": 644 + }, + { + "epoch": 3.685714285714286, + "grad_norm": 38.138423919677734, + "learning_rate": 3.685714285714286e-05, + "loss": 0.9091, + "step": 645 + }, + { + "epoch": 3.6914285714285713, + "grad_norm": 149.0504913330078, + "learning_rate": 3.691428571428572e-05, + "loss": 1.1256, + "step": 646 + }, + { + "epoch": 3.697142857142857, + "grad_norm": 34.19496154785156, + "learning_rate": 3.697142857142857e-05, + "loss": 0.9972, + "step": 647 + }, + { + "epoch": 3.702857142857143, + "grad_norm": 33.34417724609375, + "learning_rate": 3.702857142857143e-05, + "loss": 0.7458, + "step": 648 + }, + { + "epoch": 3.7085714285714286, + "grad_norm": 26.71389389038086, + "learning_rate": 3.7085714285714284e-05, + "loss": 1.0144, + "step": 649 + }, + { + "epoch": 3.7142857142857144, + "grad_norm": 49.832374572753906, + "learning_rate": 3.7142857142857143e-05, + "loss": 0.8261, + "step": 650 + }, + { + "epoch": 3.7199999999999998, + "grad_norm": 50.95949172973633, + "learning_rate": 3.72e-05, + "loss": 0.7317, + "step": 651 + }, + { + "epoch": 3.725714285714286, + "grad_norm": 38.240806579589844, + "learning_rate": 3.7257142857142856e-05, + "loss": 0.9429, + "step": 652 + }, + { + "epoch": 3.7314285714285713, + "grad_norm": 102.51292419433594, + "learning_rate": 3.7314285714285715e-05, + "loss": 0.8528, + "step": 653 + }, + { + "epoch": 3.737142857142857, + "grad_norm": 66.62725830078125, + "learning_rate": 3.7371428571428575e-05, + "loss": 0.8035, + "step": 654 + }, + { + "epoch": 3.742857142857143, + "grad_norm": 39.304439544677734, + "learning_rate": 3.742857142857143e-05, + "loss": 0.8531, + "step": 655 + }, + { + "epoch": 3.7485714285714287, + "grad_norm": 
41.29141616821289, + "learning_rate": 3.748571428571429e-05, + "loss": 0.9209, + "step": 656 + }, + { + "epoch": 3.7542857142857144, + "grad_norm": 42.41242599487305, + "learning_rate": 3.7542857142857146e-05, + "loss": 1.2478, + "step": 657 + }, + { + "epoch": 3.76, + "grad_norm": 31.831212997436523, + "learning_rate": 3.76e-05, + "loss": 1.1195, + "step": 658 + }, + { + "epoch": 3.7657142857142856, + "grad_norm": 74.06922149658203, + "learning_rate": 3.765714285714286e-05, + "loss": 1.0407, + "step": 659 + }, + { + "epoch": 3.7714285714285714, + "grad_norm": 54.0971794128418, + "learning_rate": 3.771428571428572e-05, + "loss": 0.9377, + "step": 660 + }, + { + "epoch": 3.777142857142857, + "grad_norm": 29.178192138671875, + "learning_rate": 3.777142857142858e-05, + "loss": 0.7888, + "step": 661 + }, + { + "epoch": 3.782857142857143, + "grad_norm": 52.03077697753906, + "learning_rate": 3.782857142857143e-05, + "loss": 0.9519, + "step": 662 + }, + { + "epoch": 3.7885714285714287, + "grad_norm": 31.274682998657227, + "learning_rate": 3.788571428571428e-05, + "loss": 0.7655, + "step": 663 + }, + { + "epoch": 3.7942857142857145, + "grad_norm": 78.56085968017578, + "learning_rate": 3.794285714285715e-05, + "loss": 0.8495, + "step": 664 + }, + { + "epoch": 3.8, + "grad_norm": 40.677398681640625, + "learning_rate": 3.8e-05, + "loss": 0.7555, + "step": 665 + }, + { + "epoch": 3.8057142857142856, + "grad_norm": 25.844785690307617, + "learning_rate": 3.8057142857142855e-05, + "loss": 0.9978, + "step": 666 + }, + { + "epoch": 3.8114285714285714, + "grad_norm": 38.2746467590332, + "learning_rate": 3.8114285714285714e-05, + "loss": 0.8147, + "step": 667 + }, + { + "epoch": 3.817142857142857, + "grad_norm": 85.11207580566406, + "learning_rate": 3.8171428571428574e-05, + "loss": 0.8933, + "step": 668 + }, + { + "epoch": 3.822857142857143, + "grad_norm": 79.5720443725586, + "learning_rate": 3.822857142857143e-05, + "loss": 0.8698, + "step": 669 + }, + { + "epoch": 
3.8285714285714287, + "grad_norm": 66.55155181884766, + "learning_rate": 3.8285714285714286e-05, + "loss": 1.2136, + "step": 670 + }, + { + "epoch": 3.8342857142857145, + "grad_norm": 50.9844970703125, + "learning_rate": 3.8342857142857146e-05, + "loss": 1.1532, + "step": 671 + }, + { + "epoch": 3.84, + "grad_norm": 40.85847854614258, + "learning_rate": 3.8400000000000005e-05, + "loss": 1.304, + "step": 672 + }, + { + "epoch": 3.8457142857142856, + "grad_norm": 49.67957305908203, + "learning_rate": 3.845714285714286e-05, + "loss": 1.1971, + "step": 673 + }, + { + "epoch": 3.8514285714285714, + "grad_norm": 68.415771484375, + "learning_rate": 3.851428571428571e-05, + "loss": 1.0518, + "step": 674 + }, + { + "epoch": 3.857142857142857, + "grad_norm": 45.53511428833008, + "learning_rate": 3.857142857142858e-05, + "loss": 1.0611, + "step": 675 + }, + { + "epoch": 3.862857142857143, + "grad_norm": 42.6379280090332, + "learning_rate": 3.862857142857143e-05, + "loss": 1.096, + "step": 676 + }, + { + "epoch": 3.8685714285714283, + "grad_norm": 39.18658447265625, + "learning_rate": 3.868571428571429e-05, + "loss": 1.2532, + "step": 677 + }, + { + "epoch": 3.8742857142857146, + "grad_norm": 68.9410629272461, + "learning_rate": 3.874285714285715e-05, + "loss": 0.8739, + "step": 678 + }, + { + "epoch": 3.88, + "grad_norm": 53.28384780883789, + "learning_rate": 3.88e-05, + "loss": 0.9341, + "step": 679 + }, + { + "epoch": 3.8857142857142857, + "grad_norm": 30.44455909729004, + "learning_rate": 3.885714285714286e-05, + "loss": 0.8476, + "step": 680 + }, + { + "epoch": 3.8914285714285715, + "grad_norm": 60.77157974243164, + "learning_rate": 3.8914285714285713e-05, + "loss": 0.8708, + "step": 681 + }, + { + "epoch": 3.8971428571428572, + "grad_norm": 90.17772674560547, + "learning_rate": 3.897142857142857e-05, + "loss": 0.8231, + "step": 682 + }, + { + "epoch": 3.902857142857143, + "grad_norm": 133.7808837890625, + "learning_rate": 3.902857142857143e-05, + "loss": 1.1568, + 
"step": 683 + }, + { + "epoch": 3.9085714285714284, + "grad_norm": 64.70571899414062, + "learning_rate": 3.9085714285714285e-05, + "loss": 1.0223, + "step": 684 + }, + { + "epoch": 3.914285714285714, + "grad_norm": 29.478635787963867, + "learning_rate": 3.9142857142857145e-05, + "loss": 0.9871, + "step": 685 + }, + { + "epoch": 3.92, + "grad_norm": 56.8112678527832, + "learning_rate": 3.9200000000000004e-05, + "loss": 0.8216, + "step": 686 + }, + { + "epoch": 3.9257142857142857, + "grad_norm": 240.74072265625, + "learning_rate": 3.925714285714286e-05, + "loss": 1.1837, + "step": 687 + }, + { + "epoch": 3.9314285714285715, + "grad_norm": 26.388124465942383, + "learning_rate": 3.9314285714285716e-05, + "loss": 0.9203, + "step": 688 + }, + { + "epoch": 3.9371428571428573, + "grad_norm": 74.51782989501953, + "learning_rate": 3.9371428571428576e-05, + "loss": 0.7446, + "step": 689 + }, + { + "epoch": 3.942857142857143, + "grad_norm": 24.310230255126953, + "learning_rate": 3.942857142857143e-05, + "loss": 1.0372, + "step": 690 + }, + { + "epoch": 3.9485714285714284, + "grad_norm": 36.657264709472656, + "learning_rate": 3.948571428571429e-05, + "loss": 1.037, + "step": 691 + }, + { + "epoch": 3.954285714285714, + "grad_norm": 42.5634880065918, + "learning_rate": 3.954285714285714e-05, + "loss": 0.871, + "step": 692 + }, + { + "epoch": 3.96, + "grad_norm": 119.99665832519531, + "learning_rate": 3.960000000000001e-05, + "loss": 1.2884, + "step": 693 + }, + { + "epoch": 3.9657142857142857, + "grad_norm": 107.50180053710938, + "learning_rate": 3.965714285714286e-05, + "loss": 0.7411, + "step": 694 + }, + { + "epoch": 3.9714285714285715, + "grad_norm": 40.23571014404297, + "learning_rate": 3.971428571428571e-05, + "loss": 0.877, + "step": 695 + }, + { + "epoch": 3.977142857142857, + "grad_norm": 251.15298461914062, + "learning_rate": 3.977142857142857e-05, + "loss": 1.0882, + "step": 696 + }, + { + "epoch": 3.982857142857143, + "grad_norm": 24.243986129760742, + 
"learning_rate": 3.982857142857143e-05, + "loss": 0.8619, + "step": 697 + }, + { + "epoch": 3.9885714285714284, + "grad_norm": 38.35419464111328, + "learning_rate": 3.9885714285714284e-05, + "loss": 1.1251, + "step": 698 + }, + { + "epoch": 3.994285714285714, + "grad_norm": 27.93589973449707, + "learning_rate": 3.9942857142857144e-05, + "loss": 0.8057, + "step": 699 + }, + { + "epoch": 4.0, + "grad_norm": 27.46137046813965, + "learning_rate": 4e-05, + "loss": 0.8113, + "step": 700 + }, + { + "epoch": 4.0, + "eval_classes": 0, + "eval_loss": 0.9107489585876465, + "eval_map": 0.7488, + "eval_map_50": 0.8322, + "eval_map_75": 0.8001, + "eval_map_large": 0.7489, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.7488, + "eval_map_small": -1.0, + "eval_mar_1": 0.6825, + "eval_mar_10": 0.934, + "eval_mar_100": 0.9676, + "eval_mar_100_per_class": 0.9676, + "eval_mar_large": 0.9676, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 17.1506, + "eval_samples_per_second": 17.142, + "eval_steps_per_second": 2.157, + "step": 700 + }, + { + "epoch": 4.005714285714285, + "grad_norm": 43.37449645996094, + "learning_rate": 4.005714285714286e-05, + "loss": 0.8611, + "step": 701 + }, + { + "epoch": 4.011428571428572, + "grad_norm": 67.10334014892578, + "learning_rate": 4.0114285714285715e-05, + "loss": 1.0242, + "step": 702 + }, + { + "epoch": 4.017142857142857, + "grad_norm": 31.47416877746582, + "learning_rate": 4.017142857142857e-05, + "loss": 1.1063, + "step": 703 + }, + { + "epoch": 4.022857142857143, + "grad_norm": 50.016170501708984, + "learning_rate": 4.0228571428571434e-05, + "loss": 0.8446, + "step": 704 + }, + { + "epoch": 4.0285714285714285, + "grad_norm": 75.51326751708984, + "learning_rate": 4.028571428571429e-05, + "loss": 0.9451, + "step": 705 + }, + { + "epoch": 4.034285714285715, + "grad_norm": 40.627681732177734, + "learning_rate": 4.034285714285715e-05, + "loss": 0.9035, + "step": 706 + }, + { + "epoch": 4.04, + "grad_norm": 
31.454418182373047, + "learning_rate": 4.0400000000000006e-05, + "loss": 0.9115, + "step": 707 + }, + { + "epoch": 4.045714285714285, + "grad_norm": 43.80349349975586, + "learning_rate": 4.045714285714286e-05, + "loss": 0.9127, + "step": 708 + }, + { + "epoch": 4.051428571428572, + "grad_norm": 53.53621292114258, + "learning_rate": 4.051428571428572e-05, + "loss": 0.75, + "step": 709 + }, + { + "epoch": 4.057142857142857, + "grad_norm": 34.03127670288086, + "learning_rate": 4.057142857142857e-05, + "loss": 0.8359, + "step": 710 + }, + { + "epoch": 4.062857142857143, + "grad_norm": 38.53433609008789, + "learning_rate": 4.062857142857143e-05, + "loss": 0.7569, + "step": 711 + }, + { + "epoch": 4.0685714285714285, + "grad_norm": 23.258892059326172, + "learning_rate": 4.068571428571429e-05, + "loss": 0.8482, + "step": 712 + }, + { + "epoch": 4.074285714285715, + "grad_norm": 48.58507537841797, + "learning_rate": 4.074285714285714e-05, + "loss": 0.8351, + "step": 713 + }, + { + "epoch": 4.08, + "grad_norm": 92.51113891601562, + "learning_rate": 4.08e-05, + "loss": 0.7758, + "step": 714 + }, + { + "epoch": 4.085714285714285, + "grad_norm": 40.11365509033203, + "learning_rate": 4.085714285714286e-05, + "loss": 0.607, + "step": 715 + }, + { + "epoch": 4.091428571428572, + "grad_norm": 54.20933532714844, + "learning_rate": 4.0914285714285715e-05, + "loss": 0.8703, + "step": 716 + }, + { + "epoch": 4.097142857142857, + "grad_norm": 30.776321411132812, + "learning_rate": 4.0971428571428574e-05, + "loss": 0.8529, + "step": 717 + }, + { + "epoch": 4.102857142857143, + "grad_norm": 61.20046615600586, + "learning_rate": 4.1028571428571434e-05, + "loss": 0.8714, + "step": 718 + }, + { + "epoch": 4.1085714285714285, + "grad_norm": 49.445125579833984, + "learning_rate": 4.1085714285714286e-05, + "loss": 0.9068, + "step": 719 + }, + { + "epoch": 4.114285714285714, + "grad_norm": 48.27702331542969, + "learning_rate": 4.1142857142857146e-05, + "loss": 0.8664, + "step": 720 + }, + { + 
"epoch": 4.12, + "grad_norm": 75.87247467041016, + "learning_rate": 4.12e-05, + "loss": 1.0875, + "step": 721 + }, + { + "epoch": 4.1257142857142854, + "grad_norm": 29.371505737304688, + "learning_rate": 4.125714285714286e-05, + "loss": 0.679, + "step": 722 + }, + { + "epoch": 4.131428571428572, + "grad_norm": 110.02105712890625, + "learning_rate": 4.131428571428572e-05, + "loss": 1.1682, + "step": 723 + }, + { + "epoch": 4.137142857142857, + "grad_norm": 36.369163513183594, + "learning_rate": 4.137142857142857e-05, + "loss": 0.839, + "step": 724 + }, + { + "epoch": 4.142857142857143, + "grad_norm": 50.09689712524414, + "learning_rate": 4.1428571428571437e-05, + "loss": 0.8611, + "step": 725 + }, + { + "epoch": 4.148571428571429, + "grad_norm": 40.9254264831543, + "learning_rate": 4.148571428571429e-05, + "loss": 0.9962, + "step": 726 + }, + { + "epoch": 4.154285714285714, + "grad_norm": 50.97928237915039, + "learning_rate": 4.154285714285714e-05, + "loss": 1.0002, + "step": 727 + }, + { + "epoch": 4.16, + "grad_norm": 67.4588394165039, + "learning_rate": 4.16e-05, + "loss": 0.9571, + "step": 728 + }, + { + "epoch": 4.1657142857142855, + "grad_norm": 63.54456329345703, + "learning_rate": 4.165714285714286e-05, + "loss": 0.9622, + "step": 729 + }, + { + "epoch": 4.171428571428572, + "grad_norm": 30.054279327392578, + "learning_rate": 4.1714285714285714e-05, + "loss": 1.0214, + "step": 730 + }, + { + "epoch": 4.177142857142857, + "grad_norm": 50.66481018066406, + "learning_rate": 4.177142857142857e-05, + "loss": 0.8283, + "step": 731 + }, + { + "epoch": 4.182857142857143, + "grad_norm": 55.17572784423828, + "learning_rate": 4.1828571428571426e-05, + "loss": 0.9337, + "step": 732 + }, + { + "epoch": 4.188571428571429, + "grad_norm": 56.75128936767578, + "learning_rate": 4.188571428571429e-05, + "loss": 0.6922, + "step": 733 + }, + { + "epoch": 4.194285714285714, + "grad_norm": 46.1561279296875, + "learning_rate": 4.1942857142857145e-05, + "loss": 1.0934, + "step": 734 
+ }, + { + "epoch": 4.2, + "grad_norm": 56.81748962402344, + "learning_rate": 4.2e-05, + "loss": 0.7729, + "step": 735 + }, + { + "epoch": 4.2057142857142855, + "grad_norm": 51.30953598022461, + "learning_rate": 4.2057142857142864e-05, + "loss": 0.7968, + "step": 736 + }, + { + "epoch": 4.211428571428572, + "grad_norm": 36.7042236328125, + "learning_rate": 4.211428571428572e-05, + "loss": 0.8805, + "step": 737 + }, + { + "epoch": 4.217142857142857, + "grad_norm": 37.41236114501953, + "learning_rate": 4.2171428571428576e-05, + "loss": 1.0618, + "step": 738 + }, + { + "epoch": 4.222857142857142, + "grad_norm": 45.798583984375, + "learning_rate": 4.222857142857143e-05, + "loss": 0.9918, + "step": 739 + }, + { + "epoch": 4.228571428571429, + "grad_norm": 48.581058502197266, + "learning_rate": 4.228571428571429e-05, + "loss": 1.1005, + "step": 740 + }, + { + "epoch": 4.234285714285714, + "grad_norm": 70.04324340820312, + "learning_rate": 4.234285714285715e-05, + "loss": 0.7554, + "step": 741 + }, + { + "epoch": 4.24, + "grad_norm": 35.53608322143555, + "learning_rate": 4.24e-05, + "loss": 0.8303, + "step": 742 + }, + { + "epoch": 4.2457142857142856, + "grad_norm": 23.287988662719727, + "learning_rate": 4.245714285714285e-05, + "loss": 0.6577, + "step": 743 + }, + { + "epoch": 4.251428571428572, + "grad_norm": 31.08119010925293, + "learning_rate": 4.251428571428572e-05, + "loss": 0.8763, + "step": 744 + }, + { + "epoch": 4.257142857142857, + "grad_norm": 35.31334686279297, + "learning_rate": 4.257142857142857e-05, + "loss": 0.9691, + "step": 745 + }, + { + "epoch": 4.2628571428571425, + "grad_norm": 87.34281921386719, + "learning_rate": 4.262857142857143e-05, + "loss": 0.8698, + "step": 746 + }, + { + "epoch": 4.268571428571429, + "grad_norm": 48.2530517578125, + "learning_rate": 4.268571428571429e-05, + "loss": 0.8988, + "step": 747 + }, + { + "epoch": 4.274285714285714, + "grad_norm": 84.64009857177734, + "learning_rate": 4.2742857142857144e-05, + "loss": 1.1475, + 
"step": 748 + }, + { + "epoch": 4.28, + "grad_norm": 40.60107421875, + "learning_rate": 4.2800000000000004e-05, + "loss": 0.9865, + "step": 749 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 52.748878479003906, + "learning_rate": 4.2857142857142856e-05, + "loss": 0.9031, + "step": 750 + }, + { + "epoch": 4.291428571428572, + "grad_norm": 101.19278717041016, + "learning_rate": 4.2914285714285716e-05, + "loss": 0.8664, + "step": 751 + }, + { + "epoch": 4.297142857142857, + "grad_norm": 73.33796691894531, + "learning_rate": 4.2971428571428575e-05, + "loss": 1.039, + "step": 752 + }, + { + "epoch": 4.3028571428571425, + "grad_norm": 33.10040283203125, + "learning_rate": 4.302857142857143e-05, + "loss": 0.9188, + "step": 753 + }, + { + "epoch": 4.308571428571429, + "grad_norm": 48.70182800292969, + "learning_rate": 4.308571428571429e-05, + "loss": 0.8472, + "step": 754 + }, + { + "epoch": 4.314285714285714, + "grad_norm": 44.06749725341797, + "learning_rate": 4.314285714285715e-05, + "loss": 0.8524, + "step": 755 + }, + { + "epoch": 4.32, + "grad_norm": 138.39503479003906, + "learning_rate": 4.32e-05, + "loss": 0.8525, + "step": 756 + }, + { + "epoch": 4.325714285714286, + "grad_norm": 42.73065948486328, + "learning_rate": 4.325714285714286e-05, + "loss": 0.9098, + "step": 757 + }, + { + "epoch": 4.331428571428571, + "grad_norm": 38.09630584716797, + "learning_rate": 4.331428571428572e-05, + "loss": 0.9752, + "step": 758 + }, + { + "epoch": 4.337142857142857, + "grad_norm": 70.47635650634766, + "learning_rate": 4.337142857142857e-05, + "loss": 1.0822, + "step": 759 + }, + { + "epoch": 4.3428571428571425, + "grad_norm": 55.644107818603516, + "learning_rate": 4.342857142857143e-05, + "loss": 1.0015, + "step": 760 + }, + { + "epoch": 4.348571428571429, + "grad_norm": 88.07673645019531, + "learning_rate": 4.3485714285714284e-05, + "loss": 1.0137, + "step": 761 + }, + { + "epoch": 4.354285714285714, + "grad_norm": 25.13230323791504, + "learning_rate": 
4.354285714285714e-05, + "loss": 0.6897, + "step": 762 + }, + { + "epoch": 4.36, + "grad_norm": 59.5438346862793, + "learning_rate": 4.36e-05, + "loss": 1.0062, + "step": 763 + }, + { + "epoch": 4.365714285714286, + "grad_norm": 70.91270446777344, + "learning_rate": 4.3657142857142855e-05, + "loss": 0.9846, + "step": 764 + }, + { + "epoch": 4.371428571428572, + "grad_norm": 101.4989242553711, + "learning_rate": 4.371428571428572e-05, + "loss": 0.8398, + "step": 765 + }, + { + "epoch": 4.377142857142857, + "grad_norm": 38.134315490722656, + "learning_rate": 4.3771428571428574e-05, + "loss": 0.8914, + "step": 766 + }, + { + "epoch": 4.382857142857143, + "grad_norm": 45.63557434082031, + "learning_rate": 4.382857142857143e-05, + "loss": 0.8271, + "step": 767 + }, + { + "epoch": 4.388571428571429, + "grad_norm": 67.12691497802734, + "learning_rate": 4.388571428571429e-05, + "loss": 1.0859, + "step": 768 + }, + { + "epoch": 4.394285714285714, + "grad_norm": 29.074275970458984, + "learning_rate": 4.3942857142857146e-05, + "loss": 0.7501, + "step": 769 + }, + { + "epoch": 4.4, + "grad_norm": 46.619049072265625, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.9039, + "step": 770 + }, + { + "epoch": 4.405714285714286, + "grad_norm": 54.82202911376953, + "learning_rate": 4.405714285714286e-05, + "loss": 0.9477, + "step": 771 + }, + { + "epoch": 4.411428571428571, + "grad_norm": 27.81842041015625, + "learning_rate": 4.411428571428572e-05, + "loss": 0.8866, + "step": 772 + }, + { + "epoch": 4.417142857142857, + "grad_norm": 43.718441009521484, + "learning_rate": 4.417142857142858e-05, + "loss": 0.7752, + "step": 773 + }, + { + "epoch": 4.422857142857143, + "grad_norm": 39.321292877197266, + "learning_rate": 4.422857142857143e-05, + "loss": 0.8403, + "step": 774 + }, + { + "epoch": 4.428571428571429, + "grad_norm": 35.265846252441406, + "learning_rate": 4.428571428571428e-05, + "loss": 0.7081, + "step": 775 + }, + { + "epoch": 4.434285714285714, + "grad_norm": 
24.192285537719727, + "learning_rate": 4.434285714285715e-05, + "loss": 0.8386, + "step": 776 + }, + { + "epoch": 4.44, + "grad_norm": 52.77553176879883, + "learning_rate": 4.44e-05, + "loss": 0.7076, + "step": 777 + }, + { + "epoch": 4.445714285714286, + "grad_norm": 45.69184494018555, + "learning_rate": 4.445714285714286e-05, + "loss": 0.9516, + "step": 778 + }, + { + "epoch": 4.451428571428571, + "grad_norm": 64.34020233154297, + "learning_rate": 4.4514285714285714e-05, + "loss": 0.7131, + "step": 779 + }, + { + "epoch": 4.457142857142857, + "grad_norm": 39.51726150512695, + "learning_rate": 4.4571428571428574e-05, + "loss": 0.8413, + "step": 780 + }, + { + "epoch": 4.462857142857143, + "grad_norm": 31.498125076293945, + "learning_rate": 4.462857142857143e-05, + "loss": 0.7674, + "step": 781 + }, + { + "epoch": 4.468571428571429, + "grad_norm": 39.82917785644531, + "learning_rate": 4.4685714285714286e-05, + "loss": 0.8519, + "step": 782 + }, + { + "epoch": 4.474285714285714, + "grad_norm": 103.14328002929688, + "learning_rate": 4.4742857142857145e-05, + "loss": 0.8795, + "step": 783 + }, + { + "epoch": 4.48, + "grad_norm": 50.75999069213867, + "learning_rate": 4.4800000000000005e-05, + "loss": 0.8551, + "step": 784 + }, + { + "epoch": 4.485714285714286, + "grad_norm": 35.23714828491211, + "learning_rate": 4.485714285714286e-05, + "loss": 0.7303, + "step": 785 + }, + { + "epoch": 4.491428571428571, + "grad_norm": 49.85736846923828, + "learning_rate": 4.491428571428572e-05, + "loss": 1.111, + "step": 786 + }, + { + "epoch": 4.497142857142857, + "grad_norm": 38.57877731323242, + "learning_rate": 4.4971428571428576e-05, + "loss": 0.8739, + "step": 787 + }, + { + "epoch": 4.502857142857143, + "grad_norm": 42.49000549316406, + "learning_rate": 4.502857142857143e-05, + "loss": 0.6994, + "step": 788 + }, + { + "epoch": 4.508571428571429, + "grad_norm": 163.63182067871094, + "learning_rate": 4.508571428571429e-05, + "loss": 0.8117, + "step": 789 + }, + { + "epoch": 
4.514285714285714, + "grad_norm": 44.8123664855957, + "learning_rate": 4.514285714285714e-05, + "loss": 0.7256, + "step": 790 + }, + { + "epoch": 4.52, + "grad_norm": 44.75035858154297, + "learning_rate": 4.52e-05, + "loss": 0.7779, + "step": 791 + }, + { + "epoch": 4.525714285714286, + "grad_norm": 48.99624252319336, + "learning_rate": 4.525714285714286e-05, + "loss": 0.8817, + "step": 792 + }, + { + "epoch": 4.531428571428571, + "grad_norm": 24.009414672851562, + "learning_rate": 4.531428571428571e-05, + "loss": 0.8026, + "step": 793 + }, + { + "epoch": 4.537142857142857, + "grad_norm": 27.89925765991211, + "learning_rate": 4.537142857142857e-05, + "loss": 1.189, + "step": 794 + }, + { + "epoch": 4.542857142857143, + "grad_norm": 53.633575439453125, + "learning_rate": 4.542857142857143e-05, + "loss": 0.7647, + "step": 795 + }, + { + "epoch": 4.548571428571429, + "grad_norm": 49.11115646362305, + "learning_rate": 4.5485714285714285e-05, + "loss": 1.0091, + "step": 796 + }, + { + "epoch": 4.554285714285714, + "grad_norm": 42.562679290771484, + "learning_rate": 4.5542857142857144e-05, + "loss": 0.882, + "step": 797 + }, + { + "epoch": 4.5600000000000005, + "grad_norm": 44.13538360595703, + "learning_rate": 4.5600000000000004e-05, + "loss": 0.9015, + "step": 798 + }, + { + "epoch": 4.565714285714286, + "grad_norm": 95.10523223876953, + "learning_rate": 4.5657142857142857e-05, + "loss": 0.6015, + "step": 799 + }, + { + "epoch": 4.571428571428571, + "grad_norm": 37.34461975097656, + "learning_rate": 4.5714285714285716e-05, + "loss": 0.7921, + "step": 800 + }, + { + "epoch": 4.577142857142857, + "grad_norm": 57.865299224853516, + "learning_rate": 4.5771428571428576e-05, + "loss": 0.8411, + "step": 801 + }, + { + "epoch": 4.582857142857143, + "grad_norm": 50.93311309814453, + "learning_rate": 4.5828571428571435e-05, + "loss": 1.0067, + "step": 802 + }, + { + "epoch": 4.588571428571429, + "grad_norm": 43.336788177490234, + "learning_rate": 4.588571428571429e-05, + "loss": 
1.073, + "step": 803 + }, + { + "epoch": 4.594285714285714, + "grad_norm": 33.9623908996582, + "learning_rate": 4.594285714285714e-05, + "loss": 0.6566, + "step": 804 + }, + { + "epoch": 4.6, + "grad_norm": 72.55892181396484, + "learning_rate": 4.600000000000001e-05, + "loss": 0.8664, + "step": 805 + }, + { + "epoch": 4.605714285714286, + "grad_norm": 43.72401428222656, + "learning_rate": 4.605714285714286e-05, + "loss": 0.8379, + "step": 806 + }, + { + "epoch": 4.611428571428571, + "grad_norm": 30.538040161132812, + "learning_rate": 4.611428571428571e-05, + "loss": 0.7254, + "step": 807 + }, + { + "epoch": 4.617142857142857, + "grad_norm": 35.443058013916016, + "learning_rate": 4.617142857142857e-05, + "loss": 0.9433, + "step": 808 + }, + { + "epoch": 4.622857142857143, + "grad_norm": 26.423377990722656, + "learning_rate": 4.622857142857143e-05, + "loss": 0.8263, + "step": 809 + }, + { + "epoch": 4.628571428571428, + "grad_norm": 35.029911041259766, + "learning_rate": 4.628571428571429e-05, + "loss": 0.9768, + "step": 810 + }, + { + "epoch": 4.634285714285714, + "grad_norm": 26.144128799438477, + "learning_rate": 4.6342857142857143e-05, + "loss": 0.7838, + "step": 811 + }, + { + "epoch": 4.64, + "grad_norm": 55.07661056518555, + "learning_rate": 4.64e-05, + "loss": 0.7111, + "step": 812 + }, + { + "epoch": 4.645714285714286, + "grad_norm": 33.119075775146484, + "learning_rate": 4.645714285714286e-05, + "loss": 1.148, + "step": 813 + }, + { + "epoch": 4.651428571428571, + "grad_norm": 48.644649505615234, + "learning_rate": 4.6514285714285715e-05, + "loss": 1.2616, + "step": 814 + }, + { + "epoch": 4.6571428571428575, + "grad_norm": 45.484859466552734, + "learning_rate": 4.6571428571428575e-05, + "loss": 0.9413, + "step": 815 + }, + { + "epoch": 4.662857142857143, + "grad_norm": 40.2558708190918, + "learning_rate": 4.6628571428571434e-05, + "loss": 0.8274, + "step": 816 + }, + { + "epoch": 4.668571428571429, + "grad_norm": 41.27311325073242, + "learning_rate": 
4.668571428571429e-05, + "loss": 1.2366, + "step": 817 + }, + { + "epoch": 4.674285714285714, + "grad_norm": 29.65941047668457, + "learning_rate": 4.6742857142857146e-05, + "loss": 1.2603, + "step": 818 + }, + { + "epoch": 4.68, + "grad_norm": 49.044586181640625, + "learning_rate": 4.6800000000000006e-05, + "loss": 1.4969, + "step": 819 + }, + { + "epoch": 4.685714285714286, + "grad_norm": 34.03653335571289, + "learning_rate": 4.685714285714286e-05, + "loss": 0.7342, + "step": 820 + }, + { + "epoch": 4.691428571428571, + "grad_norm": 54.41427230834961, + "learning_rate": 4.691428571428572e-05, + "loss": 0.7467, + "step": 821 + }, + { + "epoch": 4.6971428571428575, + "grad_norm": 54.1391716003418, + "learning_rate": 4.697142857142857e-05, + "loss": 0.7678, + "step": 822 + }, + { + "epoch": 4.702857142857143, + "grad_norm": 81.07489013671875, + "learning_rate": 4.702857142857143e-05, + "loss": 0.5855, + "step": 823 + }, + { + "epoch": 4.708571428571428, + "grad_norm": 35.899017333984375, + "learning_rate": 4.708571428571429e-05, + "loss": 0.9845, + "step": 824 + }, + { + "epoch": 4.714285714285714, + "grad_norm": 52.27177047729492, + "learning_rate": 4.714285714285714e-05, + "loss": 0.8779, + "step": 825 + }, + { + "epoch": 4.72, + "grad_norm": 33.994163513183594, + "learning_rate": 4.72e-05, + "loss": 1.1185, + "step": 826 + }, + { + "epoch": 4.725714285714286, + "grad_norm": 42.173828125, + "learning_rate": 4.725714285714286e-05, + "loss": 1.0608, + "step": 827 + }, + { + "epoch": 4.731428571428571, + "grad_norm": 28.176475524902344, + "learning_rate": 4.7314285714285714e-05, + "loss": 1.2005, + "step": 828 + }, + { + "epoch": 4.737142857142857, + "grad_norm": 43.679264068603516, + "learning_rate": 4.7371428571428574e-05, + "loss": 0.675, + "step": 829 + }, + { + "epoch": 4.742857142857143, + "grad_norm": 26.070119857788086, + "learning_rate": 4.742857142857143e-05, + "loss": 0.6874, + "step": 830 + }, + { + "epoch": 4.748571428571428, + "grad_norm": 
43.402976989746094, + "learning_rate": 4.7485714285714286e-05, + "loss": 0.7073, + "step": 831 + }, + { + "epoch": 4.7542857142857144, + "grad_norm": 42.3155403137207, + "learning_rate": 4.7542857142857146e-05, + "loss": 0.9456, + "step": 832 + }, + { + "epoch": 4.76, + "grad_norm": 31.70867156982422, + "learning_rate": 4.76e-05, + "loss": 0.8882, + "step": 833 + }, + { + "epoch": 4.765714285714286, + "grad_norm": 37.32833480834961, + "learning_rate": 4.7657142857142865e-05, + "loss": 0.7486, + "step": 834 + }, + { + "epoch": 4.771428571428571, + "grad_norm": 34.27206039428711, + "learning_rate": 4.771428571428572e-05, + "loss": 0.7769, + "step": 835 + }, + { + "epoch": 4.777142857142858, + "grad_norm": 21.571674346923828, + "learning_rate": 4.777142857142857e-05, + "loss": 0.9401, + "step": 836 + }, + { + "epoch": 4.782857142857143, + "grad_norm": 20.9748477935791, + "learning_rate": 4.782857142857143e-05, + "loss": 0.7598, + "step": 837 + }, + { + "epoch": 4.788571428571428, + "grad_norm": 27.693876266479492, + "learning_rate": 4.788571428571429e-05, + "loss": 0.5936, + "step": 838 + }, + { + "epoch": 4.7942857142857145, + "grad_norm": 38.792903900146484, + "learning_rate": 4.794285714285714e-05, + "loss": 1.0826, + "step": 839 + }, + { + "epoch": 4.8, + "grad_norm": 28.459941864013672, + "learning_rate": 4.8e-05, + "loss": 0.8539, + "step": 840 + }, + { + "epoch": 4.805714285714286, + "grad_norm": 89.08366394042969, + "learning_rate": 4.805714285714286e-05, + "loss": 0.8214, + "step": 841 + }, + { + "epoch": 4.811428571428571, + "grad_norm": 53.74074172973633, + "learning_rate": 4.811428571428572e-05, + "loss": 0.9308, + "step": 842 + }, + { + "epoch": 4.817142857142857, + "grad_norm": 16.795307159423828, + "learning_rate": 4.817142857142857e-05, + "loss": 0.9332, + "step": 843 + }, + { + "epoch": 4.822857142857143, + "grad_norm": 24.547548294067383, + "learning_rate": 4.8228571428571426e-05, + "loss": 0.6237, + "step": 844 + }, + { + "epoch": 4.828571428571428, 
+ "grad_norm": 28.066781997680664, + "learning_rate": 4.828571428571429e-05, + "loss": 0.7644, + "step": 845 + }, + { + "epoch": 4.8342857142857145, + "grad_norm": 70.24605560302734, + "learning_rate": 4.8342857142857145e-05, + "loss": 0.7757, + "step": 846 + }, + { + "epoch": 4.84, + "grad_norm": 85.10889434814453, + "learning_rate": 4.8400000000000004e-05, + "loss": 0.9301, + "step": 847 + }, + { + "epoch": 4.845714285714286, + "grad_norm": 40.65078353881836, + "learning_rate": 4.8457142857142864e-05, + "loss": 0.8808, + "step": 848 + }, + { + "epoch": 4.851428571428571, + "grad_norm": 47.48426055908203, + "learning_rate": 4.8514285714285716e-05, + "loss": 0.6732, + "step": 849 + }, + { + "epoch": 4.857142857142857, + "grad_norm": 32.60896301269531, + "learning_rate": 4.8571428571428576e-05, + "loss": 1.0011, + "step": 850 + }, + { + "epoch": 4.862857142857143, + "grad_norm": 39.34482955932617, + "learning_rate": 4.862857142857143e-05, + "loss": 0.9106, + "step": 851 + }, + { + "epoch": 4.868571428571428, + "grad_norm": 45.15939712524414, + "learning_rate": 4.868571428571429e-05, + "loss": 0.8759, + "step": 852 + }, + { + "epoch": 4.8742857142857146, + "grad_norm": 32.01213455200195, + "learning_rate": 4.874285714285715e-05, + "loss": 0.8889, + "step": 853 + }, + { + "epoch": 4.88, + "grad_norm": 51.3536262512207, + "learning_rate": 4.88e-05, + "loss": 0.9379, + "step": 854 + }, + { + "epoch": 4.885714285714286, + "grad_norm": 37.42449951171875, + "learning_rate": 4.885714285714286e-05, + "loss": 0.8884, + "step": 855 + }, + { + "epoch": 4.8914285714285715, + "grad_norm": 60.717830657958984, + "learning_rate": 4.891428571428572e-05, + "loss": 0.9361, + "step": 856 + }, + { + "epoch": 4.897142857142857, + "grad_norm": 62.75659942626953, + "learning_rate": 4.897142857142857e-05, + "loss": 0.6528, + "step": 857 + }, + { + "epoch": 4.902857142857143, + "grad_norm": 29.475698471069336, + "learning_rate": 4.902857142857143e-05, + "loss": 0.7498, + "step": 858 + }, + { 
+ "epoch": 4.908571428571428, + "grad_norm": 29.30791473388672, + "learning_rate": 4.908571428571429e-05, + "loss": 0.7958, + "step": 859 + }, + { + "epoch": 4.914285714285715, + "grad_norm": 40.96413040161133, + "learning_rate": 4.9142857142857144e-05, + "loss": 0.8702, + "step": 860 + }, + { + "epoch": 4.92, + "grad_norm": 25.47064971923828, + "learning_rate": 4.92e-05, + "loss": 0.9104, + "step": 861 + }, + { + "epoch": 4.925714285714285, + "grad_norm": 40.89109802246094, + "learning_rate": 4.9257142857142856e-05, + "loss": 0.6985, + "step": 862 + }, + { + "epoch": 4.9314285714285715, + "grad_norm": 45.912967681884766, + "learning_rate": 4.9314285714285716e-05, + "loss": 0.7432, + "step": 863 + }, + { + "epoch": 4.937142857142857, + "grad_norm": 54.912818908691406, + "learning_rate": 4.9371428571428575e-05, + "loss": 0.8789, + "step": 864 + }, + { + "epoch": 4.942857142857143, + "grad_norm": 36.55717849731445, + "learning_rate": 4.942857142857143e-05, + "loss": 0.7154, + "step": 865 + }, + { + "epoch": 4.948571428571428, + "grad_norm": 34.00408935546875, + "learning_rate": 4.9485714285714294e-05, + "loss": 0.9432, + "step": 866 + }, + { + "epoch": 4.954285714285715, + "grad_norm": 37.49279022216797, + "learning_rate": 4.954285714285715e-05, + "loss": 1.0998, + "step": 867 + }, + { + "epoch": 4.96, + "grad_norm": 72.15148162841797, + "learning_rate": 4.96e-05, + "loss": 0.7201, + "step": 868 + }, + { + "epoch": 4.965714285714286, + "grad_norm": 122.64563751220703, + "learning_rate": 4.965714285714286e-05, + "loss": 1.2198, + "step": 869 + }, + { + "epoch": 4.9714285714285715, + "grad_norm": 38.418392181396484, + "learning_rate": 4.971428571428572e-05, + "loss": 0.9186, + "step": 870 + }, + { + "epoch": 4.977142857142857, + "grad_norm": 86.30313110351562, + "learning_rate": 4.977142857142857e-05, + "loss": 0.7804, + "step": 871 + }, + { + "epoch": 4.982857142857143, + "grad_norm": 57.824893951416016, + "learning_rate": 4.982857142857143e-05, + "loss": 0.7648, + 
"step": 872 + }, + { + "epoch": 4.988571428571428, + "grad_norm": 30.781469345092773, + "learning_rate": 4.9885714285714283e-05, + "loss": 0.8022, + "step": 873 + }, + { + "epoch": 4.994285714285715, + "grad_norm": 60.66427230834961, + "learning_rate": 4.994285714285715e-05, + "loss": 1.0601, + "step": 874 + }, + { + "epoch": 5.0, + "grad_norm": 37.57851791381836, + "learning_rate": 5e-05, + "loss": 0.7755, + "step": 875 + }, + { + "epoch": 5.0, + "eval_classes": 0, + "eval_loss": 0.9345681071281433, + "eval_map": 0.827, + "eval_map_50": 0.914, + "eval_map_75": 0.8966, + "eval_map_large": 0.827, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.827, + "eval_map_small": -1.0, + "eval_mar_1": 0.7444, + "eval_mar_10": 0.939, + "eval_mar_100": 0.9556, + "eval_mar_100_per_class": 0.9556, + "eval_mar_large": 0.9556, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 17.7962, + "eval_samples_per_second": 16.52, + "eval_steps_per_second": 2.079, + "step": 875 + }, + { + "epoch": 5.005714285714285, + "grad_norm": 28.773176193237305, + "learning_rate": 4.9993650793650795e-05, + "loss": 0.786, + "step": 876 + }, + { + "epoch": 5.011428571428572, + "grad_norm": 20.412450790405273, + "learning_rate": 4.998730158730159e-05, + "loss": 1.1136, + "step": 877 + }, + { + "epoch": 5.017142857142857, + "grad_norm": 30.777389526367188, + "learning_rate": 4.998095238095239e-05, + "loss": 0.7639, + "step": 878 + }, + { + "epoch": 5.022857142857143, + "grad_norm": 26.684669494628906, + "learning_rate": 4.997460317460318e-05, + "loss": 0.7194, + "step": 879 + }, + { + "epoch": 5.0285714285714285, + "grad_norm": 32.90359115600586, + "learning_rate": 4.996825396825397e-05, + "loss": 1.1397, + "step": 880 + }, + { + "epoch": 5.034285714285715, + "grad_norm": 36.00369644165039, + "learning_rate": 4.9961904761904765e-05, + "loss": 1.0612, + "step": 881 + }, + { + "epoch": 5.04, + "grad_norm": 47.34328079223633, + "learning_rate": 4.995555555555556e-05, + "loss": 0.9564, + 
"step": 882 + }, + { + "epoch": 5.045714285714285, + "grad_norm": 29.983821868896484, + "learning_rate": 4.994920634920635e-05, + "loss": 1.1852, + "step": 883 + }, + { + "epoch": 5.051428571428572, + "grad_norm": 62.797935485839844, + "learning_rate": 4.994285714285715e-05, + "loss": 0.9991, + "step": 884 + }, + { + "epoch": 5.057142857142857, + "grad_norm": 34.72334671020508, + "learning_rate": 4.9936507936507936e-05, + "loss": 0.746, + "step": 885 + }, + { + "epoch": 5.062857142857143, + "grad_norm": 35.5029296875, + "learning_rate": 4.9930158730158735e-05, + "loss": 0.724, + "step": 886 + }, + { + "epoch": 5.0685714285714285, + "grad_norm": 66.18411254882812, + "learning_rate": 4.992380952380953e-05, + "loss": 0.8453, + "step": 887 + }, + { + "epoch": 5.074285714285715, + "grad_norm": 124.14593505859375, + "learning_rate": 4.991746031746032e-05, + "loss": 0.683, + "step": 888 + }, + { + "epoch": 5.08, + "grad_norm": 49.367523193359375, + "learning_rate": 4.991111111111111e-05, + "loss": 0.8275, + "step": 889 + }, + { + "epoch": 5.085714285714285, + "grad_norm": 17.534074783325195, + "learning_rate": 4.990476190476191e-05, + "loss": 0.8603, + "step": 890 + }, + { + "epoch": 5.091428571428572, + "grad_norm": 43.27067947387695, + "learning_rate": 4.98984126984127e-05, + "loss": 0.9633, + "step": 891 + }, + { + "epoch": 5.097142857142857, + "grad_norm": 28.913188934326172, + "learning_rate": 4.98920634920635e-05, + "loss": 0.7867, + "step": 892 + }, + { + "epoch": 5.102857142857143, + "grad_norm": 58.211517333984375, + "learning_rate": 4.9885714285714283e-05, + "loss": 0.8475, + "step": 893 + }, + { + "epoch": 5.1085714285714285, + "grad_norm": 36.70371627807617, + "learning_rate": 4.987936507936508e-05, + "loss": 0.8455, + "step": 894 + }, + { + "epoch": 5.114285714285714, + "grad_norm": 45.08826446533203, + "learning_rate": 4.9873015873015875e-05, + "loss": 0.7288, + "step": 895 + }, + { + "epoch": 5.12, + "grad_norm": 27.22219467163086, + "learning_rate": 
4.986666666666667e-05, + "loss": 0.7087, + "step": 896 + }, + { + "epoch": 5.1257142857142854, + "grad_norm": 36.46015167236328, + "learning_rate": 4.986031746031746e-05, + "loss": 1.0503, + "step": 897 + }, + { + "epoch": 5.131428571428572, + "grad_norm": 33.393348693847656, + "learning_rate": 4.985396825396826e-05, + "loss": 0.7937, + "step": 898 + }, + { + "epoch": 5.137142857142857, + "grad_norm": 53.71676254272461, + "learning_rate": 4.9847619047619046e-05, + "loss": 0.6919, + "step": 899 + }, + { + "epoch": 5.142857142857143, + "grad_norm": 49.26475143432617, + "learning_rate": 4.9841269841269845e-05, + "loss": 0.8403, + "step": 900 + }, + { + "epoch": 5.148571428571429, + "grad_norm": 32.67655944824219, + "learning_rate": 4.983492063492064e-05, + "loss": 0.7301, + "step": 901 + }, + { + "epoch": 5.154285714285714, + "grad_norm": 67.62574005126953, + "learning_rate": 4.982857142857143e-05, + "loss": 1.0139, + "step": 902 + }, + { + "epoch": 5.16, + "grad_norm": 69.98014068603516, + "learning_rate": 4.982222222222222e-05, + "loss": 0.9043, + "step": 903 + }, + { + "epoch": 5.1657142857142855, + "grad_norm": 263.18438720703125, + "learning_rate": 4.9815873015873016e-05, + "loss": 0.6178, + "step": 904 + }, + { + "epoch": 5.171428571428572, + "grad_norm": 30.691463470458984, + "learning_rate": 4.980952380952381e-05, + "loss": 0.9376, + "step": 905 + }, + { + "epoch": 5.177142857142857, + "grad_norm": 22.068796157836914, + "learning_rate": 4.980317460317461e-05, + "loss": 0.9483, + "step": 906 + }, + { + "epoch": 5.182857142857143, + "grad_norm": 47.959068298339844, + "learning_rate": 4.97968253968254e-05, + "loss": 1.1893, + "step": 907 + }, + { + "epoch": 5.188571428571429, + "grad_norm": 24.972097396850586, + "learning_rate": 4.979047619047619e-05, + "loss": 0.8632, + "step": 908 + }, + { + "epoch": 5.194285714285714, + "grad_norm": 23.600223541259766, + "learning_rate": 4.9784126984126986e-05, + "loss": 1.0488, + "step": 909 + }, + { + "epoch": 5.2, + 
"grad_norm": 82.63482666015625, + "learning_rate": 4.977777777777778e-05, + "loss": 0.7555, + "step": 910 + }, + { + "epoch": 5.2057142857142855, + "grad_norm": 43.265769958496094, + "learning_rate": 4.977142857142857e-05, + "loss": 0.7966, + "step": 911 + }, + { + "epoch": 5.211428571428572, + "grad_norm": 43.13532638549805, + "learning_rate": 4.976507936507937e-05, + "loss": 0.8966, + "step": 912 + }, + { + "epoch": 5.217142857142857, + "grad_norm": 25.240903854370117, + "learning_rate": 4.975873015873016e-05, + "loss": 0.9683, + "step": 913 + }, + { + "epoch": 5.222857142857142, + "grad_norm": 19.700618743896484, + "learning_rate": 4.9752380952380956e-05, + "loss": 1.0708, + "step": 914 + }, + { + "epoch": 5.228571428571429, + "grad_norm": 39.732566833496094, + "learning_rate": 4.974603174603175e-05, + "loss": 0.5733, + "step": 915 + }, + { + "epoch": 5.234285714285714, + "grad_norm": 28.90740203857422, + "learning_rate": 4.973968253968254e-05, + "loss": 0.7555, + "step": 916 + }, + { + "epoch": 5.24, + "grad_norm": 63.37759017944336, + "learning_rate": 4.973333333333334e-05, + "loss": 0.8653, + "step": 917 + }, + { + "epoch": 5.2457142857142856, + "grad_norm": 59.77758026123047, + "learning_rate": 4.9726984126984126e-05, + "loss": 0.5666, + "step": 918 + }, + { + "epoch": 5.251428571428572, + "grad_norm": 32.72135543823242, + "learning_rate": 4.9720634920634926e-05, + "loss": 0.6469, + "step": 919 + }, + { + "epoch": 5.257142857142857, + "grad_norm": 50.85166931152344, + "learning_rate": 4.971428571428572e-05, + "loss": 1.2446, + "step": 920 + }, + { + "epoch": 5.2628571428571425, + "grad_norm": 41.593135833740234, + "learning_rate": 4.970793650793651e-05, + "loss": 1.0042, + "step": 921 + }, + { + "epoch": 5.268571428571429, + "grad_norm": 90.62390899658203, + "learning_rate": 4.9701587301587304e-05, + "loss": 1.2134, + "step": 922 + }, + { + "epoch": 5.274285714285714, + "grad_norm": 53.12033462524414, + "learning_rate": 4.96952380952381e-05, + "loss": 
0.7162, + "step": 923 + }, + { + "epoch": 5.28, + "grad_norm": 24.408414840698242, + "learning_rate": 4.968888888888889e-05, + "loss": 0.7488, + "step": 924 + }, + { + "epoch": 5.285714285714286, + "grad_norm": 73.4721450805664, + "learning_rate": 4.968253968253969e-05, + "loss": 0.6289, + "step": 925 + }, + { + "epoch": 5.291428571428572, + "grad_norm": 40.17009353637695, + "learning_rate": 4.9676190476190474e-05, + "loss": 0.7478, + "step": 926 + }, + { + "epoch": 5.297142857142857, + "grad_norm": 48.18302536010742, + "learning_rate": 4.9669841269841274e-05, + "loss": 0.7197, + "step": 927 + }, + { + "epoch": 5.3028571428571425, + "grad_norm": 66.0279312133789, + "learning_rate": 4.9663492063492066e-05, + "loss": 0.9326, + "step": 928 + }, + { + "epoch": 5.308571428571429, + "grad_norm": 25.411184310913086, + "learning_rate": 4.965714285714286e-05, + "loss": 0.9918, + "step": 929 + }, + { + "epoch": 5.314285714285714, + "grad_norm": 63.230499267578125, + "learning_rate": 4.965079365079365e-05, + "loss": 0.8927, + "step": 930 + }, + { + "epoch": 5.32, + "grad_norm": 44.250484466552734, + "learning_rate": 4.964444444444445e-05, + "loss": 0.9199, + "step": 931 + }, + { + "epoch": 5.325714285714286, + "grad_norm": 51.27559280395508, + "learning_rate": 4.963809523809524e-05, + "loss": 0.8719, + "step": 932 + }, + { + "epoch": 5.331428571428571, + "grad_norm": 59.98835372924805, + "learning_rate": 4.9631746031746036e-05, + "loss": 0.8632, + "step": 933 + }, + { + "epoch": 5.337142857142857, + "grad_norm": 37.64426040649414, + "learning_rate": 4.962539682539683e-05, + "loss": 1.1554, + "step": 934 + }, + { + "epoch": 5.3428571428571425, + "grad_norm": 41.51221466064453, + "learning_rate": 4.961904761904762e-05, + "loss": 0.8295, + "step": 935 + }, + { + "epoch": 5.348571428571429, + "grad_norm": 46.496395111083984, + "learning_rate": 4.9612698412698414e-05, + "loss": 0.643, + "step": 936 + }, + { + "epoch": 5.354285714285714, + "grad_norm": 32.01964569091797, + 
"learning_rate": 4.960634920634921e-05, + "loss": 0.8428, + "step": 937 + }, + { + "epoch": 5.36, + "grad_norm": 24.820205688476562, + "learning_rate": 4.96e-05, + "loss": 0.5908, + "step": 938 + }, + { + "epoch": 5.365714285714286, + "grad_norm": 26.27269744873047, + "learning_rate": 4.95936507936508e-05, + "loss": 0.8872, + "step": 939 + }, + { + "epoch": 5.371428571428572, + "grad_norm": 30.581790924072266, + "learning_rate": 4.9587301587301585e-05, + "loss": 0.765, + "step": 940 + }, + { + "epoch": 5.377142857142857, + "grad_norm": 43.756492614746094, + "learning_rate": 4.9580952380952384e-05, + "loss": 1.0176, + "step": 941 + }, + { + "epoch": 5.382857142857143, + "grad_norm": 31.745285034179688, + "learning_rate": 4.957460317460318e-05, + "loss": 0.6567, + "step": 942 + }, + { + "epoch": 5.388571428571429, + "grad_norm": 29.369361877441406, + "learning_rate": 4.956825396825397e-05, + "loss": 0.9612, + "step": 943 + }, + { + "epoch": 5.394285714285714, + "grad_norm": 38.0923957824707, + "learning_rate": 4.956190476190476e-05, + "loss": 1.0614, + "step": 944 + }, + { + "epoch": 5.4, + "grad_norm": 36.38419723510742, + "learning_rate": 4.955555555555556e-05, + "loss": 0.8156, + "step": 945 + }, + { + "epoch": 5.405714285714286, + "grad_norm": 47.70192337036133, + "learning_rate": 4.9549206349206354e-05, + "loss": 0.8118, + "step": 946 + }, + { + "epoch": 5.411428571428571, + "grad_norm": 48.491085052490234, + "learning_rate": 4.954285714285715e-05, + "loss": 0.9573, + "step": 947 + }, + { + "epoch": 5.417142857142857, + "grad_norm": 59.52389907836914, + "learning_rate": 4.953650793650794e-05, + "loss": 0.813, + "step": 948 + }, + { + "epoch": 5.422857142857143, + "grad_norm": 39.79739761352539, + "learning_rate": 4.953015873015873e-05, + "loss": 0.7645, + "step": 949 + }, + { + "epoch": 5.428571428571429, + "grad_norm": 45.72932052612305, + "learning_rate": 4.9523809523809525e-05, + "loss": 0.6376, + "step": 950 + }, + { + "epoch": 5.434285714285714, + 
"grad_norm": 34.376991271972656, + "learning_rate": 4.951746031746032e-05, + "loss": 0.7663, + "step": 951 + }, + { + "epoch": 5.44, + "grad_norm": 22.378835678100586, + "learning_rate": 4.951111111111112e-05, + "loss": 0.7292, + "step": 952 + }, + { + "epoch": 5.445714285714286, + "grad_norm": 1413.2232666015625, + "learning_rate": 4.950476190476191e-05, + "loss": 0.6517, + "step": 953 + }, + { + "epoch": 5.451428571428571, + "grad_norm": 52.54072189331055, + "learning_rate": 4.94984126984127e-05, + "loss": 0.7968, + "step": 954 + }, + { + "epoch": 5.457142857142857, + "grad_norm": 53.125953674316406, + "learning_rate": 4.9492063492063495e-05, + "loss": 0.753, + "step": 955 + }, + { + "epoch": 5.462857142857143, + "grad_norm": 44.3760986328125, + "learning_rate": 4.9485714285714294e-05, + "loss": 0.6988, + "step": 956 + }, + { + "epoch": 5.468571428571429, + "grad_norm": 38.103187561035156, + "learning_rate": 4.947936507936508e-05, + "loss": 0.9433, + "step": 957 + }, + { + "epoch": 5.474285714285714, + "grad_norm": 57.01421356201172, + "learning_rate": 4.947301587301588e-05, + "loss": 0.6795, + "step": 958 + }, + { + "epoch": 5.48, + "grad_norm": 47.392242431640625, + "learning_rate": 4.9466666666666665e-05, + "loss": 0.7837, + "step": 959 + }, + { + "epoch": 5.485714285714286, + "grad_norm": 21.470067977905273, + "learning_rate": 4.9460317460317465e-05, + "loss": 1.0356, + "step": 960 + }, + { + "epoch": 5.491428571428571, + "grad_norm": 28.541791915893555, + "learning_rate": 4.945396825396826e-05, + "loss": 0.6212, + "step": 961 + }, + { + "epoch": 5.497142857142857, + "grad_norm": 37.67040252685547, + "learning_rate": 4.944761904761905e-05, + "loss": 0.6977, + "step": 962 + }, + { + "epoch": 5.502857142857143, + "grad_norm": 67.51469421386719, + "learning_rate": 4.944126984126984e-05, + "loss": 0.8533, + "step": 963 + }, + { + "epoch": 5.508571428571429, + "grad_norm": 78.92288208007812, + "learning_rate": 4.943492063492064e-05, + "loss": 0.7135, + "step": 964 
+ }, + { + "epoch": 5.514285714285714, + "grad_norm": 60.324790954589844, + "learning_rate": 4.942857142857143e-05, + "loss": 1.0517, + "step": 965 + }, + { + "epoch": 5.52, + "grad_norm": 66.66876220703125, + "learning_rate": 4.942222222222223e-05, + "loss": 0.7924, + "step": 966 + }, + { + "epoch": 5.525714285714286, + "grad_norm": 55.20885467529297, + "learning_rate": 4.941587301587302e-05, + "loss": 1.2296, + "step": 967 + }, + { + "epoch": 5.531428571428571, + "grad_norm": 40.8668212890625, + "learning_rate": 4.940952380952381e-05, + "loss": 0.8239, + "step": 968 + }, + { + "epoch": 5.537142857142857, + "grad_norm": 37.452362060546875, + "learning_rate": 4.9403174603174605e-05, + "loss": 0.665, + "step": 969 + }, + { + "epoch": 5.542857142857143, + "grad_norm": 78.19581604003906, + "learning_rate": 4.93968253968254e-05, + "loss": 0.9963, + "step": 970 + }, + { + "epoch": 5.548571428571429, + "grad_norm": 43.955406188964844, + "learning_rate": 4.939047619047619e-05, + "loss": 0.674, + "step": 971 + }, + { + "epoch": 5.554285714285714, + "grad_norm": 257.32330322265625, + "learning_rate": 4.938412698412699e-05, + "loss": 0.9603, + "step": 972 + }, + { + "epoch": 5.5600000000000005, + "grad_norm": 97.81949615478516, + "learning_rate": 4.9377777777777776e-05, + "loss": 0.824, + "step": 973 + }, + { + "epoch": 5.565714285714286, + "grad_norm": 66.90892028808594, + "learning_rate": 4.9371428571428575e-05, + "loss": 0.8135, + "step": 974 + }, + { + "epoch": 5.571428571428571, + "grad_norm": 49.98130416870117, + "learning_rate": 4.936507936507937e-05, + "loss": 1.0069, + "step": 975 + }, + { + "epoch": 5.577142857142857, + "grad_norm": 42.15127944946289, + "learning_rate": 4.935873015873016e-05, + "loss": 0.9639, + "step": 976 + }, + { + "epoch": 5.582857142857143, + "grad_norm": 18.45213508605957, + "learning_rate": 4.935238095238095e-05, + "loss": 0.7923, + "step": 977 + }, + { + "epoch": 5.588571428571429, + "grad_norm": 28.863927841186523, + "learning_rate": 
4.934603174603175e-05, + "loss": 0.7692, + "step": 978 + }, + { + "epoch": 5.594285714285714, + "grad_norm": 24.05852508544922, + "learning_rate": 4.933968253968254e-05, + "loss": 0.7133, + "step": 979 + }, + { + "epoch": 5.6, + "grad_norm": 19.306772232055664, + "learning_rate": 4.933333333333334e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 5.605714285714286, + "grad_norm": 41.536617279052734, + "learning_rate": 4.932698412698413e-05, + "loss": 0.829, + "step": 981 + }, + { + "epoch": 5.611428571428571, + "grad_norm": 48.1365966796875, + "learning_rate": 4.932063492063492e-05, + "loss": 0.9519, + "step": 982 + }, + { + "epoch": 5.617142857142857, + "grad_norm": 32.03211975097656, + "learning_rate": 4.9314285714285716e-05, + "loss": 0.7137, + "step": 983 + }, + { + "epoch": 5.622857142857143, + "grad_norm": 26.54397964477539, + "learning_rate": 4.930793650793651e-05, + "loss": 0.8798, + "step": 984 + }, + { + "epoch": 5.628571428571428, + "grad_norm": 66.23500061035156, + "learning_rate": 4.930158730158731e-05, + "loss": 0.7708, + "step": 985 + }, + { + "epoch": 5.634285714285714, + "grad_norm": 82.78375244140625, + "learning_rate": 4.92952380952381e-05, + "loss": 0.8031, + "step": 986 + }, + { + "epoch": 5.64, + "grad_norm": 69.4685287475586, + "learning_rate": 4.928888888888889e-05, + "loss": 0.9085, + "step": 987 + }, + { + "epoch": 5.645714285714286, + "grad_norm": 41.04307556152344, + "learning_rate": 4.9282539682539685e-05, + "loss": 0.6301, + "step": 988 + }, + { + "epoch": 5.651428571428571, + "grad_norm": 96.05631256103516, + "learning_rate": 4.927619047619048e-05, + "loss": 0.7981, + "step": 989 + }, + { + "epoch": 5.6571428571428575, + "grad_norm": 37.76736068725586, + "learning_rate": 4.926984126984127e-05, + "loss": 0.7604, + "step": 990 + }, + { + "epoch": 5.662857142857143, + "grad_norm": 64.40799713134766, + "learning_rate": 4.926349206349207e-05, + "loss": 0.8603, + "step": 991 + }, + { + "epoch": 5.668571428571429, + "grad_norm": 
22.061336517333984, + "learning_rate": 4.9257142857142856e-05, + "loss": 0.8048, + "step": 992 + }, + { + "epoch": 5.674285714285714, + "grad_norm": 46.94278335571289, + "learning_rate": 4.9250793650793655e-05, + "loss": 0.7711, + "step": 993 + }, + { + "epoch": 5.68, + "grad_norm": 47.216609954833984, + "learning_rate": 4.924444444444445e-05, + "loss": 1.5314, + "step": 994 + }, + { + "epoch": 5.685714285714286, + "grad_norm": 24.833250045776367, + "learning_rate": 4.923809523809524e-05, + "loss": 0.6962, + "step": 995 + }, + { + "epoch": 5.691428571428571, + "grad_norm": 115.3731918334961, + "learning_rate": 4.923174603174603e-05, + "loss": 0.9526, + "step": 996 + }, + { + "epoch": 5.6971428571428575, + "grad_norm": 55.14940643310547, + "learning_rate": 4.922539682539683e-05, + "loss": 0.7778, + "step": 997 + }, + { + "epoch": 5.702857142857143, + "grad_norm": 25.706256866455078, + "learning_rate": 4.921904761904762e-05, + "loss": 0.6183, + "step": 998 + }, + { + "epoch": 5.708571428571428, + "grad_norm": 35.29422378540039, + "learning_rate": 4.921269841269842e-05, + "loss": 0.6405, + "step": 999 + }, + { + "epoch": 5.714285714285714, + "grad_norm": 341.161865234375, + "learning_rate": 4.9206349206349204e-05, + "loss": 0.7592, + "step": 1000 + }, + { + "epoch": 5.72, + "grad_norm": 60.03837585449219, + "learning_rate": 4.92e-05, + "loss": 0.5991, + "step": 1001 + }, + { + "epoch": 5.725714285714286, + "grad_norm": 41.89161682128906, + "learning_rate": 4.9193650793650796e-05, + "loss": 0.8142, + "step": 1002 + }, + { + "epoch": 5.731428571428571, + "grad_norm": 34.78620910644531, + "learning_rate": 4.918730158730159e-05, + "loss": 0.7668, + "step": 1003 + }, + { + "epoch": 5.737142857142857, + "grad_norm": 34.432884216308594, + "learning_rate": 4.918095238095238e-05, + "loss": 0.7478, + "step": 1004 + }, + { + "epoch": 5.742857142857143, + "grad_norm": 49.911746978759766, + "learning_rate": 4.917460317460318e-05, + "loss": 0.8738, + "step": 1005 + }, + { + 
"epoch": 5.748571428571428, + "grad_norm": 46.06341552734375, + "learning_rate": 4.9168253968253966e-05, + "loss": 0.6954, + "step": 1006 + }, + { + "epoch": 5.7542857142857144, + "grad_norm": 55.89427947998047, + "learning_rate": 4.9161904761904766e-05, + "loss": 0.5505, + "step": 1007 + }, + { + "epoch": 5.76, + "grad_norm": 36.81814193725586, + "learning_rate": 4.915555555555556e-05, + "loss": 0.6636, + "step": 1008 + }, + { + "epoch": 5.765714285714286, + "grad_norm": 57.308223724365234, + "learning_rate": 4.914920634920635e-05, + "loss": 0.7844, + "step": 1009 + }, + { + "epoch": 5.771428571428571, + "grad_norm": 55.2410774230957, + "learning_rate": 4.9142857142857144e-05, + "loss": 0.7025, + "step": 1010 + }, + { + "epoch": 5.777142857142858, + "grad_norm": 47.4276123046875, + "learning_rate": 4.9136507936507936e-05, + "loss": 0.8052, + "step": 1011 + }, + { + "epoch": 5.782857142857143, + "grad_norm": 44.55537033081055, + "learning_rate": 4.913015873015873e-05, + "loss": 0.759, + "step": 1012 + }, + { + "epoch": 5.788571428571428, + "grad_norm": 425.6323547363281, + "learning_rate": 4.912380952380953e-05, + "loss": 0.7912, + "step": 1013 + }, + { + "epoch": 5.7942857142857145, + "grad_norm": 22.003156661987305, + "learning_rate": 4.9117460317460314e-05, + "loss": 0.7615, + "step": 1014 + }, + { + "epoch": 5.8, + "grad_norm": 43.3438720703125, + "learning_rate": 4.9111111111111114e-05, + "loss": 0.7115, + "step": 1015 + }, + { + "epoch": 5.805714285714286, + "grad_norm": 77.6767578125, + "learning_rate": 4.9104761904761906e-05, + "loss": 0.9186, + "step": 1016 + }, + { + "epoch": 5.811428571428571, + "grad_norm": 57.89418029785156, + "learning_rate": 4.90984126984127e-05, + "loss": 0.759, + "step": 1017 + }, + { + "epoch": 5.817142857142857, + "grad_norm": 21.17589569091797, + "learning_rate": 4.909206349206349e-05, + "loss": 0.9768, + "step": 1018 + }, + { + "epoch": 5.822857142857143, + "grad_norm": 33.41554641723633, + "learning_rate": 
4.908571428571429e-05, + "loss": 0.8376, + "step": 1019 + }, + { + "epoch": 5.828571428571428, + "grad_norm": 21.341121673583984, + "learning_rate": 4.9079365079365084e-05, + "loss": 0.6253, + "step": 1020 + }, + { + "epoch": 5.8342857142857145, + "grad_norm": 79.05905151367188, + "learning_rate": 4.9073015873015876e-05, + "loss": 0.8679, + "step": 1021 + }, + { + "epoch": 5.84, + "grad_norm": 34.44978332519531, + "learning_rate": 4.906666666666667e-05, + "loss": 1.0964, + "step": 1022 + }, + { + "epoch": 5.845714285714286, + "grad_norm": 27.523412704467773, + "learning_rate": 4.906031746031746e-05, + "loss": 0.5364, + "step": 1023 + }, + { + "epoch": 5.851428571428571, + "grad_norm": 47.8846435546875, + "learning_rate": 4.905396825396826e-05, + "loss": 0.7681, + "step": 1024 + }, + { + "epoch": 5.857142857142857, + "grad_norm": 60.275428771972656, + "learning_rate": 4.904761904761905e-05, + "loss": 0.7527, + "step": 1025 + }, + { + "epoch": 5.862857142857143, + "grad_norm": 31.929855346679688, + "learning_rate": 4.9041269841269846e-05, + "loss": 0.8223, + "step": 1026 + }, + { + "epoch": 5.868571428571428, + "grad_norm": 56.61876678466797, + "learning_rate": 4.903492063492064e-05, + "loss": 0.7186, + "step": 1027 + }, + { + "epoch": 5.8742857142857146, + "grad_norm": 33.72767639160156, + "learning_rate": 4.902857142857143e-05, + "loss": 0.8864, + "step": 1028 + }, + { + "epoch": 5.88, + "grad_norm": 52.361454010009766, + "learning_rate": 4.9022222222222224e-05, + "loss": 0.5077, + "step": 1029 + }, + { + "epoch": 5.885714285714286, + "grad_norm": 18.788724899291992, + "learning_rate": 4.9015873015873024e-05, + "loss": 0.6581, + "step": 1030 + }, + { + "epoch": 5.8914285714285715, + "grad_norm": 79.2744369506836, + "learning_rate": 4.900952380952381e-05, + "loss": 0.8661, + "step": 1031 + }, + { + "epoch": 5.897142857142857, + "grad_norm": 239.77687072753906, + "learning_rate": 4.900317460317461e-05, + "loss": 0.8039, + "step": 1032 + }, + { + "epoch": 
5.902857142857143, + "grad_norm": 65.74775695800781, + "learning_rate": 4.8996825396825395e-05, + "loss": 0.7899, + "step": 1033 + }, + { + "epoch": 5.908571428571428, + "grad_norm": 34.65435791015625, + "learning_rate": 4.8990476190476194e-05, + "loss": 0.6254, + "step": 1034 + }, + { + "epoch": 5.914285714285715, + "grad_norm": 137.52218627929688, + "learning_rate": 4.898412698412699e-05, + "loss": 0.8915, + "step": 1035 + }, + { + "epoch": 5.92, + "grad_norm": 54.810523986816406, + "learning_rate": 4.897777777777778e-05, + "loss": 0.8686, + "step": 1036 + }, + { + "epoch": 5.925714285714285, + "grad_norm": 30.277795791625977, + "learning_rate": 4.897142857142857e-05, + "loss": 0.7904, + "step": 1037 + }, + { + "epoch": 5.9314285714285715, + "grad_norm": 42.795936584472656, + "learning_rate": 4.896507936507937e-05, + "loss": 0.6607, + "step": 1038 + }, + { + "epoch": 5.937142857142857, + "grad_norm": 53.916908264160156, + "learning_rate": 4.895873015873016e-05, + "loss": 0.6555, + "step": 1039 + }, + { + "epoch": 5.942857142857143, + "grad_norm": 45.74040985107422, + "learning_rate": 4.895238095238096e-05, + "loss": 0.6064, + "step": 1040 + }, + { + "epoch": 5.948571428571428, + "grad_norm": 40.435882568359375, + "learning_rate": 4.894603174603175e-05, + "loss": 0.7522, + "step": 1041 + }, + { + "epoch": 5.954285714285715, + "grad_norm": 61.07969665527344, + "learning_rate": 4.893968253968254e-05, + "loss": 0.9887, + "step": 1042 + }, + { + "epoch": 5.96, + "grad_norm": 46.681976318359375, + "learning_rate": 4.8933333333333335e-05, + "loss": 0.8548, + "step": 1043 + }, + { + "epoch": 5.965714285714286, + "grad_norm": 35.48418045043945, + "learning_rate": 4.892698412698413e-05, + "loss": 0.6326, + "step": 1044 + }, + { + "epoch": 5.9714285714285715, + "grad_norm": 56.91579818725586, + "learning_rate": 4.892063492063492e-05, + "loss": 0.8085, + "step": 1045 + }, + { + "epoch": 5.977142857142857, + "grad_norm": 26.985816955566406, + "learning_rate": 
4.891428571428572e-05, + "loss": 0.8397, + "step": 1046 + }, + { + "epoch": 5.982857142857143, + "grad_norm": 48.0628662109375, + "learning_rate": 4.8907936507936505e-05, + "loss": 0.78, + "step": 1047 + }, + { + "epoch": 5.988571428571428, + "grad_norm": 59.83071517944336, + "learning_rate": 4.8901587301587305e-05, + "loss": 0.7693, + "step": 1048 + }, + { + "epoch": 5.994285714285715, + "grad_norm": 23.15542984008789, + "learning_rate": 4.88952380952381e-05, + "loss": 0.6467, + "step": 1049 + }, + { + "epoch": 6.0, + "grad_norm": 50.857933044433594, + "learning_rate": 4.888888888888889e-05, + "loss": 0.7164, + "step": 1050 + }, + { + "epoch": 6.0, + "eval_classes": 0, + "eval_loss": 0.8369960188865662, + "eval_map": 0.852, + "eval_map_50": 0.9449, + "eval_map_75": 0.924, + "eval_map_large": 0.852, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.852, + "eval_map_small": -1.0, + "eval_mar_1": 0.7546, + "eval_mar_10": 0.9413, + "eval_mar_100": 0.9635, + "eval_mar_100_per_class": 0.9635, + "eval_mar_large": 0.9635, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 17.0881, + "eval_samples_per_second": 17.205, + "eval_steps_per_second": 2.165, + "step": 1050 + }, + { + "epoch": 6.005714285714285, + "grad_norm": 24.954538345336914, + "learning_rate": 4.888253968253968e-05, + "loss": 0.7493, + "step": 1051 + }, + { + "epoch": 6.011428571428572, + "grad_norm": 23.112192153930664, + "learning_rate": 4.887619047619048e-05, + "loss": 0.6879, + "step": 1052 + }, + { + "epoch": 6.017142857142857, + "grad_norm": 102.9321517944336, + "learning_rate": 4.886984126984127e-05, + "loss": 0.7954, + "step": 1053 + }, + { + "epoch": 6.022857142857143, + "grad_norm": 33.70575714111328, + "learning_rate": 4.886349206349207e-05, + "loss": 0.8101, + "step": 1054 + }, + { + "epoch": 6.0285714285714285, + "grad_norm": 39.177547454833984, + "learning_rate": 4.885714285714286e-05, + "loss": 0.711, + "step": 1055 + }, + { + "epoch": 6.034285714285715, + "grad_norm": 
55.89052963256836, + "learning_rate": 4.885079365079365e-05, + "loss": 0.8373, + "step": 1056 + }, + { + "epoch": 6.04, + "grad_norm": 37.87918472290039, + "learning_rate": 4.8844444444444445e-05, + "loss": 0.6406, + "step": 1057 + }, + { + "epoch": 6.045714285714285, + "grad_norm": 37.094642639160156, + "learning_rate": 4.883809523809524e-05, + "loss": 0.853, + "step": 1058 + }, + { + "epoch": 6.051428571428572, + "grad_norm": 52.7163200378418, + "learning_rate": 4.883174603174604e-05, + "loss": 0.5881, + "step": 1059 + }, + { + "epoch": 6.057142857142857, + "grad_norm": 31.145267486572266, + "learning_rate": 4.882539682539683e-05, + "loss": 0.6518, + "step": 1060 + }, + { + "epoch": 6.062857142857143, + "grad_norm": 68.63578796386719, + "learning_rate": 4.881904761904762e-05, + "loss": 0.7544, + "step": 1061 + }, + { + "epoch": 6.0685714285714285, + "grad_norm": 82.92304992675781, + "learning_rate": 4.8812698412698415e-05, + "loss": 0.59, + "step": 1062 + }, + { + "epoch": 6.074285714285715, + "grad_norm": 63.362648010253906, + "learning_rate": 4.8806349206349214e-05, + "loss": 0.6871, + "step": 1063 + }, + { + "epoch": 6.08, + "grad_norm": 52.70489501953125, + "learning_rate": 4.88e-05, + "loss": 0.649, + "step": 1064 + }, + { + "epoch": 6.085714285714285, + "grad_norm": 47.088253021240234, + "learning_rate": 4.87936507936508e-05, + "loss": 0.634, + "step": 1065 + }, + { + "epoch": 6.091428571428572, + "grad_norm": 39.26316833496094, + "learning_rate": 4.8787301587301586e-05, + "loss": 0.7083, + "step": 1066 + }, + { + "epoch": 6.097142857142857, + "grad_norm": 36.16307830810547, + "learning_rate": 4.8780952380952385e-05, + "loss": 0.776, + "step": 1067 + }, + { + "epoch": 6.102857142857143, + "grad_norm": 31.930225372314453, + "learning_rate": 4.877460317460318e-05, + "loss": 0.7513, + "step": 1068 + }, + { + "epoch": 6.1085714285714285, + "grad_norm": 77.57910919189453, + "learning_rate": 4.876825396825397e-05, + "loss": 0.5557, + "step": 1069 + }, + { + 
"epoch": 6.114285714285714, + "grad_norm": 57.64714431762695, + "learning_rate": 4.876190476190476e-05, + "loss": 0.644, + "step": 1070 + }, + { + "epoch": 6.12, + "grad_norm": 162.1901092529297, + "learning_rate": 4.875555555555556e-05, + "loss": 0.6925, + "step": 1071 + }, + { + "epoch": 6.1257142857142854, + "grad_norm": 19.613445281982422, + "learning_rate": 4.874920634920635e-05, + "loss": 0.6037, + "step": 1072 + }, + { + "epoch": 6.131428571428572, + "grad_norm": 41.59284973144531, + "learning_rate": 4.874285714285715e-05, + "loss": 1.1079, + "step": 1073 + }, + { + "epoch": 6.137142857142857, + "grad_norm": 79.42727661132812, + "learning_rate": 4.873650793650794e-05, + "loss": 0.4649, + "step": 1074 + }, + { + "epoch": 6.142857142857143, + "grad_norm": 26.3876953125, + "learning_rate": 4.873015873015873e-05, + "loss": 0.5414, + "step": 1075 + }, + { + "epoch": 6.148571428571429, + "grad_norm": 132.17300415039062, + "learning_rate": 4.8723809523809526e-05, + "loss": 0.9906, + "step": 1076 + }, + { + "epoch": 6.154285714285714, + "grad_norm": 35.66273880004883, + "learning_rate": 4.871746031746032e-05, + "loss": 0.8514, + "step": 1077 + }, + { + "epoch": 6.16, + "grad_norm": 70.73888397216797, + "learning_rate": 4.871111111111111e-05, + "loss": 0.6043, + "step": 1078 + }, + { + "epoch": 6.1657142857142855, + "grad_norm": 12.699053764343262, + "learning_rate": 4.870476190476191e-05, + "loss": 0.6757, + "step": 1079 + }, + { + "epoch": 6.171428571428572, + "grad_norm": 38.0023078918457, + "learning_rate": 4.8698412698412696e-05, + "loss": 0.5962, + "step": 1080 + }, + { + "epoch": 6.177142857142857, + "grad_norm": 39.17434310913086, + "learning_rate": 4.8692063492063495e-05, + "loss": 0.8336, + "step": 1081 + }, + { + "epoch": 6.182857142857143, + "grad_norm": 74.63424682617188, + "learning_rate": 4.868571428571429e-05, + "loss": 0.9345, + "step": 1082 + }, + { + "epoch": 6.188571428571429, + "grad_norm": 53.93844223022461, + "learning_rate": 
4.867936507936508e-05, + "loss": 1.0408, + "step": 1083 + }, + { + "epoch": 6.194285714285714, + "grad_norm": 66.69645690917969, + "learning_rate": 4.867301587301587e-05, + "loss": 0.8289, + "step": 1084 + }, + { + "epoch": 6.2, + "grad_norm": 96.8067626953125, + "learning_rate": 4.866666666666667e-05, + "loss": 1.3159, + "step": 1085 + }, + { + "epoch": 6.2057142857142855, + "grad_norm": 59.63151168823242, + "learning_rate": 4.866031746031746e-05, + "loss": 0.8477, + "step": 1086 + }, + { + "epoch": 6.211428571428572, + "grad_norm": 45.384647369384766, + "learning_rate": 4.865396825396826e-05, + "loss": 0.9704, + "step": 1087 + }, + { + "epoch": 6.217142857142857, + "grad_norm": 54.9943733215332, + "learning_rate": 4.864761904761905e-05, + "loss": 0.8903, + "step": 1088 + }, + { + "epoch": 6.222857142857142, + "grad_norm": 56.89737319946289, + "learning_rate": 4.864126984126984e-05, + "loss": 0.6687, + "step": 1089 + }, + { + "epoch": 6.228571428571429, + "grad_norm": 32.90390396118164, + "learning_rate": 4.8634920634920636e-05, + "loss": 0.8242, + "step": 1090 + }, + { + "epoch": 6.234285714285714, + "grad_norm": 46.840389251708984, + "learning_rate": 4.862857142857143e-05, + "loss": 0.7521, + "step": 1091 + }, + { + "epoch": 6.24, + "grad_norm": 88.7275161743164, + "learning_rate": 4.862222222222222e-05, + "loss": 0.8655, + "step": 1092 + }, + { + "epoch": 6.2457142857142856, + "grad_norm": 58.718074798583984, + "learning_rate": 4.861587301587302e-05, + "loss": 0.7266, + "step": 1093 + }, + { + "epoch": 6.251428571428572, + "grad_norm": 43.9836311340332, + "learning_rate": 4.860952380952381e-05, + "loss": 0.6816, + "step": 1094 + }, + { + "epoch": 6.257142857142857, + "grad_norm": 38.8942985534668, + "learning_rate": 4.8603174603174606e-05, + "loss": 0.6908, + "step": 1095 + }, + { + "epoch": 6.2628571428571425, + "grad_norm": 93.62848663330078, + "learning_rate": 4.85968253968254e-05, + "loss": 0.6644, + "step": 1096 + }, + { + "epoch": 6.268571428571429, + 
"grad_norm": 58.32928466796875, + "learning_rate": 4.859047619047619e-05, + "loss": 0.6923, + "step": 1097 + }, + { + "epoch": 6.274285714285714, + "grad_norm": 46.724342346191406, + "learning_rate": 4.858412698412699e-05, + "loss": 0.6886, + "step": 1098 + }, + { + "epoch": 6.28, + "grad_norm": 61.18104934692383, + "learning_rate": 4.8577777777777776e-05, + "loss": 0.6711, + "step": 1099 + }, + { + "epoch": 6.285714285714286, + "grad_norm": 25.95333480834961, + "learning_rate": 4.8571428571428576e-05, + "loss": 0.9204, + "step": 1100 + }, + { + "epoch": 6.291428571428572, + "grad_norm": 44.279659271240234, + "learning_rate": 4.856507936507937e-05, + "loss": 0.468, + "step": 1101 + }, + { + "epoch": 6.297142857142857, + "grad_norm": 58.290977478027344, + "learning_rate": 4.855873015873016e-05, + "loss": 1.0472, + "step": 1102 + }, + { + "epoch": 6.3028571428571425, + "grad_norm": 19.661542892456055, + "learning_rate": 4.8552380952380954e-05, + "loss": 0.6991, + "step": 1103 + }, + { + "epoch": 6.308571428571429, + "grad_norm": 56.441322326660156, + "learning_rate": 4.854603174603175e-05, + "loss": 0.8712, + "step": 1104 + }, + { + "epoch": 6.314285714285714, + "grad_norm": 37.64546203613281, + "learning_rate": 4.853968253968254e-05, + "loss": 0.6974, + "step": 1105 + }, + { + "epoch": 6.32, + "grad_norm": 27.978652954101562, + "learning_rate": 4.853333333333334e-05, + "loss": 0.6804, + "step": 1106 + }, + { + "epoch": 6.325714285714286, + "grad_norm": 31.64655303955078, + "learning_rate": 4.852698412698413e-05, + "loss": 0.5039, + "step": 1107 + }, + { + "epoch": 6.331428571428571, + "grad_norm": 48.66095733642578, + "learning_rate": 4.8520634920634924e-05, + "loss": 0.4374, + "step": 1108 + }, + { + "epoch": 6.337142857142857, + "grad_norm": 35.47930908203125, + "learning_rate": 4.8514285714285716e-05, + "loss": 0.6162, + "step": 1109 + }, + { + "epoch": 6.3428571428571425, + "grad_norm": 22.606897354125977, + "learning_rate": 4.850793650793651e-05, + "loss": 
0.8655, + "step": 1110 + }, + { + "epoch": 6.348571428571429, + "grad_norm": 46.42298126220703, + "learning_rate": 4.85015873015873e-05, + "loss": 0.4887, + "step": 1111 + }, + { + "epoch": 6.354285714285714, + "grad_norm": 40.30084991455078, + "learning_rate": 4.84952380952381e-05, + "loss": 0.8729, + "step": 1112 + }, + { + "epoch": 6.36, + "grad_norm": 18.799062728881836, + "learning_rate": 4.848888888888889e-05, + "loss": 0.3812, + "step": 1113 + }, + { + "epoch": 6.365714285714286, + "grad_norm": 47.63445281982422, + "learning_rate": 4.8482539682539686e-05, + "loss": 0.5685, + "step": 1114 + }, + { + "epoch": 6.371428571428572, + "grad_norm": 102.43451690673828, + "learning_rate": 4.847619047619048e-05, + "loss": 1.2137, + "step": 1115 + }, + { + "epoch": 6.377142857142857, + "grad_norm": 72.0689926147461, + "learning_rate": 4.846984126984127e-05, + "loss": 0.7643, + "step": 1116 + }, + { + "epoch": 6.382857142857143, + "grad_norm": 44.23721694946289, + "learning_rate": 4.8463492063492064e-05, + "loss": 0.6036, + "step": 1117 + }, + { + "epoch": 6.388571428571429, + "grad_norm": 25.187232971191406, + "learning_rate": 4.8457142857142864e-05, + "loss": 0.6362, + "step": 1118 + }, + { + "epoch": 6.394285714285714, + "grad_norm": 32.348670959472656, + "learning_rate": 4.845079365079365e-05, + "loss": 0.8032, + "step": 1119 + }, + { + "epoch": 6.4, + "grad_norm": 19.9083309173584, + "learning_rate": 4.844444444444445e-05, + "loss": 0.6215, + "step": 1120 + }, + { + "epoch": 6.405714285714286, + "grad_norm": 27.486642837524414, + "learning_rate": 4.8438095238095235e-05, + "loss": 0.9652, + "step": 1121 + }, + { + "epoch": 6.411428571428571, + "grad_norm": 46.83481979370117, + "learning_rate": 4.8431746031746034e-05, + "loss": 0.5544, + "step": 1122 + }, + { + "epoch": 6.417142857142857, + "grad_norm": 43.2258415222168, + "learning_rate": 4.842539682539683e-05, + "loss": 0.6362, + "step": 1123 + }, + { + "epoch": 6.422857142857143, + "grad_norm": 25.14783477783203, + 
"learning_rate": 4.841904761904762e-05, + "loss": 1.0612, + "step": 1124 + }, + { + "epoch": 6.428571428571429, + "grad_norm": 39.89846420288086, + "learning_rate": 4.841269841269841e-05, + "loss": 0.6556, + "step": 1125 + }, + { + "epoch": 6.434285714285714, + "grad_norm": 31.60659408569336, + "learning_rate": 4.840634920634921e-05, + "loss": 0.7325, + "step": 1126 + }, + { + "epoch": 6.44, + "grad_norm": 37.08771514892578, + "learning_rate": 4.8400000000000004e-05, + "loss": 0.783, + "step": 1127 + }, + { + "epoch": 6.445714285714286, + "grad_norm": 20.82538604736328, + "learning_rate": 4.83936507936508e-05, + "loss": 0.6206, + "step": 1128 + }, + { + "epoch": 6.451428571428571, + "grad_norm": 27.13603973388672, + "learning_rate": 4.838730158730159e-05, + "loss": 0.7035, + "step": 1129 + }, + { + "epoch": 6.457142857142857, + "grad_norm": 55.448936462402344, + "learning_rate": 4.838095238095238e-05, + "loss": 0.6635, + "step": 1130 + }, + { + "epoch": 6.462857142857143, + "grad_norm": 42.173458099365234, + "learning_rate": 4.8374603174603175e-05, + "loss": 1.2021, + "step": 1131 + }, + { + "epoch": 6.468571428571429, + "grad_norm": 288.70526123046875, + "learning_rate": 4.836825396825397e-05, + "loss": 0.7537, + "step": 1132 + }, + { + "epoch": 6.474285714285714, + "grad_norm": 64.1880874633789, + "learning_rate": 4.836190476190477e-05, + "loss": 0.7211, + "step": 1133 + }, + { + "epoch": 6.48, + "grad_norm": 31.468538284301758, + "learning_rate": 4.835555555555556e-05, + "loss": 0.9399, + "step": 1134 + }, + { + "epoch": 6.485714285714286, + "grad_norm": 32.73714065551758, + "learning_rate": 4.834920634920635e-05, + "loss": 0.6802, + "step": 1135 + }, + { + "epoch": 6.491428571428571, + "grad_norm": 45.5434455871582, + "learning_rate": 4.8342857142857145e-05, + "loss": 0.6964, + "step": 1136 + }, + { + "epoch": 6.497142857142857, + "grad_norm": 46.5738410949707, + "learning_rate": 4.8336507936507944e-05, + "loss": 0.639, + "step": 1137 + }, + { + "epoch": 
6.502857142857143, + "grad_norm": 39.77674865722656, + "learning_rate": 4.833015873015873e-05, + "loss": 0.7366, + "step": 1138 + }, + { + "epoch": 6.508571428571429, + "grad_norm": 28.013656616210938, + "learning_rate": 4.832380952380953e-05, + "loss": 0.9194, + "step": 1139 + }, + { + "epoch": 6.514285714285714, + "grad_norm": 41.1294059753418, + "learning_rate": 4.831746031746032e-05, + "loss": 0.797, + "step": 1140 + }, + { + "epoch": 6.52, + "grad_norm": 50.677650451660156, + "learning_rate": 4.8311111111111115e-05, + "loss": 0.9168, + "step": 1141 + }, + { + "epoch": 6.525714285714286, + "grad_norm": 43.122467041015625, + "learning_rate": 4.830476190476191e-05, + "loss": 0.605, + "step": 1142 + }, + { + "epoch": 6.531428571428571, + "grad_norm": 52.74139404296875, + "learning_rate": 4.82984126984127e-05, + "loss": 0.6699, + "step": 1143 + }, + { + "epoch": 6.537142857142857, + "grad_norm": 39.470733642578125, + "learning_rate": 4.829206349206349e-05, + "loss": 0.5968, + "step": 1144 + }, + { + "epoch": 6.542857142857143, + "grad_norm": 23.233463287353516, + "learning_rate": 4.828571428571429e-05, + "loss": 0.7266, + "step": 1145 + }, + { + "epoch": 6.548571428571429, + "grad_norm": 43.671607971191406, + "learning_rate": 4.827936507936508e-05, + "loss": 0.4698, + "step": 1146 + }, + { + "epoch": 6.554285714285714, + "grad_norm": 75.19342041015625, + "learning_rate": 4.827301587301588e-05, + "loss": 0.892, + "step": 1147 + }, + { + "epoch": 6.5600000000000005, + "grad_norm": 21.33753776550293, + "learning_rate": 4.826666666666667e-05, + "loss": 0.5585, + "step": 1148 + }, + { + "epoch": 6.565714285714286, + "grad_norm": 56.81877517700195, + "learning_rate": 4.826031746031746e-05, + "loss": 0.7324, + "step": 1149 + }, + { + "epoch": 6.571428571428571, + "grad_norm": 21.56536102294922, + "learning_rate": 4.8253968253968255e-05, + "loss": 0.639, + "step": 1150 + }, + { + "epoch": 6.577142857142857, + "grad_norm": 59.72079086303711, + "learning_rate": 
4.8247619047619055e-05, + "loss": 0.8404, + "step": 1151 + }, + { + "epoch": 6.582857142857143, + "grad_norm": 52.377994537353516, + "learning_rate": 4.824126984126984e-05, + "loss": 0.7984, + "step": 1152 + }, + { + "epoch": 6.588571428571429, + "grad_norm": 62.65300369262695, + "learning_rate": 4.823492063492064e-05, + "loss": 0.5371, + "step": 1153 + }, + { + "epoch": 6.594285714285714, + "grad_norm": 44.0941047668457, + "learning_rate": 4.8228571428571426e-05, + "loss": 0.5253, + "step": 1154 + }, + { + "epoch": 6.6, + "grad_norm": 43.50336456298828, + "learning_rate": 4.8222222222222225e-05, + "loss": 0.8101, + "step": 1155 + }, + { + "epoch": 6.605714285714286, + "grad_norm": 37.503929138183594, + "learning_rate": 4.821587301587302e-05, + "loss": 0.804, + "step": 1156 + }, + { + "epoch": 6.611428571428571, + "grad_norm": 54.677154541015625, + "learning_rate": 4.820952380952381e-05, + "loss": 0.7564, + "step": 1157 + }, + { + "epoch": 6.617142857142857, + "grad_norm": 48.551334381103516, + "learning_rate": 4.82031746031746e-05, + "loss": 0.7482, + "step": 1158 + }, + { + "epoch": 6.622857142857143, + "grad_norm": 46.53056335449219, + "learning_rate": 4.81968253968254e-05, + "loss": 0.7155, + "step": 1159 + }, + { + "epoch": 6.628571428571428, + "grad_norm": 31.326919555664062, + "learning_rate": 4.819047619047619e-05, + "loss": 0.9375, + "step": 1160 + }, + { + "epoch": 6.634285714285714, + "grad_norm": 42.24070358276367, + "learning_rate": 4.818412698412699e-05, + "loss": 0.7436, + "step": 1161 + }, + { + "epoch": 6.64, + "grad_norm": 62.546688079833984, + "learning_rate": 4.817777777777778e-05, + "loss": 0.6693, + "step": 1162 + }, + { + "epoch": 6.645714285714286, + "grad_norm": 52.980587005615234, + "learning_rate": 4.817142857142857e-05, + "loss": 0.9501, + "step": 1163 + }, + { + "epoch": 6.651428571428571, + "grad_norm": 44.53245162963867, + "learning_rate": 4.8165079365079366e-05, + "loss": 0.6875, + "step": 1164 + }, + { + "epoch": 6.6571428571428575, 
+ "grad_norm": 37.25103759765625, + "learning_rate": 4.815873015873016e-05, + "loss": 0.65, + "step": 1165 + }, + { + "epoch": 6.662857142857143, + "grad_norm": 36.22304153442383, + "learning_rate": 4.815238095238096e-05, + "loss": 0.9507, + "step": 1166 + }, + { + "epoch": 6.668571428571429, + "grad_norm": 36.808326721191406, + "learning_rate": 4.814603174603175e-05, + "loss": 0.4853, + "step": 1167 + }, + { + "epoch": 6.674285714285714, + "grad_norm": 37.148014068603516, + "learning_rate": 4.813968253968254e-05, + "loss": 0.6508, + "step": 1168 + }, + { + "epoch": 6.68, + "grad_norm": 37.951263427734375, + "learning_rate": 4.8133333333333336e-05, + "loss": 0.6891, + "step": 1169 + }, + { + "epoch": 6.685714285714286, + "grad_norm": 31.897668838500977, + "learning_rate": 4.812698412698413e-05, + "loss": 0.8311, + "step": 1170 + }, + { + "epoch": 6.691428571428571, + "grad_norm": 50.647037506103516, + "learning_rate": 4.812063492063492e-05, + "loss": 0.6032, + "step": 1171 + }, + { + "epoch": 6.6971428571428575, + "grad_norm": 41.20783996582031, + "learning_rate": 4.811428571428572e-05, + "loss": 0.5875, + "step": 1172 + }, + { + "epoch": 6.702857142857143, + "grad_norm": 33.35331726074219, + "learning_rate": 4.810793650793651e-05, + "loss": 0.6017, + "step": 1173 + }, + { + "epoch": 6.708571428571428, + "grad_norm": 25.426799774169922, + "learning_rate": 4.8101587301587305e-05, + "loss": 0.7326, + "step": 1174 + }, + { + "epoch": 6.714285714285714, + "grad_norm": 40.838165283203125, + "learning_rate": 4.80952380952381e-05, + "loss": 0.9523, + "step": 1175 + }, + { + "epoch": 6.72, + "grad_norm": 152.543701171875, + "learning_rate": 4.808888888888889e-05, + "loss": 0.7916, + "step": 1176 + }, + { + "epoch": 6.725714285714286, + "grad_norm": 51.32667922973633, + "learning_rate": 4.8082539682539683e-05, + "loss": 1.0948, + "step": 1177 + }, + { + "epoch": 6.731428571428571, + "grad_norm": 28.265817642211914, + "learning_rate": 4.807619047619048e-05, + "loss": 0.5807, 
+ "step": 1178 + }, + { + "epoch": 6.737142857142857, + "grad_norm": 116.4324722290039, + "learning_rate": 4.806984126984127e-05, + "loss": 0.669, + "step": 1179 + }, + { + "epoch": 6.742857142857143, + "grad_norm": 34.159423828125, + "learning_rate": 4.806349206349207e-05, + "loss": 0.8612, + "step": 1180 + }, + { + "epoch": 6.748571428571428, + "grad_norm": 39.030296325683594, + "learning_rate": 4.805714285714286e-05, + "loss": 0.873, + "step": 1181 + }, + { + "epoch": 6.7542857142857144, + "grad_norm": 37.374820709228516, + "learning_rate": 4.805079365079365e-05, + "loss": 0.8782, + "step": 1182 + }, + { + "epoch": 6.76, + "grad_norm": 119.02398681640625, + "learning_rate": 4.8044444444444446e-05, + "loss": 0.7441, + "step": 1183 + }, + { + "epoch": 6.765714285714286, + "grad_norm": 50.47015380859375, + "learning_rate": 4.8038095238095245e-05, + "loss": 0.669, + "step": 1184 + }, + { + "epoch": 6.771428571428571, + "grad_norm": 28.232337951660156, + "learning_rate": 4.803174603174603e-05, + "loss": 0.9575, + "step": 1185 + }, + { + "epoch": 6.777142857142858, + "grad_norm": 48.09858322143555, + "learning_rate": 4.802539682539683e-05, + "loss": 0.6097, + "step": 1186 + }, + { + "epoch": 6.782857142857143, + "grad_norm": 75.99871063232422, + "learning_rate": 4.8019047619047617e-05, + "loss": 0.8402, + "step": 1187 + }, + { + "epoch": 6.788571428571428, + "grad_norm": 36.24739074707031, + "learning_rate": 4.8012698412698416e-05, + "loss": 0.54, + "step": 1188 + }, + { + "epoch": 6.7942857142857145, + "grad_norm": 66.97037506103516, + "learning_rate": 4.800634920634921e-05, + "loss": 0.651, + "step": 1189 + }, + { + "epoch": 6.8, + "grad_norm": 110.5335464477539, + "learning_rate": 4.8e-05, + "loss": 0.5993, + "step": 1190 + }, + { + "epoch": 6.805714285714286, + "grad_norm": 24.29607582092285, + "learning_rate": 4.7993650793650794e-05, + "loss": 0.5497, + "step": 1191 + }, + { + "epoch": 6.811428571428571, + "grad_norm": 72.72708892822266, + "learning_rate": 
4.798730158730159e-05, + "loss": 0.4804, + "step": 1192 + }, + { + "epoch": 6.817142857142857, + "grad_norm": 39.02521514892578, + "learning_rate": 4.798095238095238e-05, + "loss": 0.6332, + "step": 1193 + }, + { + "epoch": 6.822857142857143, + "grad_norm": 71.21878814697266, + "learning_rate": 4.797460317460318e-05, + "loss": 0.5008, + "step": 1194 + }, + { + "epoch": 6.828571428571428, + "grad_norm": 52.14028549194336, + "learning_rate": 4.7968253968253964e-05, + "loss": 0.848, + "step": 1195 + }, + { + "epoch": 6.8342857142857145, + "grad_norm": 64.03514099121094, + "learning_rate": 4.7961904761904764e-05, + "loss": 0.726, + "step": 1196 + }, + { + "epoch": 6.84, + "grad_norm": 64.52749633789062, + "learning_rate": 4.7955555555555556e-05, + "loss": 0.5717, + "step": 1197 + }, + { + "epoch": 6.845714285714286, + "grad_norm": 28.972579956054688, + "learning_rate": 4.794920634920635e-05, + "loss": 0.7247, + "step": 1198 + }, + { + "epoch": 6.851428571428571, + "grad_norm": 32.29021453857422, + "learning_rate": 4.794285714285714e-05, + "loss": 0.6866, + "step": 1199 + }, + { + "epoch": 6.857142857142857, + "grad_norm": 67.7391128540039, + "learning_rate": 4.793650793650794e-05, + "loss": 0.6087, + "step": 1200 + }, + { + "epoch": 6.862857142857143, + "grad_norm": 42.552764892578125, + "learning_rate": 4.7930158730158734e-05, + "loss": 0.5617, + "step": 1201 + }, + { + "epoch": 6.868571428571428, + "grad_norm": 24.80632209777832, + "learning_rate": 4.7923809523809526e-05, + "loss": 0.6602, + "step": 1202 + }, + { + "epoch": 6.8742857142857146, + "grad_norm": 59.35768508911133, + "learning_rate": 4.791746031746032e-05, + "loss": 0.7463, + "step": 1203 + }, + { + "epoch": 6.88, + "grad_norm": 98.59971618652344, + "learning_rate": 4.791111111111111e-05, + "loss": 0.4685, + "step": 1204 + }, + { + "epoch": 6.885714285714286, + "grad_norm": 67.02965545654297, + "learning_rate": 4.790476190476191e-05, + "loss": 0.7761, + "step": 1205 + }, + { + "epoch": 6.8914285714285715, 
+ "grad_norm": 35.71647262573242, + "learning_rate": 4.78984126984127e-05, + "loss": 0.5647, + "step": 1206 + }, + { + "epoch": 6.897142857142857, + "grad_norm": 31.601408004760742, + "learning_rate": 4.7892063492063496e-05, + "loss": 0.5825, + "step": 1207 + }, + { + "epoch": 6.902857142857143, + "grad_norm": 28.59677505493164, + "learning_rate": 4.788571428571429e-05, + "loss": 0.5724, + "step": 1208 + }, + { + "epoch": 6.908571428571428, + "grad_norm": 43.35946273803711, + "learning_rate": 4.787936507936508e-05, + "loss": 0.748, + "step": 1209 + }, + { + "epoch": 6.914285714285715, + "grad_norm": 92.41816711425781, + "learning_rate": 4.7873015873015874e-05, + "loss": 0.7642, + "step": 1210 + }, + { + "epoch": 6.92, + "grad_norm": 30.898258209228516, + "learning_rate": 4.7866666666666674e-05, + "loss": 0.6595, + "step": 1211 + }, + { + "epoch": 6.925714285714285, + "grad_norm": 48.89678192138672, + "learning_rate": 4.786031746031746e-05, + "loss": 0.7994, + "step": 1212 + }, + { + "epoch": 6.9314285714285715, + "grad_norm": 51.480499267578125, + "learning_rate": 4.785396825396826e-05, + "loss": 0.6359, + "step": 1213 + }, + { + "epoch": 6.937142857142857, + "grad_norm": 53.00624465942383, + "learning_rate": 4.784761904761905e-05, + "loss": 0.6924, + "step": 1214 + }, + { + "epoch": 6.942857142857143, + "grad_norm": 23.87933349609375, + "learning_rate": 4.7841269841269844e-05, + "loss": 0.8247, + "step": 1215 + }, + { + "epoch": 6.948571428571428, + "grad_norm": 41.4123649597168, + "learning_rate": 4.783492063492064e-05, + "loss": 0.7448, + "step": 1216 + }, + { + "epoch": 6.954285714285715, + "grad_norm": 21.50604820251465, + "learning_rate": 4.782857142857143e-05, + "loss": 0.5331, + "step": 1217 + }, + { + "epoch": 6.96, + "grad_norm": 32.19643783569336, + "learning_rate": 4.782222222222222e-05, + "loss": 0.6036, + "step": 1218 + }, + { + "epoch": 6.965714285714286, + "grad_norm": 212.23252868652344, + "learning_rate": 4.781587301587302e-05, + "loss": 1.1959, + 
"step": 1219 + }, + { + "epoch": 6.9714285714285715, + "grad_norm": 52.73761749267578, + "learning_rate": 4.780952380952381e-05, + "loss": 0.7818, + "step": 1220 + }, + { + "epoch": 6.977142857142857, + "grad_norm": 43.433170318603516, + "learning_rate": 4.780317460317461e-05, + "loss": 0.9563, + "step": 1221 + }, + { + "epoch": 6.982857142857143, + "grad_norm": 70.16417694091797, + "learning_rate": 4.77968253968254e-05, + "loss": 0.9158, + "step": 1222 + }, + { + "epoch": 6.988571428571428, + "grad_norm": 49.823883056640625, + "learning_rate": 4.779047619047619e-05, + "loss": 0.8667, + "step": 1223 + }, + { + "epoch": 6.994285714285715, + "grad_norm": 53.50132369995117, + "learning_rate": 4.7784126984126985e-05, + "loss": 0.7879, + "step": 1224 + }, + { + "epoch": 7.0, + "grad_norm": 37.431339263916016, + "learning_rate": 4.7777777777777784e-05, + "loss": 0.6104, + "step": 1225 + }, + { + "epoch": 7.0, + "eval_classes": 0, + "eval_loss": 0.7867908477783203, + "eval_map": 0.8772, + "eval_map_50": 0.9382, + "eval_map_75": 0.9197, + "eval_map_large": 0.8773, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.8772, + "eval_map_small": -1.0, + "eval_mar_1": 0.7635, + "eval_mar_10": 0.9635, + "eval_mar_100": 0.9724, + "eval_mar_100_per_class": 0.9724, + "eval_mar_large": 0.9724, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 18.3837, + "eval_samples_per_second": 15.992, + "eval_steps_per_second": 2.013, + "step": 1225 + }, + { + "epoch": 7.005714285714285, + "grad_norm": 57.21017837524414, + "learning_rate": 4.777142857142857e-05, + "loss": 0.8237, + "step": 1226 + }, + { + "epoch": 7.011428571428572, + "grad_norm": 33.716190338134766, + "learning_rate": 4.776507936507937e-05, + "loss": 0.6962, + "step": 1227 + }, + { + "epoch": 7.017142857142857, + "grad_norm": 32.71946716308594, + "learning_rate": 4.7758730158730155e-05, + "loss": 0.5775, + "step": 1228 + }, + { + "epoch": 7.022857142857143, + "grad_norm": 19.849760055541992, + 
"learning_rate": 4.7752380952380955e-05, + "loss": 0.7152, + "step": 1229 + }, + { + "epoch": 7.0285714285714285, + "grad_norm": 61.321693420410156, + "learning_rate": 4.774603174603175e-05, + "loss": 0.505, + "step": 1230 + }, + { + "epoch": 7.034285714285715, + "grad_norm": 26.332571029663086, + "learning_rate": 4.773968253968254e-05, + "loss": 0.7814, + "step": 1231 + }, + { + "epoch": 7.04, + "grad_norm": 30.386167526245117, + "learning_rate": 4.773333333333333e-05, + "loss": 0.7395, + "step": 1232 + }, + { + "epoch": 7.045714285714285, + "grad_norm": 56.83769989013672, + "learning_rate": 4.772698412698413e-05, + "loss": 0.683, + "step": 1233 + }, + { + "epoch": 7.051428571428572, + "grad_norm": 24.878450393676758, + "learning_rate": 4.7720634920634925e-05, + "loss": 0.9383, + "step": 1234 + }, + { + "epoch": 7.057142857142857, + "grad_norm": 23.832763671875, + "learning_rate": 4.771428571428572e-05, + "loss": 0.6939, + "step": 1235 + }, + { + "epoch": 7.062857142857143, + "grad_norm": 35.99517822265625, + "learning_rate": 4.770793650793651e-05, + "loss": 0.6861, + "step": 1236 + }, + { + "epoch": 7.0685714285714285, + "grad_norm": 40.71198272705078, + "learning_rate": 4.77015873015873e-05, + "loss": 0.5809, + "step": 1237 + }, + { + "epoch": 7.074285714285715, + "grad_norm": 36.933433532714844, + "learning_rate": 4.7695238095238095e-05, + "loss": 0.466, + "step": 1238 + }, + { + "epoch": 7.08, + "grad_norm": 52.858970642089844, + "learning_rate": 4.768888888888889e-05, + "loss": 0.5116, + "step": 1239 + }, + { + "epoch": 7.085714285714285, + "grad_norm": 61.59458923339844, + "learning_rate": 4.768253968253969e-05, + "loss": 0.8465, + "step": 1240 + }, + { + "epoch": 7.091428571428572, + "grad_norm": 50.026336669921875, + "learning_rate": 4.767619047619048e-05, + "loss": 0.585, + "step": 1241 + }, + { + "epoch": 7.097142857142857, + "grad_norm": 98.24055480957031, + "learning_rate": 4.766984126984127e-05, + "loss": 0.5558, + "step": 1242 + }, + { + "epoch": 
7.102857142857143, + "grad_norm": 52.92375564575195, + "learning_rate": 4.7663492063492065e-05, + "loss": 0.6798, + "step": 1243 + }, + { + "epoch": 7.1085714285714285, + "grad_norm": 144.95716857910156, + "learning_rate": 4.7657142857142865e-05, + "loss": 0.9123, + "step": 1244 + }, + { + "epoch": 7.114285714285714, + "grad_norm": 53.37465286254883, + "learning_rate": 4.765079365079365e-05, + "loss": 0.6079, + "step": 1245 + }, + { + "epoch": 7.12, + "grad_norm": 58.13810348510742, + "learning_rate": 4.764444444444445e-05, + "loss": 0.5448, + "step": 1246 + }, + { + "epoch": 7.1257142857142854, + "grad_norm": 51.799503326416016, + "learning_rate": 4.763809523809524e-05, + "loss": 0.4733, + "step": 1247 + }, + { + "epoch": 7.131428571428572, + "grad_norm": 41.86768341064453, + "learning_rate": 4.7631746031746035e-05, + "loss": 0.5872, + "step": 1248 + }, + { + "epoch": 7.137142857142857, + "grad_norm": 75.33160400390625, + "learning_rate": 4.762539682539683e-05, + "loss": 0.5285, + "step": 1249 + }, + { + "epoch": 7.142857142857143, + "grad_norm": 37.584312438964844, + "learning_rate": 4.761904761904762e-05, + "loss": 0.5908, + "step": 1250 + }, + { + "epoch": 7.148571428571429, + "grad_norm": 27.99728012084961, + "learning_rate": 4.761269841269841e-05, + "loss": 0.7489, + "step": 1251 + }, + { + "epoch": 7.154285714285714, + "grad_norm": 215.4183349609375, + "learning_rate": 4.760634920634921e-05, + "loss": 0.7163, + "step": 1252 + }, + { + "epoch": 7.16, + "grad_norm": 41.62477493286133, + "learning_rate": 4.76e-05, + "loss": 0.9882, + "step": 1253 + }, + { + "epoch": 7.1657142857142855, + "grad_norm": 28.22425651550293, + "learning_rate": 4.75936507936508e-05, + "loss": 0.847, + "step": 1254 + }, + { + "epoch": 7.171428571428572, + "grad_norm": 53.103084564208984, + "learning_rate": 4.758730158730159e-05, + "loss": 0.5099, + "step": 1255 + }, + { + "epoch": 7.177142857142857, + "grad_norm": 49.915714263916016, + "learning_rate": 4.758095238095238e-05, + "loss": 
0.7455, + "step": 1256 + }, + { + "epoch": 7.182857142857143, + "grad_norm": 77.69715118408203, + "learning_rate": 4.7574603174603176e-05, + "loss": 0.7597, + "step": 1257 + }, + { + "epoch": 7.188571428571429, + "grad_norm": 50.848731994628906, + "learning_rate": 4.7568253968253975e-05, + "loss": 0.6128, + "step": 1258 + }, + { + "epoch": 7.194285714285714, + "grad_norm": 24.732351303100586, + "learning_rate": 4.756190476190476e-05, + "loss": 0.7537, + "step": 1259 + }, + { + "epoch": 7.2, + "grad_norm": 21.70195198059082, + "learning_rate": 4.755555555555556e-05, + "loss": 0.4549, + "step": 1260 + }, + { + "epoch": 7.2057142857142855, + "grad_norm": 25.89827537536621, + "learning_rate": 4.7549206349206346e-05, + "loss": 0.4765, + "step": 1261 + }, + { + "epoch": 7.211428571428572, + "grad_norm": 33.5347900390625, + "learning_rate": 4.7542857142857146e-05, + "loss": 0.772, + "step": 1262 + }, + { + "epoch": 7.217142857142857, + "grad_norm": 65.69261169433594, + "learning_rate": 4.753650793650794e-05, + "loss": 0.7734, + "step": 1263 + }, + { + "epoch": 7.222857142857142, + "grad_norm": 30.24980926513672, + "learning_rate": 4.753015873015873e-05, + "loss": 0.7583, + "step": 1264 + }, + { + "epoch": 7.228571428571429, + "grad_norm": 98.29232788085938, + "learning_rate": 4.7523809523809523e-05, + "loss": 0.7562, + "step": 1265 + }, + { + "epoch": 7.234285714285714, + "grad_norm": 179.77772521972656, + "learning_rate": 4.751746031746032e-05, + "loss": 0.8792, + "step": 1266 + }, + { + "epoch": 7.24, + "grad_norm": 29.68090057373047, + "learning_rate": 4.751111111111111e-05, + "loss": 0.6853, + "step": 1267 + }, + { + "epoch": 7.2457142857142856, + "grad_norm": 72.9825668334961, + "learning_rate": 4.750476190476191e-05, + "loss": 0.8319, + "step": 1268 + }, + { + "epoch": 7.251428571428572, + "grad_norm": 28.135278701782227, + "learning_rate": 4.74984126984127e-05, + "loss": 0.7565, + "step": 1269 + }, + { + "epoch": 7.257142857142857, + "grad_norm": 40.16902542114258, 
+ "learning_rate": 4.7492063492063493e-05, + "loss": 0.5983, + "step": 1270 + }, + { + "epoch": 7.2628571428571425, + "grad_norm": 32.82265090942383, + "learning_rate": 4.7485714285714286e-05, + "loss": 0.5208, + "step": 1271 + }, + { + "epoch": 7.268571428571429, + "grad_norm": 36.688411712646484, + "learning_rate": 4.747936507936508e-05, + "loss": 0.6193, + "step": 1272 + }, + { + "epoch": 7.274285714285714, + "grad_norm": 63.6164665222168, + "learning_rate": 4.747301587301588e-05, + "loss": 0.6022, + "step": 1273 + }, + { + "epoch": 7.28, + "grad_norm": 29.837209701538086, + "learning_rate": 4.746666666666667e-05, + "loss": 0.5733, + "step": 1274 + }, + { + "epoch": 7.285714285714286, + "grad_norm": 45.36772155761719, + "learning_rate": 4.746031746031746e-05, + "loss": 0.5461, + "step": 1275 + }, + { + "epoch": 7.291428571428572, + "grad_norm": 23.052791595458984, + "learning_rate": 4.7453968253968256e-05, + "loss": 0.5568, + "step": 1276 + }, + { + "epoch": 7.297142857142857, + "grad_norm": 70.93611145019531, + "learning_rate": 4.744761904761905e-05, + "loss": 0.6384, + "step": 1277 + }, + { + "epoch": 7.3028571428571425, + "grad_norm": 28.31834602355957, + "learning_rate": 4.744126984126984e-05, + "loss": 0.6331, + "step": 1278 + }, + { + "epoch": 7.308571428571429, + "grad_norm": 60.236297607421875, + "learning_rate": 4.743492063492064e-05, + "loss": 0.6148, + "step": 1279 + }, + { + "epoch": 7.314285714285714, + "grad_norm": 371.16424560546875, + "learning_rate": 4.742857142857143e-05, + "loss": 0.4808, + "step": 1280 + }, + { + "epoch": 7.32, + "grad_norm": 75.56861877441406, + "learning_rate": 4.7422222222222226e-05, + "loss": 0.5723, + "step": 1281 + }, + { + "epoch": 7.325714285714286, + "grad_norm": 92.06929016113281, + "learning_rate": 4.741587301587302e-05, + "loss": 0.714, + "step": 1282 + }, + { + "epoch": 7.331428571428571, + "grad_norm": 58.06058120727539, + "learning_rate": 4.740952380952381e-05, + "loss": 0.6472, + "step": 1283 + }, + { + 
"epoch": 7.337142857142857, + "grad_norm": 49.69181823730469, + "learning_rate": 4.7403174603174604e-05, + "loss": 0.6676, + "step": 1284 + }, + { + "epoch": 7.3428571428571425, + "grad_norm": 43.00157928466797, + "learning_rate": 4.73968253968254e-05, + "loss": 0.7564, + "step": 1285 + }, + { + "epoch": 7.348571428571429, + "grad_norm": 22.465194702148438, + "learning_rate": 4.739047619047619e-05, + "loss": 0.7006, + "step": 1286 + }, + { + "epoch": 7.354285714285714, + "grad_norm": 48.402339935302734, + "learning_rate": 4.738412698412699e-05, + "loss": 0.5376, + "step": 1287 + }, + { + "epoch": 7.36, + "grad_norm": 26.851852416992188, + "learning_rate": 4.737777777777778e-05, + "loss": 0.7149, + "step": 1288 + }, + { + "epoch": 7.365714285714286, + "grad_norm": 102.73119354248047, + "learning_rate": 4.7371428571428574e-05, + "loss": 0.6729, + "step": 1289 + }, + { + "epoch": 7.371428571428572, + "grad_norm": 40.6550178527832, + "learning_rate": 4.7365079365079366e-05, + "loss": 0.6184, + "step": 1290 + }, + { + "epoch": 7.377142857142857, + "grad_norm": 42.261741638183594, + "learning_rate": 4.7358730158730166e-05, + "loss": 0.5753, + "step": 1291 + }, + { + "epoch": 7.382857142857143, + "grad_norm": 50.98457336425781, + "learning_rate": 4.735238095238095e-05, + "loss": 0.5469, + "step": 1292 + }, + { + "epoch": 7.388571428571429, + "grad_norm": 45.39360427856445, + "learning_rate": 4.734603174603175e-05, + "loss": 0.4814, + "step": 1293 + }, + { + "epoch": 7.394285714285714, + "grad_norm": 51.51216125488281, + "learning_rate": 4.733968253968254e-05, + "loss": 0.7583, + "step": 1294 + }, + { + "epoch": 7.4, + "grad_norm": 22.863304138183594, + "learning_rate": 4.7333333333333336e-05, + "loss": 0.6165, + "step": 1295 + }, + { + "epoch": 7.405714285714286, + "grad_norm": 17.304244995117188, + "learning_rate": 4.732698412698413e-05, + "loss": 0.6645, + "step": 1296 + }, + { + "epoch": 7.411428571428571, + "grad_norm": 67.12471771240234, + "learning_rate": 
4.732063492063492e-05, + "loss": 0.7557, + "step": 1297 + }, + { + "epoch": 7.417142857142857, + "grad_norm": 35.99345397949219, + "learning_rate": 4.7314285714285714e-05, + "loss": 0.6616, + "step": 1298 + }, + { + "epoch": 7.422857142857143, + "grad_norm": 34.61998748779297, + "learning_rate": 4.7307936507936514e-05, + "loss": 0.5287, + "step": 1299 + }, + { + "epoch": 7.428571428571429, + "grad_norm": 31.36380386352539, + "learning_rate": 4.73015873015873e-05, + "loss": 0.5124, + "step": 1300 + }, + { + "epoch": 7.434285714285714, + "grad_norm": 52.2266845703125, + "learning_rate": 4.72952380952381e-05, + "loss": 0.7854, + "step": 1301 + }, + { + "epoch": 7.44, + "grad_norm": 30.61333465576172, + "learning_rate": 4.728888888888889e-05, + "loss": 0.6238, + "step": 1302 + }, + { + "epoch": 7.445714285714286, + "grad_norm": 48.91444778442383, + "learning_rate": 4.7282539682539684e-05, + "loss": 0.567, + "step": 1303 + }, + { + "epoch": 7.451428571428571, + "grad_norm": 18.876323699951172, + "learning_rate": 4.727619047619048e-05, + "loss": 0.7001, + "step": 1304 + }, + { + "epoch": 7.457142857142857, + "grad_norm": 75.31610107421875, + "learning_rate": 4.726984126984127e-05, + "loss": 0.6682, + "step": 1305 + }, + { + "epoch": 7.462857142857143, + "grad_norm": 24.598119735717773, + "learning_rate": 4.726349206349206e-05, + "loss": 0.7838, + "step": 1306 + }, + { + "epoch": 7.468571428571429, + "grad_norm": 56.07027816772461, + "learning_rate": 4.725714285714286e-05, + "loss": 0.6504, + "step": 1307 + }, + { + "epoch": 7.474285714285714, + "grad_norm": 41.39461135864258, + "learning_rate": 4.7250793650793654e-05, + "loss": 0.6751, + "step": 1308 + }, + { + "epoch": 7.48, + "grad_norm": 18.566652297973633, + "learning_rate": 4.724444444444445e-05, + "loss": 0.4242, + "step": 1309 + }, + { + "epoch": 7.485714285714286, + "grad_norm": 25.184492111206055, + "learning_rate": 4.723809523809524e-05, + "loss": 0.7987, + "step": 1310 + }, + { + "epoch": 7.491428571428571, + 
"grad_norm": 42.953147888183594, + "learning_rate": 4.723174603174603e-05, + "loss": 0.8759, + "step": 1311 + }, + { + "epoch": 7.497142857142857, + "grad_norm": 45.01292037963867, + "learning_rate": 4.722539682539683e-05, + "loss": 0.5476, + "step": 1312 + }, + { + "epoch": 7.502857142857143, + "grad_norm": 77.92054748535156, + "learning_rate": 4.7219047619047624e-05, + "loss": 0.6594, + "step": 1313 + }, + { + "epoch": 7.508571428571429, + "grad_norm": 72.87895202636719, + "learning_rate": 4.721269841269842e-05, + "loss": 0.8035, + "step": 1314 + }, + { + "epoch": 7.514285714285714, + "grad_norm": 74.46797943115234, + "learning_rate": 4.720634920634921e-05, + "loss": 1.1645, + "step": 1315 + }, + { + "epoch": 7.52, + "grad_norm": 26.602317810058594, + "learning_rate": 4.72e-05, + "loss": 0.6856, + "step": 1316 + }, + { + "epoch": 7.525714285714286, + "grad_norm": 25.693401336669922, + "learning_rate": 4.7193650793650795e-05, + "loss": 0.6172, + "step": 1317 + }, + { + "epoch": 7.531428571428571, + "grad_norm": 31.481958389282227, + "learning_rate": 4.7187301587301594e-05, + "loss": 0.7717, + "step": 1318 + }, + { + "epoch": 7.537142857142857, + "grad_norm": 83.2188491821289, + "learning_rate": 4.718095238095238e-05, + "loss": 0.7369, + "step": 1319 + }, + { + "epoch": 7.542857142857143, + "grad_norm": 366.4823303222656, + "learning_rate": 4.717460317460318e-05, + "loss": 0.7258, + "step": 1320 + }, + { + "epoch": 7.548571428571429, + "grad_norm": 86.60832977294922, + "learning_rate": 4.716825396825397e-05, + "loss": 0.5646, + "step": 1321 + }, + { + "epoch": 7.554285714285714, + "grad_norm": 51.94847106933594, + "learning_rate": 4.7161904761904765e-05, + "loss": 0.5762, + "step": 1322 + }, + { + "epoch": 7.5600000000000005, + "grad_norm": 50.4522590637207, + "learning_rate": 4.715555555555556e-05, + "loss": 0.5024, + "step": 1323 + }, + { + "epoch": 7.565714285714286, + "grad_norm": 30.071168899536133, + "learning_rate": 4.714920634920636e-05, + "loss": 0.6713, + 
"step": 1324 + }, + { + "epoch": 7.571428571428571, + "grad_norm": 33.680992126464844, + "learning_rate": 4.714285714285714e-05, + "loss": 0.7059, + "step": 1325 + }, + { + "epoch": 7.577142857142857, + "grad_norm": 34.198062896728516, + "learning_rate": 4.713650793650794e-05, + "loss": 0.7452, + "step": 1326 + }, + { + "epoch": 7.582857142857143, + "grad_norm": 60.24821090698242, + "learning_rate": 4.713015873015873e-05, + "loss": 0.5497, + "step": 1327 + }, + { + "epoch": 7.588571428571429, + "grad_norm": 35.51438903808594, + "learning_rate": 4.712380952380953e-05, + "loss": 0.6105, + "step": 1328 + }, + { + "epoch": 7.594285714285714, + "grad_norm": 73.2537612915039, + "learning_rate": 4.711746031746032e-05, + "loss": 0.5574, + "step": 1329 + }, + { + "epoch": 7.6, + "grad_norm": 30.033363342285156, + "learning_rate": 4.711111111111111e-05, + "loss": 0.5753, + "step": 1330 + }, + { + "epoch": 7.605714285714286, + "grad_norm": 73.85240936279297, + "learning_rate": 4.7104761904761905e-05, + "loss": 0.7066, + "step": 1331 + }, + { + "epoch": 7.611428571428571, + "grad_norm": 81.2567367553711, + "learning_rate": 4.7098412698412705e-05, + "loss": 0.817, + "step": 1332 + }, + { + "epoch": 7.617142857142857, + "grad_norm": 31.7690486907959, + "learning_rate": 4.709206349206349e-05, + "loss": 0.4587, + "step": 1333 + }, + { + "epoch": 7.622857142857143, + "grad_norm": 23.420303344726562, + "learning_rate": 4.708571428571429e-05, + "loss": 0.6593, + "step": 1334 + }, + { + "epoch": 7.628571428571428, + "grad_norm": 38.06245803833008, + "learning_rate": 4.707936507936508e-05, + "loss": 0.7267, + "step": 1335 + }, + { + "epoch": 7.634285714285714, + "grad_norm": 58.60105895996094, + "learning_rate": 4.7073015873015875e-05, + "loss": 0.5329, + "step": 1336 + }, + { + "epoch": 7.64, + "grad_norm": 46.24578094482422, + "learning_rate": 4.706666666666667e-05, + "loss": 0.885, + "step": 1337 + }, + { + "epoch": 7.645714285714286, + "grad_norm": 45.6176643371582, + 
"learning_rate": 4.706031746031746e-05, + "loss": 0.5294, + "step": 1338 + }, + { + "epoch": 7.651428571428571, + "grad_norm": 78.04761505126953, + "learning_rate": 4.705396825396825e-05, + "loss": 0.6382, + "step": 1339 + }, + { + "epoch": 7.6571428571428575, + "grad_norm": 321.0185241699219, + "learning_rate": 4.704761904761905e-05, + "loss": 0.7118, + "step": 1340 + }, + { + "epoch": 7.662857142857143, + "grad_norm": 45.66987991333008, + "learning_rate": 4.704126984126984e-05, + "loss": 0.9693, + "step": 1341 + }, + { + "epoch": 7.668571428571429, + "grad_norm": 47.87458038330078, + "learning_rate": 4.703492063492064e-05, + "loss": 0.6789, + "step": 1342 + }, + { + "epoch": 7.674285714285714, + "grad_norm": 32.397735595703125, + "learning_rate": 4.702857142857143e-05, + "loss": 0.543, + "step": 1343 + }, + { + "epoch": 7.68, + "grad_norm": 54.12117385864258, + "learning_rate": 4.702222222222222e-05, + "loss": 0.5905, + "step": 1344 + }, + { + "epoch": 7.685714285714286, + "grad_norm": 60.069007873535156, + "learning_rate": 4.7015873015873016e-05, + "loss": 0.8836, + "step": 1345 + }, + { + "epoch": 7.691428571428571, + "grad_norm": 67.7389144897461, + "learning_rate": 4.7009523809523815e-05, + "loss": 0.8242, + "step": 1346 + }, + { + "epoch": 7.6971428571428575, + "grad_norm": 24.725425720214844, + "learning_rate": 4.700317460317461e-05, + "loss": 0.6177, + "step": 1347 + }, + { + "epoch": 7.702857142857143, + "grad_norm": 34.804195404052734, + "learning_rate": 4.69968253968254e-05, + "loss": 0.4738, + "step": 1348 + }, + { + "epoch": 7.708571428571428, + "grad_norm": 125.37673950195312, + "learning_rate": 4.699047619047619e-05, + "loss": 0.6758, + "step": 1349 + }, + { + "epoch": 7.714285714285714, + "grad_norm": 50.93052673339844, + "learning_rate": 4.6984126984126986e-05, + "loss": 0.5257, + "step": 1350 + }, + { + "epoch": 7.72, + "grad_norm": 55.35873031616211, + "learning_rate": 4.6977777777777785e-05, + "loss": 0.6723, + "step": 1351 + }, + { + "epoch": 
7.725714285714286, + "grad_norm": 27.543476104736328, + "learning_rate": 4.697142857142857e-05, + "loss": 0.6399, + "step": 1352 + }, + { + "epoch": 7.731428571428571, + "grad_norm": 114.15330505371094, + "learning_rate": 4.696507936507937e-05, + "loss": 0.6198, + "step": 1353 + }, + { + "epoch": 7.737142857142857, + "grad_norm": 51.38933181762695, + "learning_rate": 4.695873015873016e-05, + "loss": 0.5771, + "step": 1354 + }, + { + "epoch": 7.742857142857143, + "grad_norm": 40.537330627441406, + "learning_rate": 4.6952380952380956e-05, + "loss": 0.5921, + "step": 1355 + }, + { + "epoch": 7.748571428571428, + "grad_norm": 63.388702392578125, + "learning_rate": 4.694603174603175e-05, + "loss": 0.4777, + "step": 1356 + }, + { + "epoch": 7.7542857142857144, + "grad_norm": 39.542999267578125, + "learning_rate": 4.693968253968255e-05, + "loss": 0.6808, + "step": 1357 + }, + { + "epoch": 7.76, + "grad_norm": 35.982200622558594, + "learning_rate": 4.6933333333333333e-05, + "loss": 0.7238, + "step": 1358 + }, + { + "epoch": 7.765714285714286, + "grad_norm": 49.393272399902344, + "learning_rate": 4.692698412698413e-05, + "loss": 0.598, + "step": 1359 + }, + { + "epoch": 7.771428571428571, + "grad_norm": 34.336341857910156, + "learning_rate": 4.692063492063492e-05, + "loss": 0.9183, + "step": 1360 + }, + { + "epoch": 7.777142857142858, + "grad_norm": 35.617088317871094, + "learning_rate": 4.691428571428572e-05, + "loss": 0.7478, + "step": 1361 + }, + { + "epoch": 7.782857142857143, + "grad_norm": 61.48871612548828, + "learning_rate": 4.690793650793651e-05, + "loss": 0.636, + "step": 1362 + }, + { + "epoch": 7.788571428571428, + "grad_norm": 27.061933517456055, + "learning_rate": 4.6901587301587303e-05, + "loss": 0.3823, + "step": 1363 + }, + { + "epoch": 7.7942857142857145, + "grad_norm": 27.507038116455078, + "learning_rate": 4.6895238095238096e-05, + "loss": 0.7432, + "step": 1364 + }, + { + "epoch": 7.8, + "grad_norm": 24.477672576904297, + "learning_rate": 
4.6888888888888895e-05, + "loss": 0.5468, + "step": 1365 + }, + { + "epoch": 7.805714285714286, + "grad_norm": 40.26399230957031, + "learning_rate": 4.688253968253968e-05, + "loss": 0.733, + "step": 1366 + }, + { + "epoch": 7.811428571428571, + "grad_norm": 103.63063049316406, + "learning_rate": 4.687619047619048e-05, + "loss": 0.517, + "step": 1367 + }, + { + "epoch": 7.817142857142857, + "grad_norm": 28.04522132873535, + "learning_rate": 4.686984126984127e-05, + "loss": 0.5636, + "step": 1368 + }, + { + "epoch": 7.822857142857143, + "grad_norm": 41.827449798583984, + "learning_rate": 4.6863492063492066e-05, + "loss": 0.4993, + "step": 1369 + }, + { + "epoch": 7.828571428571428, + "grad_norm": 32.13361740112305, + "learning_rate": 4.685714285714286e-05, + "loss": 0.5968, + "step": 1370 + }, + { + "epoch": 7.8342857142857145, + "grad_norm": 60.024898529052734, + "learning_rate": 4.685079365079365e-05, + "loss": 0.874, + "step": 1371 + }, + { + "epoch": 7.84, + "grad_norm": 42.77864456176758, + "learning_rate": 4.6844444444444444e-05, + "loss": 0.4321, + "step": 1372 + }, + { + "epoch": 7.845714285714286, + "grad_norm": 73.90796661376953, + "learning_rate": 4.683809523809524e-05, + "loss": 0.8918, + "step": 1373 + }, + { + "epoch": 7.851428571428571, + "grad_norm": 82.85469055175781, + "learning_rate": 4.683174603174603e-05, + "loss": 0.3985, + "step": 1374 + }, + { + "epoch": 7.857142857142857, + "grad_norm": 41.67444610595703, + "learning_rate": 4.682539682539683e-05, + "loss": 0.4506, + "step": 1375 + }, + { + "epoch": 7.862857142857143, + "grad_norm": 74.92281341552734, + "learning_rate": 4.681904761904762e-05, + "loss": 0.5536, + "step": 1376 + }, + { + "epoch": 7.868571428571428, + "grad_norm": 55.40838623046875, + "learning_rate": 4.6812698412698414e-05, + "loss": 0.6726, + "step": 1377 + }, + { + "epoch": 7.8742857142857146, + "grad_norm": 25.91588592529297, + "learning_rate": 4.6806349206349207e-05, + "loss": 0.5684, + "step": 1378 + }, + { + "epoch": 7.88, 
+ "grad_norm": 60.40071105957031, + "learning_rate": 4.6800000000000006e-05, + "loss": 0.947, + "step": 1379 + }, + { + "epoch": 7.885714285714286, + "grad_norm": 50.07804489135742, + "learning_rate": 4.679365079365079e-05, + "loss": 0.5797, + "step": 1380 + }, + { + "epoch": 7.8914285714285715, + "grad_norm": 29.26346778869629, + "learning_rate": 4.678730158730159e-05, + "loss": 0.5418, + "step": 1381 + }, + { + "epoch": 7.897142857142857, + "grad_norm": 122.21076965332031, + "learning_rate": 4.6780952380952384e-05, + "loss": 0.5719, + "step": 1382 + }, + { + "epoch": 7.902857142857143, + "grad_norm": 175.2599639892578, + "learning_rate": 4.6774603174603176e-05, + "loss": 0.8481, + "step": 1383 + }, + { + "epoch": 7.908571428571428, + "grad_norm": 115.75862884521484, + "learning_rate": 4.676825396825397e-05, + "loss": 0.5336, + "step": 1384 + }, + { + "epoch": 7.914285714285715, + "grad_norm": 32.81181335449219, + "learning_rate": 4.676190476190476e-05, + "loss": 0.4968, + "step": 1385 + }, + { + "epoch": 7.92, + "grad_norm": 52.347774505615234, + "learning_rate": 4.675555555555556e-05, + "loss": 0.4297, + "step": 1386 + }, + { + "epoch": 7.925714285714285, + "grad_norm": 63.84890365600586, + "learning_rate": 4.6749206349206354e-05, + "loss": 0.4286, + "step": 1387 + }, + { + "epoch": 7.9314285714285715, + "grad_norm": 59.2192497253418, + "learning_rate": 4.6742857142857146e-05, + "loss": 0.5731, + "step": 1388 + }, + { + "epoch": 7.937142857142857, + "grad_norm": 75.16362762451172, + "learning_rate": 4.673650793650794e-05, + "loss": 0.6157, + "step": 1389 + }, + { + "epoch": 7.942857142857143, + "grad_norm": 19.244441986083984, + "learning_rate": 4.673015873015874e-05, + "loss": 0.889, + "step": 1390 + }, + { + "epoch": 7.948571428571428, + "grad_norm": 86.41683959960938, + "learning_rate": 4.6723809523809524e-05, + "loss": 0.7396, + "step": 1391 + }, + { + "epoch": 7.954285714285715, + "grad_norm": 36.017669677734375, + "learning_rate": 4.6717460317460324e-05, + 
"loss": 0.6135, + "step": 1392 + }, + { + "epoch": 7.96, + "grad_norm": 50.021759033203125, + "learning_rate": 4.671111111111111e-05, + "loss": 0.6035, + "step": 1393 + }, + { + "epoch": 7.965714285714286, + "grad_norm": 33.08448028564453, + "learning_rate": 4.670476190476191e-05, + "loss": 0.7405, + "step": 1394 + }, + { + "epoch": 7.9714285714285715, + "grad_norm": 113.28244018554688, + "learning_rate": 4.66984126984127e-05, + "loss": 0.7022, + "step": 1395 + }, + { + "epoch": 7.977142857142857, + "grad_norm": 44.64534378051758, + "learning_rate": 4.6692063492063494e-05, + "loss": 0.5826, + "step": 1396 + }, + { + "epoch": 7.982857142857143, + "grad_norm": 232.41506958007812, + "learning_rate": 4.668571428571429e-05, + "loss": 0.8322, + "step": 1397 + }, + { + "epoch": 7.988571428571428, + "grad_norm": 30.54139518737793, + "learning_rate": 4.6679365079365086e-05, + "loss": 0.6798, + "step": 1398 + }, + { + "epoch": 7.994285714285715, + "grad_norm": 41.59028244018555, + "learning_rate": 4.667301587301587e-05, + "loss": 0.8065, + "step": 1399 + }, + { + "epoch": 8.0, + "grad_norm": 36.8554801940918, + "learning_rate": 4.666666666666667e-05, + "loss": 0.6687, + "step": 1400 + }, + { + "epoch": 8.0, + "eval_classes": 0, + "eval_loss": 0.6754283308982849, + "eval_map": 0.9018, + "eval_map_50": 0.9583, + "eval_map_75": 0.9421, + "eval_map_large": 0.9019, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9018, + "eval_map_small": -1.0, + "eval_mar_1": 0.7838, + "eval_mar_10": 0.9625, + "eval_mar_100": 0.9727, + "eval_mar_100_per_class": 0.9727, + "eval_mar_large": 0.9727, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 17.0817, + "eval_samples_per_second": 17.211, + "eval_steps_per_second": 2.166, + "step": 1400 + }, + { + "epoch": 8.005714285714285, + "grad_norm": 35.78780746459961, + "learning_rate": 4.6660317460317464e-05, + "loss": 0.4911, + "step": 1401 + }, + { + "epoch": 8.01142857142857, + "grad_norm": 22.01752281188965, + 
"learning_rate": 4.665396825396826e-05, + "loss": 0.6943, + "step": 1402 + }, + { + "epoch": 8.017142857142858, + "grad_norm": 80.87184143066406, + "learning_rate": 4.664761904761905e-05, + "loss": 0.4523, + "step": 1403 + }, + { + "epoch": 8.022857142857143, + "grad_norm": 53.428428649902344, + "learning_rate": 4.664126984126984e-05, + "loss": 0.536, + "step": 1404 + }, + { + "epoch": 8.028571428571428, + "grad_norm": 83.90179443359375, + "learning_rate": 4.6634920634920635e-05, + "loss": 0.5229, + "step": 1405 + }, + { + "epoch": 8.034285714285714, + "grad_norm": 31.12773895263672, + "learning_rate": 4.6628571428571434e-05, + "loss": 0.4013, + "step": 1406 + }, + { + "epoch": 8.04, + "grad_norm": 29.049760818481445, + "learning_rate": 4.662222222222222e-05, + "loss": 0.6954, + "step": 1407 + }, + { + "epoch": 8.045714285714286, + "grad_norm": 35.7201042175293, + "learning_rate": 4.661587301587302e-05, + "loss": 0.5723, + "step": 1408 + }, + { + "epoch": 8.051428571428572, + "grad_norm": 34.02865219116211, + "learning_rate": 4.660952380952381e-05, + "loss": 0.6863, + "step": 1409 + }, + { + "epoch": 8.057142857142857, + "grad_norm": 286.44732666015625, + "learning_rate": 4.6603174603174605e-05, + "loss": 0.5025, + "step": 1410 + }, + { + "epoch": 8.062857142857142, + "grad_norm": 55.78438949584961, + "learning_rate": 4.65968253968254e-05, + "loss": 0.6305, + "step": 1411 + }, + { + "epoch": 8.06857142857143, + "grad_norm": 51.090538024902344, + "learning_rate": 4.659047619047619e-05, + "loss": 0.6065, + "step": 1412 + }, + { + "epoch": 8.074285714285715, + "grad_norm": 34.32136917114258, + "learning_rate": 4.658412698412698e-05, + "loss": 0.6657, + "step": 1413 + }, + { + "epoch": 8.08, + "grad_norm": 75.9567642211914, + "learning_rate": 4.657777777777778e-05, + "loss": 0.481, + "step": 1414 + }, + { + "epoch": 8.085714285714285, + "grad_norm": 99.80516052246094, + "learning_rate": 4.6571428571428575e-05, + "loss": 0.6294, + "step": 1415 + }, + { + "epoch": 
8.09142857142857, + "grad_norm": 44.61457824707031, + "learning_rate": 4.656507936507937e-05, + "loss": 0.4226, + "step": 1416 + }, + { + "epoch": 8.097142857142858, + "grad_norm": 77.42556762695312, + "learning_rate": 4.655873015873016e-05, + "loss": 0.5829, + "step": 1417 + }, + { + "epoch": 8.102857142857143, + "grad_norm": 113.65733337402344, + "learning_rate": 4.655238095238095e-05, + "loss": 0.7022, + "step": 1418 + }, + { + "epoch": 8.108571428571429, + "grad_norm": 67.45095825195312, + "learning_rate": 4.6546031746031745e-05, + "loss": 0.6034, + "step": 1419 + }, + { + "epoch": 8.114285714285714, + "grad_norm": 39.48252868652344, + "learning_rate": 4.6539682539682545e-05, + "loss": 0.6611, + "step": 1420 + }, + { + "epoch": 8.12, + "grad_norm": 36.0588264465332, + "learning_rate": 4.653333333333334e-05, + "loss": 0.6547, + "step": 1421 + }, + { + "epoch": 8.125714285714286, + "grad_norm": 56.38534164428711, + "learning_rate": 4.652698412698413e-05, + "loss": 0.643, + "step": 1422 + }, + { + "epoch": 8.131428571428572, + "grad_norm": 27.028913497924805, + "learning_rate": 4.652063492063492e-05, + "loss": 0.4471, + "step": 1423 + }, + { + "epoch": 8.137142857142857, + "grad_norm": 24.878549575805664, + "learning_rate": 4.6514285714285715e-05, + "loss": 0.5971, + "step": 1424 + }, + { + "epoch": 8.142857142857142, + "grad_norm": 113.82731628417969, + "learning_rate": 4.6507936507936515e-05, + "loss": 0.7132, + "step": 1425 + }, + { + "epoch": 8.14857142857143, + "grad_norm": 66.78875732421875, + "learning_rate": 4.65015873015873e-05, + "loss": 0.4648, + "step": 1426 + }, + { + "epoch": 8.154285714285715, + "grad_norm": 33.675479888916016, + "learning_rate": 4.64952380952381e-05, + "loss": 0.5242, + "step": 1427 + }, + { + "epoch": 8.16, + "grad_norm": 50.45384216308594, + "learning_rate": 4.648888888888889e-05, + "loss": 0.6918, + "step": 1428 + }, + { + "epoch": 8.165714285714285, + "grad_norm": 14.417842864990234, + "learning_rate": 4.6482539682539685e-05, + 
"loss": 0.6082, + "step": 1429 + }, + { + "epoch": 8.17142857142857, + "grad_norm": 47.269161224365234, + "learning_rate": 4.647619047619048e-05, + "loss": 0.6355, + "step": 1430 + }, + { + "epoch": 8.177142857142858, + "grad_norm": 31.608606338500977, + "learning_rate": 4.646984126984128e-05, + "loss": 0.5568, + "step": 1431 + }, + { + "epoch": 8.182857142857143, + "grad_norm": 37.61075210571289, + "learning_rate": 4.646349206349206e-05, + "loss": 0.4326, + "step": 1432 + }, + { + "epoch": 8.188571428571429, + "grad_norm": 113.44019317626953, + "learning_rate": 4.645714285714286e-05, + "loss": 0.4667, + "step": 1433 + }, + { + "epoch": 8.194285714285714, + "grad_norm": 49.1456184387207, + "learning_rate": 4.645079365079365e-05, + "loss": 0.7065, + "step": 1434 + }, + { + "epoch": 8.2, + "grad_norm": 40.68552780151367, + "learning_rate": 4.644444444444445e-05, + "loss": 0.5833, + "step": 1435 + }, + { + "epoch": 8.205714285714286, + "grad_norm": 54.234046936035156, + "learning_rate": 4.643809523809524e-05, + "loss": 0.6348, + "step": 1436 + }, + { + "epoch": 8.211428571428572, + "grad_norm": 84.55835723876953, + "learning_rate": 4.643174603174603e-05, + "loss": 0.5484, + "step": 1437 + }, + { + "epoch": 8.217142857142857, + "grad_norm": 95.15739440917969, + "learning_rate": 4.6425396825396826e-05, + "loss": 0.6561, + "step": 1438 + }, + { + "epoch": 8.222857142857142, + "grad_norm": 98.21651458740234, + "learning_rate": 4.6419047619047625e-05, + "loss": 0.8239, + "step": 1439 + }, + { + "epoch": 8.228571428571428, + "grad_norm": 36.658851623535156, + "learning_rate": 4.641269841269841e-05, + "loss": 0.7417, + "step": 1440 + }, + { + "epoch": 8.234285714285715, + "grad_norm": 85.5366439819336, + "learning_rate": 4.640634920634921e-05, + "loss": 0.4967, + "step": 1441 + }, + { + "epoch": 8.24, + "grad_norm": 85.452880859375, + "learning_rate": 4.64e-05, + "loss": 0.6752, + "step": 1442 + }, + { + "epoch": 8.245714285714286, + "grad_norm": 39.44017791748047, + 
"learning_rate": 4.6393650793650796e-05, + "loss": 0.5459, + "step": 1443 + }, + { + "epoch": 8.251428571428571, + "grad_norm": 27.0288143157959, + "learning_rate": 4.638730158730159e-05, + "loss": 0.6288, + "step": 1444 + }, + { + "epoch": 8.257142857142856, + "grad_norm": 51.48039245605469, + "learning_rate": 4.638095238095238e-05, + "loss": 0.6322, + "step": 1445 + }, + { + "epoch": 8.262857142857143, + "grad_norm": 35.94298553466797, + "learning_rate": 4.6374603174603174e-05, + "loss": 0.5504, + "step": 1446 + }, + { + "epoch": 8.268571428571429, + "grad_norm": 76.44605255126953, + "learning_rate": 4.636825396825397e-05, + "loss": 0.6296, + "step": 1447 + }, + { + "epoch": 8.274285714285714, + "grad_norm": 95.23500061035156, + "learning_rate": 4.636190476190476e-05, + "loss": 0.7675, + "step": 1448 + }, + { + "epoch": 8.28, + "grad_norm": 63.241539001464844, + "learning_rate": 4.635555555555556e-05, + "loss": 0.6487, + "step": 1449 + }, + { + "epoch": 8.285714285714286, + "grad_norm": 158.42202758789062, + "learning_rate": 4.634920634920635e-05, + "loss": 0.4883, + "step": 1450 + }, + { + "epoch": 8.291428571428572, + "grad_norm": 40.19489288330078, + "learning_rate": 4.6342857142857143e-05, + "loss": 0.7871, + "step": 1451 + }, + { + "epoch": 8.297142857142857, + "grad_norm": 45.30269241333008, + "learning_rate": 4.6336507936507936e-05, + "loss": 0.686, + "step": 1452 + }, + { + "epoch": 8.302857142857142, + "grad_norm": 83.68958282470703, + "learning_rate": 4.6330158730158736e-05, + "loss": 0.7743, + "step": 1453 + }, + { + "epoch": 8.308571428571428, + "grad_norm": 91.59910583496094, + "learning_rate": 4.632380952380953e-05, + "loss": 0.5252, + "step": 1454 + }, + { + "epoch": 8.314285714285715, + "grad_norm": 44.00296401977539, + "learning_rate": 4.631746031746032e-05, + "loss": 0.6858, + "step": 1455 + }, + { + "epoch": 8.32, + "grad_norm": 38.646297454833984, + "learning_rate": 4.6311111111111113e-05, + "loss": 0.6036, + "step": 1456 + }, + { + "epoch": 
8.325714285714286, + "grad_norm": 31.151046752929688, + "learning_rate": 4.6304761904761906e-05, + "loss": 0.5736, + "step": 1457 + }, + { + "epoch": 8.331428571428571, + "grad_norm": 36.38899230957031, + "learning_rate": 4.62984126984127e-05, + "loss": 0.5169, + "step": 1458 + }, + { + "epoch": 8.337142857142856, + "grad_norm": 21.914627075195312, + "learning_rate": 4.629206349206349e-05, + "loss": 0.6808, + "step": 1459 + }, + { + "epoch": 8.342857142857143, + "grad_norm": 18.928544998168945, + "learning_rate": 4.628571428571429e-05, + "loss": 0.9638, + "step": 1460 + }, + { + "epoch": 8.348571428571429, + "grad_norm": 41.01659393310547, + "learning_rate": 4.6279365079365083e-05, + "loss": 0.4674, + "step": 1461 + }, + { + "epoch": 8.354285714285714, + "grad_norm": 24.344030380249023, + "learning_rate": 4.6273015873015876e-05, + "loss": 0.4353, + "step": 1462 + }, + { + "epoch": 8.36, + "grad_norm": 24.396142959594727, + "learning_rate": 4.626666666666667e-05, + "loss": 0.8538, + "step": 1463 + }, + { + "epoch": 8.365714285714287, + "grad_norm": 40.090240478515625, + "learning_rate": 4.626031746031747e-05, + "loss": 0.8105, + "step": 1464 + }, + { + "epoch": 8.371428571428572, + "grad_norm": 167.61058044433594, + "learning_rate": 4.6253968253968254e-05, + "loss": 0.5838, + "step": 1465 + }, + { + "epoch": 8.377142857142857, + "grad_norm": 46.45691680908203, + "learning_rate": 4.624761904761905e-05, + "loss": 0.5343, + "step": 1466 + }, + { + "epoch": 8.382857142857143, + "grad_norm": 90.0116958618164, + "learning_rate": 4.624126984126984e-05, + "loss": 0.5916, + "step": 1467 + }, + { + "epoch": 8.388571428571428, + "grad_norm": 17.666582107543945, + "learning_rate": 4.623492063492064e-05, + "loss": 0.5452, + "step": 1468 + }, + { + "epoch": 8.394285714285715, + "grad_norm": 51.96952438354492, + "learning_rate": 4.622857142857143e-05, + "loss": 0.6797, + "step": 1469 + }, + { + "epoch": 8.4, + "grad_norm": 41.560142517089844, + "learning_rate": 
4.6222222222222224e-05, + "loss": 0.8717, + "step": 1470 + }, + { + "epoch": 8.405714285714286, + "grad_norm": 34.73108673095703, + "learning_rate": 4.6215873015873017e-05, + "loss": 0.541, + "step": 1471 + }, + { + "epoch": 8.411428571428571, + "grad_norm": 47.17734146118164, + "learning_rate": 4.6209523809523816e-05, + "loss": 0.6391, + "step": 1472 + }, + { + "epoch": 8.417142857142856, + "grad_norm": 44.5860481262207, + "learning_rate": 4.62031746031746e-05, + "loss": 0.7368, + "step": 1473 + }, + { + "epoch": 8.422857142857143, + "grad_norm": 38.533416748046875, + "learning_rate": 4.61968253968254e-05, + "loss": 0.6083, + "step": 1474 + }, + { + "epoch": 8.428571428571429, + "grad_norm": 81.4775161743164, + "learning_rate": 4.6190476190476194e-05, + "loss": 0.7288, + "step": 1475 + }, + { + "epoch": 8.434285714285714, + "grad_norm": 114.19631958007812, + "learning_rate": 4.6184126984126986e-05, + "loss": 0.6884, + "step": 1476 + }, + { + "epoch": 8.44, + "grad_norm": 66.40652465820312, + "learning_rate": 4.617777777777778e-05, + "loss": 0.6874, + "step": 1477 + }, + { + "epoch": 8.445714285714285, + "grad_norm": 101.87186431884766, + "learning_rate": 4.617142857142857e-05, + "loss": 0.7092, + "step": 1478 + }, + { + "epoch": 8.451428571428572, + "grad_norm": 197.94064331054688, + "learning_rate": 4.6165079365079364e-05, + "loss": 0.6513, + "step": 1479 + }, + { + "epoch": 8.457142857142857, + "grad_norm": 31.79027557373047, + "learning_rate": 4.6158730158730164e-05, + "loss": 0.6526, + "step": 1480 + }, + { + "epoch": 8.462857142857143, + "grad_norm": 31.410551071166992, + "learning_rate": 4.615238095238095e-05, + "loss": 0.6396, + "step": 1481 + }, + { + "epoch": 8.468571428571428, + "grad_norm": 29.926599502563477, + "learning_rate": 4.614603174603175e-05, + "loss": 0.5031, + "step": 1482 + }, + { + "epoch": 8.474285714285715, + "grad_norm": 24.395957946777344, + "learning_rate": 4.613968253968254e-05, + "loss": 0.7398, + "step": 1483 + }, + { + "epoch": 
8.48, + "grad_norm": 53.47405242919922, + "learning_rate": 4.6133333333333334e-05, + "loss": 0.5225, + "step": 1484 + }, + { + "epoch": 8.485714285714286, + "grad_norm": 50.648536682128906, + "learning_rate": 4.612698412698413e-05, + "loss": 0.7729, + "step": 1485 + }, + { + "epoch": 8.491428571428571, + "grad_norm": 84.7298355102539, + "learning_rate": 4.6120634920634926e-05, + "loss": 0.7538, + "step": 1486 + }, + { + "epoch": 8.497142857142856, + "grad_norm": 73.40445709228516, + "learning_rate": 4.611428571428571e-05, + "loss": 0.7775, + "step": 1487 + }, + { + "epoch": 8.502857142857144, + "grad_norm": 75.76046752929688, + "learning_rate": 4.610793650793651e-05, + "loss": 0.6632, + "step": 1488 + }, + { + "epoch": 8.508571428571429, + "grad_norm": 59.162559509277344, + "learning_rate": 4.6101587301587304e-05, + "loss": 0.8116, + "step": 1489 + }, + { + "epoch": 8.514285714285714, + "grad_norm": 74.70832824707031, + "learning_rate": 4.60952380952381e-05, + "loss": 0.8448, + "step": 1490 + }, + { + "epoch": 8.52, + "grad_norm": 31.210222244262695, + "learning_rate": 4.608888888888889e-05, + "loss": 0.6818, + "step": 1491 + }, + { + "epoch": 8.525714285714285, + "grad_norm": 122.73198699951172, + "learning_rate": 4.608253968253968e-05, + "loss": 0.5263, + "step": 1492 + }, + { + "epoch": 8.531428571428572, + "grad_norm": 56.048561096191406, + "learning_rate": 4.607619047619048e-05, + "loss": 0.4712, + "step": 1493 + }, + { + "epoch": 8.537142857142857, + "grad_norm": 560.8683471679688, + "learning_rate": 4.6069841269841274e-05, + "loss": 0.5263, + "step": 1494 + }, + { + "epoch": 8.542857142857143, + "grad_norm": 31.27094841003418, + "learning_rate": 4.606349206349207e-05, + "loss": 0.5868, + "step": 1495 + }, + { + "epoch": 8.548571428571428, + "grad_norm": 41.043006896972656, + "learning_rate": 4.605714285714286e-05, + "loss": 0.6195, + "step": 1496 + }, + { + "epoch": 8.554285714285715, + "grad_norm": 71.65432739257812, + "learning_rate": 
4.605079365079365e-05, + "loss": 0.7321, + "step": 1497 + }, + { + "epoch": 8.56, + "grad_norm": 90.78630065917969, + "learning_rate": 4.6044444444444445e-05, + "loss": 0.5513, + "step": 1498 + }, + { + "epoch": 8.565714285714286, + "grad_norm": 40.308448791503906, + "learning_rate": 4.6038095238095244e-05, + "loss": 0.6837, + "step": 1499 + }, + { + "epoch": 8.571428571428571, + "grad_norm": 27.45469856262207, + "learning_rate": 4.603174603174603e-05, + "loss": 0.7172, + "step": 1500 + }, + { + "epoch": 8.577142857142857, + "grad_norm": 29.458553314208984, + "learning_rate": 4.602539682539683e-05, + "loss": 0.4317, + "step": 1501 + }, + { + "epoch": 8.582857142857144, + "grad_norm": 96.34957122802734, + "learning_rate": 4.601904761904762e-05, + "loss": 0.7332, + "step": 1502 + }, + { + "epoch": 8.588571428571429, + "grad_norm": 27.539478302001953, + "learning_rate": 4.6012698412698415e-05, + "loss": 0.4862, + "step": 1503 + }, + { + "epoch": 8.594285714285714, + "grad_norm": 27.598003387451172, + "learning_rate": 4.600634920634921e-05, + "loss": 0.5812, + "step": 1504 + }, + { + "epoch": 8.6, + "grad_norm": 139.9840087890625, + "learning_rate": 4.600000000000001e-05, + "loss": 0.5812, + "step": 1505 + }, + { + "epoch": 8.605714285714285, + "grad_norm": 37.582332611083984, + "learning_rate": 4.599365079365079e-05, + "loss": 0.7514, + "step": 1506 + }, + { + "epoch": 8.611428571428572, + "grad_norm": 58.73211669921875, + "learning_rate": 4.598730158730159e-05, + "loss": 0.4071, + "step": 1507 + }, + { + "epoch": 8.617142857142857, + "grad_norm": 60.2987174987793, + "learning_rate": 4.5980952380952385e-05, + "loss": 0.526, + "step": 1508 + }, + { + "epoch": 8.622857142857143, + "grad_norm": 55.10319137573242, + "learning_rate": 4.597460317460318e-05, + "loss": 0.7074, + "step": 1509 + }, + { + "epoch": 8.628571428571428, + "grad_norm": 28.07171058654785, + "learning_rate": 4.596825396825397e-05, + "loss": 0.6715, + "step": 1510 + }, + { + "epoch": 8.634285714285713, 
+ "grad_norm": 138.1480712890625, + "learning_rate": 4.596190476190476e-05, + "loss": 0.6359, + "step": 1511 + }, + { + "epoch": 8.64, + "grad_norm": 48.170631408691406, + "learning_rate": 4.5955555555555555e-05, + "loss": 0.6623, + "step": 1512 + }, + { + "epoch": 8.645714285714286, + "grad_norm": 44.70737075805664, + "learning_rate": 4.5949206349206355e-05, + "loss": 0.6849, + "step": 1513 + }, + { + "epoch": 8.651428571428571, + "grad_norm": 53.35809326171875, + "learning_rate": 4.594285714285714e-05, + "loss": 0.5214, + "step": 1514 + }, + { + "epoch": 8.657142857142857, + "grad_norm": 59.18272399902344, + "learning_rate": 4.593650793650794e-05, + "loss": 0.7109, + "step": 1515 + }, + { + "epoch": 8.662857142857142, + "grad_norm": 76.57801055908203, + "learning_rate": 4.593015873015873e-05, + "loss": 0.7157, + "step": 1516 + }, + { + "epoch": 8.668571428571429, + "grad_norm": 47.933834075927734, + "learning_rate": 4.5923809523809525e-05, + "loss": 0.6008, + "step": 1517 + }, + { + "epoch": 8.674285714285714, + "grad_norm": 34.38920593261719, + "learning_rate": 4.591746031746032e-05, + "loss": 0.5298, + "step": 1518 + }, + { + "epoch": 8.68, + "grad_norm": 36.14894104003906, + "learning_rate": 4.591111111111112e-05, + "loss": 0.5072, + "step": 1519 + }, + { + "epoch": 8.685714285714285, + "grad_norm": 36.38679504394531, + "learning_rate": 4.59047619047619e-05, + "loss": 0.5074, + "step": 1520 + }, + { + "epoch": 8.691428571428572, + "grad_norm": 35.809410095214844, + "learning_rate": 4.58984126984127e-05, + "loss": 0.5394, + "step": 1521 + }, + { + "epoch": 8.697142857142858, + "grad_norm": 76.38274383544922, + "learning_rate": 4.589206349206349e-05, + "loss": 0.6313, + "step": 1522 + }, + { + "epoch": 8.702857142857143, + "grad_norm": 37.26008987426758, + "learning_rate": 4.588571428571429e-05, + "loss": 0.6359, + "step": 1523 + }, + { + "epoch": 8.708571428571428, + "grad_norm": 52.82963943481445, + "learning_rate": 4.587936507936508e-05, + "loss": 0.5529, + 
"step": 1524 + }, + { + "epoch": 8.714285714285714, + "grad_norm": 33.750099182128906, + "learning_rate": 4.587301587301587e-05, + "loss": 0.5891, + "step": 1525 + }, + { + "epoch": 8.72, + "grad_norm": 26.14128875732422, + "learning_rate": 4.5866666666666666e-05, + "loss": 0.8089, + "step": 1526 + }, + { + "epoch": 8.725714285714286, + "grad_norm": 91.86772155761719, + "learning_rate": 4.5860317460317465e-05, + "loss": 0.5509, + "step": 1527 + }, + { + "epoch": 8.731428571428571, + "grad_norm": 38.546119689941406, + "learning_rate": 4.585396825396826e-05, + "loss": 0.5187, + "step": 1528 + }, + { + "epoch": 8.737142857142857, + "grad_norm": 35.20556640625, + "learning_rate": 4.584761904761905e-05, + "loss": 0.5353, + "step": 1529 + }, + { + "epoch": 8.742857142857144, + "grad_norm": 39.48540496826172, + "learning_rate": 4.584126984126984e-05, + "loss": 0.3533, + "step": 1530 + }, + { + "epoch": 8.748571428571429, + "grad_norm": 27.24939727783203, + "learning_rate": 4.5834920634920636e-05, + "loss": 0.5308, + "step": 1531 + }, + { + "epoch": 8.754285714285714, + "grad_norm": 64.14990997314453, + "learning_rate": 4.5828571428571435e-05, + "loss": 0.6566, + "step": 1532 + }, + { + "epoch": 8.76, + "grad_norm": 58.110958099365234, + "learning_rate": 4.582222222222222e-05, + "loss": 0.6346, + "step": 1533 + }, + { + "epoch": 8.765714285714285, + "grad_norm": 53.27418899536133, + "learning_rate": 4.581587301587302e-05, + "loss": 0.6744, + "step": 1534 + }, + { + "epoch": 8.771428571428572, + "grad_norm": 44.7225227355957, + "learning_rate": 4.580952380952381e-05, + "loss": 0.6127, + "step": 1535 + }, + { + "epoch": 8.777142857142858, + "grad_norm": 72.10116577148438, + "learning_rate": 4.5803174603174606e-05, + "loss": 0.6284, + "step": 1536 + }, + { + "epoch": 8.782857142857143, + "grad_norm": 43.42138671875, + "learning_rate": 4.57968253968254e-05, + "loss": 0.7004, + "step": 1537 + }, + { + "epoch": 8.788571428571428, + "grad_norm": 25.551225662231445, + 
"learning_rate": 4.57904761904762e-05, + "loss": 0.6069, + "step": 1538 + }, + { + "epoch": 8.794285714285714, + "grad_norm": 58.870418548583984, + "learning_rate": 4.5784126984126984e-05, + "loss": 0.7462, + "step": 1539 + }, + { + "epoch": 8.8, + "grad_norm": 29.90438461303711, + "learning_rate": 4.577777777777778e-05, + "loss": 0.5822, + "step": 1540 + }, + { + "epoch": 8.805714285714286, + "grad_norm": 45.953643798828125, + "learning_rate": 4.5771428571428576e-05, + "loss": 0.7325, + "step": 1541 + }, + { + "epoch": 8.811428571428571, + "grad_norm": 26.630359649658203, + "learning_rate": 4.576507936507937e-05, + "loss": 0.5846, + "step": 1542 + }, + { + "epoch": 8.817142857142857, + "grad_norm": 25.59412384033203, + "learning_rate": 4.575873015873016e-05, + "loss": 0.6627, + "step": 1543 + }, + { + "epoch": 8.822857142857142, + "grad_norm": 29.028831481933594, + "learning_rate": 4.5752380952380953e-05, + "loss": 0.718, + "step": 1544 + }, + { + "epoch": 8.82857142857143, + "grad_norm": 52.75741195678711, + "learning_rate": 4.5746031746031746e-05, + "loss": 0.4036, + "step": 1545 + }, + { + "epoch": 8.834285714285715, + "grad_norm": 42.16880798339844, + "learning_rate": 4.5739682539682546e-05, + "loss": 0.6891, + "step": 1546 + }, + { + "epoch": 8.84, + "grad_norm": 18.44571304321289, + "learning_rate": 4.573333333333333e-05, + "loss": 0.4403, + "step": 1547 + }, + { + "epoch": 8.845714285714285, + "grad_norm": 42.27539825439453, + "learning_rate": 4.572698412698413e-05, + "loss": 0.6595, + "step": 1548 + }, + { + "epoch": 8.85142857142857, + "grad_norm": 60.38774108886719, + "learning_rate": 4.5720634920634923e-05, + "loss": 0.4491, + "step": 1549 + }, + { + "epoch": 8.857142857142858, + "grad_norm": 42.25185012817383, + "learning_rate": 4.5714285714285716e-05, + "loss": 0.5451, + "step": 1550 + }, + { + "epoch": 8.862857142857143, + "grad_norm": 83.79962158203125, + "learning_rate": 4.570793650793651e-05, + "loss": 0.6691, + "step": 1551 + }, + { + "epoch": 
8.868571428571428, + "grad_norm": 45.134437561035156, + "learning_rate": 4.570158730158731e-05, + "loss": 0.7317, + "step": 1552 + }, + { + "epoch": 8.874285714285714, + "grad_norm": 27.27747344970703, + "learning_rate": 4.5695238095238094e-05, + "loss": 0.5246, + "step": 1553 + }, + { + "epoch": 8.88, + "grad_norm": 29.774110794067383, + "learning_rate": 4.5688888888888893e-05, + "loss": 0.4865, + "step": 1554 + }, + { + "epoch": 8.885714285714286, + "grad_norm": 41.41864776611328, + "learning_rate": 4.568253968253968e-05, + "loss": 0.7461, + "step": 1555 + }, + { + "epoch": 8.891428571428571, + "grad_norm": 52.09632873535156, + "learning_rate": 4.567619047619048e-05, + "loss": 0.6364, + "step": 1556 + }, + { + "epoch": 8.897142857142857, + "grad_norm": 44.600128173828125, + "learning_rate": 4.566984126984127e-05, + "loss": 0.639, + "step": 1557 + }, + { + "epoch": 8.902857142857142, + "grad_norm": 26.527629852294922, + "learning_rate": 4.5663492063492064e-05, + "loss": 0.7991, + "step": 1558 + }, + { + "epoch": 8.90857142857143, + "grad_norm": 41.0920524597168, + "learning_rate": 4.5657142857142857e-05, + "loss": 0.9872, + "step": 1559 + }, + { + "epoch": 8.914285714285715, + "grad_norm": 82.2793960571289, + "learning_rate": 4.5650793650793656e-05, + "loss": 0.582, + "step": 1560 + }, + { + "epoch": 8.92, + "grad_norm": 22.054826736450195, + "learning_rate": 4.564444444444444e-05, + "loss": 0.4866, + "step": 1561 + }, + { + "epoch": 8.925714285714285, + "grad_norm": 24.977052688598633, + "learning_rate": 4.563809523809524e-05, + "loss": 0.9632, + "step": 1562 + }, + { + "epoch": 8.93142857142857, + "grad_norm": 18.51016616821289, + "learning_rate": 4.5631746031746034e-05, + "loss": 0.8068, + "step": 1563 + }, + { + "epoch": 8.937142857142858, + "grad_norm": 20.75175666809082, + "learning_rate": 4.5625396825396827e-05, + "loss": 0.7239, + "step": 1564 + }, + { + "epoch": 8.942857142857143, + "grad_norm": 51.263912200927734, + "learning_rate": 
4.561904761904762e-05, + "loss": 0.4197, + "step": 1565 + }, + { + "epoch": 8.948571428571428, + "grad_norm": 53.12901306152344, + "learning_rate": 4.561269841269841e-05, + "loss": 0.7149, + "step": 1566 + }, + { + "epoch": 8.954285714285714, + "grad_norm": 30.954853057861328, + "learning_rate": 4.560634920634921e-05, + "loss": 0.5854, + "step": 1567 + }, + { + "epoch": 8.96, + "grad_norm": 15.945786476135254, + "learning_rate": 4.5600000000000004e-05, + "loss": 0.6039, + "step": 1568 + }, + { + "epoch": 8.965714285714286, + "grad_norm": 37.43260192871094, + "learning_rate": 4.5593650793650797e-05, + "loss": 0.3747, + "step": 1569 + }, + { + "epoch": 8.971428571428572, + "grad_norm": 70.73409271240234, + "learning_rate": 4.558730158730159e-05, + "loss": 0.8083, + "step": 1570 + }, + { + "epoch": 8.977142857142857, + "grad_norm": 25.643779754638672, + "learning_rate": 4.558095238095239e-05, + "loss": 0.6793, + "step": 1571 + }, + { + "epoch": 8.982857142857142, + "grad_norm": 34.19021224975586, + "learning_rate": 4.5574603174603174e-05, + "loss": 0.8151, + "step": 1572 + }, + { + "epoch": 8.98857142857143, + "grad_norm": 39.52885437011719, + "learning_rate": 4.5568253968253974e-05, + "loss": 0.3994, + "step": 1573 + }, + { + "epoch": 8.994285714285715, + "grad_norm": 95.88760375976562, + "learning_rate": 4.5561904761904766e-05, + "loss": 0.6294, + "step": 1574 + }, + { + "epoch": 9.0, + "grad_norm": 33.499820709228516, + "learning_rate": 4.555555555555556e-05, + "loss": 0.8259, + "step": 1575 + }, + { + "epoch": 9.0, + "eval_classes": 0, + "eval_loss": 0.7303072214126587, + "eval_map": 0.8974, + "eval_map_50": 0.9515, + "eval_map_75": 0.9372, + "eval_map_large": 0.8975, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.8974, + "eval_map_small": -1.0, + "eval_mar_1": 0.7594, + "eval_mar_10": 0.9619, + "eval_mar_100": 0.9756, + "eval_mar_100_per_class": 0.9756, + "eval_mar_large": 0.9756, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 
17.5672, + "eval_samples_per_second": 16.736, + "eval_steps_per_second": 2.106, + "step": 1575 + }, + { + "epoch": 9.005714285714285, + "grad_norm": 96.72337341308594, + "learning_rate": 4.554920634920635e-05, + "loss": 0.5228, + "step": 1576 + }, + { + "epoch": 9.01142857142857, + "grad_norm": 29.805259704589844, + "learning_rate": 4.5542857142857144e-05, + "loss": 0.3786, + "step": 1577 + }, + { + "epoch": 9.017142857142858, + "grad_norm": 21.426660537719727, + "learning_rate": 4.553650793650794e-05, + "loss": 0.5206, + "step": 1578 + }, + { + "epoch": 9.022857142857143, + "grad_norm": 73.40640258789062, + "learning_rate": 4.5530158730158736e-05, + "loss": 0.4438, + "step": 1579 + }, + { + "epoch": 9.028571428571428, + "grad_norm": 50.55246353149414, + "learning_rate": 4.552380952380952e-05, + "loss": 0.4576, + "step": 1580 + }, + { + "epoch": 9.034285714285714, + "grad_norm": 55.30791473388672, + "learning_rate": 4.551746031746032e-05, + "loss": 0.3573, + "step": 1581 + }, + { + "epoch": 9.04, + "grad_norm": 17.10489845275879, + "learning_rate": 4.5511111111111114e-05, + "loss": 0.4981, + "step": 1582 + }, + { + "epoch": 9.045714285714286, + "grad_norm": 54.23091506958008, + "learning_rate": 4.550476190476191e-05, + "loss": 0.5639, + "step": 1583 + }, + { + "epoch": 9.051428571428572, + "grad_norm": 46.959014892578125, + "learning_rate": 4.54984126984127e-05, + "loss": 0.3785, + "step": 1584 + }, + { + "epoch": 9.057142857142857, + "grad_norm": 41.589237213134766, + "learning_rate": 4.54920634920635e-05, + "loss": 0.6079, + "step": 1585 + }, + { + "epoch": 9.062857142857142, + "grad_norm": 59.98491668701172, + "learning_rate": 4.5485714285714285e-05, + "loss": 0.4779, + "step": 1586 + }, + { + "epoch": 9.06857142857143, + "grad_norm": 59.319087982177734, + "learning_rate": 4.5479365079365084e-05, + "loss": 0.858, + "step": 1587 + }, + { + "epoch": 9.074285714285715, + "grad_norm": 15.701525688171387, + "learning_rate": 4.547301587301587e-05, + "loss": 0.5491, + 
"step": 1588 + }, + { + "epoch": 9.08, + "grad_norm": 34.07634735107422, + "learning_rate": 4.546666666666667e-05, + "loss": 0.5342, + "step": 1589 + }, + { + "epoch": 9.085714285714285, + "grad_norm": 17.197433471679688, + "learning_rate": 4.546031746031746e-05, + "loss": 0.7168, + "step": 1590 + }, + { + "epoch": 9.09142857142857, + "grad_norm": 43.62481689453125, + "learning_rate": 4.5453968253968255e-05, + "loss": 0.4793, + "step": 1591 + }, + { + "epoch": 9.097142857142858, + "grad_norm": 191.11241149902344, + "learning_rate": 4.544761904761905e-05, + "loss": 0.5385, + "step": 1592 + }, + { + "epoch": 9.102857142857143, + "grad_norm": 17.969465255737305, + "learning_rate": 4.544126984126985e-05, + "loss": 0.494, + "step": 1593 + }, + { + "epoch": 9.108571428571429, + "grad_norm": 171.6042938232422, + "learning_rate": 4.543492063492063e-05, + "loss": 0.6234, + "step": 1594 + }, + { + "epoch": 9.114285714285714, + "grad_norm": 67.10899353027344, + "learning_rate": 4.542857142857143e-05, + "loss": 0.4691, + "step": 1595 + }, + { + "epoch": 9.12, + "grad_norm": 35.339996337890625, + "learning_rate": 4.5422222222222225e-05, + "loss": 0.4985, + "step": 1596 + }, + { + "epoch": 9.125714285714286, + "grad_norm": 16.23410987854004, + "learning_rate": 4.541587301587302e-05, + "loss": 0.5747, + "step": 1597 + }, + { + "epoch": 9.131428571428572, + "grad_norm": 35.63035202026367, + "learning_rate": 4.540952380952381e-05, + "loss": 0.5536, + "step": 1598 + }, + { + "epoch": 9.137142857142857, + "grad_norm": 19.772794723510742, + "learning_rate": 4.54031746031746e-05, + "loss": 0.5108, + "step": 1599 + }, + { + "epoch": 9.142857142857142, + "grad_norm": 34.77439498901367, + "learning_rate": 4.5396825396825395e-05, + "loss": 0.5934, + "step": 1600 + }, + { + "epoch": 9.14857142857143, + "grad_norm": 33.682281494140625, + "learning_rate": 4.5390476190476195e-05, + "loss": 0.4317, + "step": 1601 + }, + { + "epoch": 9.154285714285715, + "grad_norm": 69.66313934326172, + 
"learning_rate": 4.538412698412699e-05, + "loss": 0.9722, + "step": 1602 + }, + { + "epoch": 9.16, + "grad_norm": 37.474693298339844, + "learning_rate": 4.537777777777778e-05, + "loss": 0.4238, + "step": 1603 + }, + { + "epoch": 9.165714285714285, + "grad_norm": 54.01615905761719, + "learning_rate": 4.537142857142857e-05, + "loss": 0.6075, + "step": 1604 + }, + { + "epoch": 9.17142857142857, + "grad_norm": 73.04664611816406, + "learning_rate": 4.5365079365079365e-05, + "loss": 0.638, + "step": 1605 + }, + { + "epoch": 9.177142857142858, + "grad_norm": 46.8115348815918, + "learning_rate": 4.5358730158730165e-05, + "loss": 0.5162, + "step": 1606 + }, + { + "epoch": 9.182857142857143, + "grad_norm": 51.396697998046875, + "learning_rate": 4.535238095238096e-05, + "loss": 0.4573, + "step": 1607 + }, + { + "epoch": 9.188571428571429, + "grad_norm": 25.829246520996094, + "learning_rate": 4.534603174603175e-05, + "loss": 0.5303, + "step": 1608 + }, + { + "epoch": 9.194285714285714, + "grad_norm": 1186.63720703125, + "learning_rate": 4.533968253968254e-05, + "loss": 0.428, + "step": 1609 + }, + { + "epoch": 9.2, + "grad_norm": 45.02142333984375, + "learning_rate": 4.5333333333333335e-05, + "loss": 0.5237, + "step": 1610 + }, + { + "epoch": 9.205714285714286, + "grad_norm": 462.3764343261719, + "learning_rate": 4.532698412698413e-05, + "loss": 0.671, + "step": 1611 + }, + { + "epoch": 9.211428571428572, + "grad_norm": 56.87477493286133, + "learning_rate": 4.532063492063493e-05, + "loss": 0.5889, + "step": 1612 + }, + { + "epoch": 9.217142857142857, + "grad_norm": 24.812471389770508, + "learning_rate": 4.531428571428571e-05, + "loss": 0.5464, + "step": 1613 + }, + { + "epoch": 9.222857142857142, + "grad_norm": 71.20873260498047, + "learning_rate": 4.530793650793651e-05, + "loss": 0.6917, + "step": 1614 + }, + { + "epoch": 9.228571428571428, + "grad_norm": 57.25650405883789, + "learning_rate": 4.5301587301587305e-05, + "loss": 0.7093, + "step": 1615 + }, + { + "epoch": 
9.234285714285715, + "grad_norm": 61.251834869384766, + "learning_rate": 4.52952380952381e-05, + "loss": 0.5466, + "step": 1616 + }, + { + "epoch": 9.24, + "grad_norm": 144.06771850585938, + "learning_rate": 4.528888888888889e-05, + "loss": 0.5203, + "step": 1617 + }, + { + "epoch": 9.245714285714286, + "grad_norm": 42.55318832397461, + "learning_rate": 4.528253968253969e-05, + "loss": 0.6319, + "step": 1618 + }, + { + "epoch": 9.251428571428571, + "grad_norm": 27.46671485900879, + "learning_rate": 4.5276190476190476e-05, + "loss": 0.4394, + "step": 1619 + }, + { + "epoch": 9.257142857142856, + "grad_norm": 31.322734832763672, + "learning_rate": 4.5269841269841275e-05, + "loss": 0.7483, + "step": 1620 + }, + { + "epoch": 9.262857142857143, + "grad_norm": 74.06669616699219, + "learning_rate": 4.526349206349206e-05, + "loss": 0.8727, + "step": 1621 + }, + { + "epoch": 9.268571428571429, + "grad_norm": 29.42599868774414, + "learning_rate": 4.525714285714286e-05, + "loss": 0.4346, + "step": 1622 + }, + { + "epoch": 9.274285714285714, + "grad_norm": 68.91696166992188, + "learning_rate": 4.525079365079365e-05, + "loss": 0.6046, + "step": 1623 + }, + { + "epoch": 9.28, + "grad_norm": 30.62226104736328, + "learning_rate": 4.5244444444444446e-05, + "loss": 0.4043, + "step": 1624 + }, + { + "epoch": 9.285714285714286, + "grad_norm": 26.922250747680664, + "learning_rate": 4.523809523809524e-05, + "loss": 0.3778, + "step": 1625 + }, + { + "epoch": 9.291428571428572, + "grad_norm": 37.2835578918457, + "learning_rate": 4.523174603174604e-05, + "loss": 0.4843, + "step": 1626 + }, + { + "epoch": 9.297142857142857, + "grad_norm": 58.060791015625, + "learning_rate": 4.5225396825396824e-05, + "loss": 0.5054, + "step": 1627 + }, + { + "epoch": 9.302857142857142, + "grad_norm": 30.876922607421875, + "learning_rate": 4.521904761904762e-05, + "loss": 0.7619, + "step": 1628 + }, + { + "epoch": 9.308571428571428, + "grad_norm": 35.82794952392578, + "learning_rate": 4.521269841269841e-05, + 
"loss": 0.8043, + "step": 1629 + }, + { + "epoch": 9.314285714285715, + "grad_norm": 28.258895874023438, + "learning_rate": 4.520634920634921e-05, + "loss": 0.7233, + "step": 1630 + }, + { + "epoch": 9.32, + "grad_norm": 76.31919860839844, + "learning_rate": 4.52e-05, + "loss": 0.607, + "step": 1631 + }, + { + "epoch": 9.325714285714286, + "grad_norm": 201.3551025390625, + "learning_rate": 4.5193650793650794e-05, + "loss": 0.6616, + "step": 1632 + }, + { + "epoch": 9.331428571428571, + "grad_norm": 23.321441650390625, + "learning_rate": 4.5187301587301586e-05, + "loss": 0.5547, + "step": 1633 + }, + { + "epoch": 9.337142857142856, + "grad_norm": 53.26896286010742, + "learning_rate": 4.5180952380952386e-05, + "loss": 0.6694, + "step": 1634 + }, + { + "epoch": 9.342857142857143, + "grad_norm": 82.87789916992188, + "learning_rate": 4.517460317460318e-05, + "loss": 0.5523, + "step": 1635 + }, + { + "epoch": 9.348571428571429, + "grad_norm": 50.41506576538086, + "learning_rate": 4.516825396825397e-05, + "loss": 0.5354, + "step": 1636 + }, + { + "epoch": 9.354285714285714, + "grad_norm": 42.313018798828125, + "learning_rate": 4.5161904761904764e-05, + "loss": 0.5686, + "step": 1637 + }, + { + "epoch": 9.36, + "grad_norm": 181.2873992919922, + "learning_rate": 4.5155555555555556e-05, + "loss": 0.6026, + "step": 1638 + }, + { + "epoch": 9.365714285714287, + "grad_norm": 22.492979049682617, + "learning_rate": 4.5149206349206356e-05, + "loss": 0.7822, + "step": 1639 + }, + { + "epoch": 9.371428571428572, + "grad_norm": 26.08000373840332, + "learning_rate": 4.514285714285714e-05, + "loss": 0.5648, + "step": 1640 + }, + { + "epoch": 9.377142857142857, + "grad_norm": 48.00055694580078, + "learning_rate": 4.513650793650794e-05, + "loss": 0.6257, + "step": 1641 + }, + { + "epoch": 9.382857142857143, + "grad_norm": 65.03575134277344, + "learning_rate": 4.5130158730158733e-05, + "loss": 0.5279, + "step": 1642 + }, + { + "epoch": 9.388571428571428, + "grad_norm": 58.578128814697266, 
+ "learning_rate": 4.5123809523809526e-05, + "loss": 0.5983, + "step": 1643 + }, + { + "epoch": 9.394285714285715, + "grad_norm": 35.81114196777344, + "learning_rate": 4.511746031746032e-05, + "loss": 0.5741, + "step": 1644 + }, + { + "epoch": 9.4, + "grad_norm": 22.605865478515625, + "learning_rate": 4.511111111111112e-05, + "loss": 0.7414, + "step": 1645 + }, + { + "epoch": 9.405714285714286, + "grad_norm": 43.81585693359375, + "learning_rate": 4.5104761904761904e-05, + "loss": 0.4807, + "step": 1646 + }, + { + "epoch": 9.411428571428571, + "grad_norm": 48.78623580932617, + "learning_rate": 4.5098412698412703e-05, + "loss": 0.497, + "step": 1647 + }, + { + "epoch": 9.417142857142856, + "grad_norm": 35.52511215209961, + "learning_rate": 4.5092063492063496e-05, + "loss": 0.8295, + "step": 1648 + }, + { + "epoch": 9.422857142857143, + "grad_norm": 35.88123321533203, + "learning_rate": 4.508571428571429e-05, + "loss": 0.5782, + "step": 1649 + }, + { + "epoch": 9.428571428571429, + "grad_norm": 28.440359115600586, + "learning_rate": 4.507936507936508e-05, + "loss": 0.5606, + "step": 1650 + }, + { + "epoch": 9.434285714285714, + "grad_norm": 31.525901794433594, + "learning_rate": 4.5073015873015874e-05, + "loss": 0.3901, + "step": 1651 + }, + { + "epoch": 9.44, + "grad_norm": 31.054569244384766, + "learning_rate": 4.5066666666666667e-05, + "loss": 0.5627, + "step": 1652 + }, + { + "epoch": 9.445714285714285, + "grad_norm": 30.92431640625, + "learning_rate": 4.5060317460317466e-05, + "loss": 0.5484, + "step": 1653 + }, + { + "epoch": 9.451428571428572, + "grad_norm": 45.711769104003906, + "learning_rate": 4.505396825396825e-05, + "loss": 0.5364, + "step": 1654 + }, + { + "epoch": 9.457142857142857, + "grad_norm": 33.75905990600586, + "learning_rate": 4.504761904761905e-05, + "loss": 0.3607, + "step": 1655 + }, + { + "epoch": 9.462857142857143, + "grad_norm": 68.22660827636719, + "learning_rate": 4.5041269841269844e-05, + "loss": 0.5863, + "step": 1656 + }, + { + 
"epoch": 9.468571428571428, + "grad_norm": 39.716426849365234, + "learning_rate": 4.5034920634920637e-05, + "loss": 0.5162, + "step": 1657 + }, + { + "epoch": 9.474285714285715, + "grad_norm": 57.95941162109375, + "learning_rate": 4.502857142857143e-05, + "loss": 0.4783, + "step": 1658 + }, + { + "epoch": 9.48, + "grad_norm": 36.185951232910156, + "learning_rate": 4.502222222222223e-05, + "loss": 0.626, + "step": 1659 + }, + { + "epoch": 9.485714285714286, + "grad_norm": 26.185272216796875, + "learning_rate": 4.5015873015873014e-05, + "loss": 0.5006, + "step": 1660 + }, + { + "epoch": 9.491428571428571, + "grad_norm": 30.795080184936523, + "learning_rate": 4.5009523809523814e-05, + "loss": 0.4489, + "step": 1661 + }, + { + "epoch": 9.497142857142856, + "grad_norm": 91.52268981933594, + "learning_rate": 4.50031746031746e-05, + "loss": 0.6501, + "step": 1662 + }, + { + "epoch": 9.502857142857144, + "grad_norm": 156.91587829589844, + "learning_rate": 4.49968253968254e-05, + "loss": 0.5349, + "step": 1663 + }, + { + "epoch": 9.508571428571429, + "grad_norm": 71.10111236572266, + "learning_rate": 4.499047619047619e-05, + "loss": 0.587, + "step": 1664 + }, + { + "epoch": 9.514285714285714, + "grad_norm": 404.21942138671875, + "learning_rate": 4.4984126984126984e-05, + "loss": 0.5079, + "step": 1665 + }, + { + "epoch": 9.52, + "grad_norm": 36.196353912353516, + "learning_rate": 4.497777777777778e-05, + "loss": 0.5316, + "step": 1666 + }, + { + "epoch": 9.525714285714285, + "grad_norm": 44.460018157958984, + "learning_rate": 4.4971428571428576e-05, + "loss": 0.9241, + "step": 1667 + }, + { + "epoch": 9.531428571428572, + "grad_norm": 34.32107162475586, + "learning_rate": 4.496507936507936e-05, + "loss": 0.5829, + "step": 1668 + }, + { + "epoch": 9.537142857142857, + "grad_norm": 55.574440002441406, + "learning_rate": 4.495873015873016e-05, + "loss": 0.8107, + "step": 1669 + }, + { + "epoch": 9.542857142857143, + "grad_norm": 32.89865493774414, + "learning_rate": 
4.4952380952380954e-05, + "loss": 0.6595, + "step": 1670 + }, + { + "epoch": 9.548571428571428, + "grad_norm": 68.20197296142578, + "learning_rate": 4.494603174603175e-05, + "loss": 0.5237, + "step": 1671 + }, + { + "epoch": 9.554285714285715, + "grad_norm": 31.13216209411621, + "learning_rate": 4.493968253968254e-05, + "loss": 0.4951, + "step": 1672 + }, + { + "epoch": 9.56, + "grad_norm": 39.833072662353516, + "learning_rate": 4.493333333333333e-05, + "loss": 0.4265, + "step": 1673 + }, + { + "epoch": 9.565714285714286, + "grad_norm": 31.166410446166992, + "learning_rate": 4.492698412698413e-05, + "loss": 0.4458, + "step": 1674 + }, + { + "epoch": 9.571428571428571, + "grad_norm": 81.6888198852539, + "learning_rate": 4.4920634920634924e-05, + "loss": 0.6065, + "step": 1675 + }, + { + "epoch": 9.577142857142857, + "grad_norm": 62.83003616333008, + "learning_rate": 4.491428571428572e-05, + "loss": 0.5295, + "step": 1676 + }, + { + "epoch": 9.582857142857144, + "grad_norm": 52.28730773925781, + "learning_rate": 4.490793650793651e-05, + "loss": 0.5219, + "step": 1677 + }, + { + "epoch": 9.588571428571429, + "grad_norm": 42.736724853515625, + "learning_rate": 4.490158730158731e-05, + "loss": 0.4746, + "step": 1678 + }, + { + "epoch": 9.594285714285714, + "grad_norm": 91.87010955810547, + "learning_rate": 4.4895238095238095e-05, + "loss": 0.4918, + "step": 1679 + }, + { + "epoch": 9.6, + "grad_norm": 51.22637939453125, + "learning_rate": 4.4888888888888894e-05, + "loss": 0.6637, + "step": 1680 + }, + { + "epoch": 9.605714285714285, + "grad_norm": 42.326847076416016, + "learning_rate": 4.488253968253969e-05, + "loss": 0.6179, + "step": 1681 + }, + { + "epoch": 9.611428571428572, + "grad_norm": 33.708980560302734, + "learning_rate": 4.487619047619048e-05, + "loss": 0.5218, + "step": 1682 + }, + { + "epoch": 9.617142857142857, + "grad_norm": 38.000892639160156, + "learning_rate": 4.486984126984127e-05, + "loss": 0.8485, + "step": 1683 + }, + { + "epoch": 
9.622857142857143, + "grad_norm": 67.10118103027344, + "learning_rate": 4.4863492063492065e-05, + "loss": 0.5679, + "step": 1684 + }, + { + "epoch": 9.628571428571428, + "grad_norm": 49.57404327392578, + "learning_rate": 4.485714285714286e-05, + "loss": 0.4918, + "step": 1685 + }, + { + "epoch": 9.634285714285713, + "grad_norm": 52.69540786743164, + "learning_rate": 4.485079365079366e-05, + "loss": 0.6589, + "step": 1686 + }, + { + "epoch": 9.64, + "grad_norm": 51.51718521118164, + "learning_rate": 4.484444444444444e-05, + "loss": 0.3976, + "step": 1687 + }, + { + "epoch": 9.645714285714286, + "grad_norm": 62.12007522583008, + "learning_rate": 4.483809523809524e-05, + "loss": 0.6888, + "step": 1688 + }, + { + "epoch": 9.651428571428571, + "grad_norm": 47.324432373046875, + "learning_rate": 4.4831746031746035e-05, + "loss": 0.6809, + "step": 1689 + }, + { + "epoch": 9.657142857142857, + "grad_norm": 189.40052795410156, + "learning_rate": 4.482539682539683e-05, + "loss": 0.4769, + "step": 1690 + }, + { + "epoch": 9.662857142857142, + "grad_norm": 147.95223999023438, + "learning_rate": 4.481904761904762e-05, + "loss": 0.3775, + "step": 1691 + }, + { + "epoch": 9.668571428571429, + "grad_norm": 32.29826736450195, + "learning_rate": 4.481269841269842e-05, + "loss": 0.8048, + "step": 1692 + }, + { + "epoch": 9.674285714285714, + "grad_norm": 53.42995071411133, + "learning_rate": 4.4806349206349205e-05, + "loss": 0.3771, + "step": 1693 + }, + { + "epoch": 9.68, + "grad_norm": 67.25894165039062, + "learning_rate": 4.4800000000000005e-05, + "loss": 0.6628, + "step": 1694 + }, + { + "epoch": 9.685714285714285, + "grad_norm": 50.50597381591797, + "learning_rate": 4.479365079365079e-05, + "loss": 0.495, + "step": 1695 + }, + { + "epoch": 9.691428571428572, + "grad_norm": 37.99165725708008, + "learning_rate": 4.478730158730159e-05, + "loss": 0.6954, + "step": 1696 + }, + { + "epoch": 9.697142857142858, + "grad_norm": 40.71255874633789, + "learning_rate": 4.478095238095238e-05, 
+ "loss": 0.4784, + "step": 1697 + }, + { + "epoch": 9.702857142857143, + "grad_norm": 22.380142211914062, + "learning_rate": 4.4774603174603175e-05, + "loss": 0.6058, + "step": 1698 + }, + { + "epoch": 9.708571428571428, + "grad_norm": 53.52154541015625, + "learning_rate": 4.476825396825397e-05, + "loss": 0.492, + "step": 1699 + }, + { + "epoch": 9.714285714285714, + "grad_norm": 21.259414672851562, + "learning_rate": 4.476190476190477e-05, + "loss": 0.7073, + "step": 1700 + }, + { + "epoch": 9.72, + "grad_norm": 67.19175720214844, + "learning_rate": 4.475555555555555e-05, + "loss": 0.4383, + "step": 1701 + }, + { + "epoch": 9.725714285714286, + "grad_norm": 61.081031799316406, + "learning_rate": 4.474920634920635e-05, + "loss": 0.5562, + "step": 1702 + }, + { + "epoch": 9.731428571428571, + "grad_norm": 79.01075744628906, + "learning_rate": 4.4742857142857145e-05, + "loss": 0.529, + "step": 1703 + }, + { + "epoch": 9.737142857142857, + "grad_norm": 37.538238525390625, + "learning_rate": 4.473650793650794e-05, + "loss": 0.5126, + "step": 1704 + }, + { + "epoch": 9.742857142857144, + "grad_norm": 34.34321212768555, + "learning_rate": 4.473015873015873e-05, + "loss": 0.518, + "step": 1705 + }, + { + "epoch": 9.748571428571429, + "grad_norm": 69.47142791748047, + "learning_rate": 4.472380952380952e-05, + "loss": 0.4305, + "step": 1706 + }, + { + "epoch": 9.754285714285714, + "grad_norm": 115.71222686767578, + "learning_rate": 4.4717460317460316e-05, + "loss": 0.6728, + "step": 1707 + }, + { + "epoch": 9.76, + "grad_norm": 43.00593185424805, + "learning_rate": 4.4711111111111115e-05, + "loss": 0.6375, + "step": 1708 + }, + { + "epoch": 9.765714285714285, + "grad_norm": 28.06012725830078, + "learning_rate": 4.470476190476191e-05, + "loss": 0.7231, + "step": 1709 + }, + { + "epoch": 9.771428571428572, + "grad_norm": 47.50296401977539, + "learning_rate": 4.46984126984127e-05, + "loss": 0.4661, + "step": 1710 + }, + { + "epoch": 9.777142857142858, + "grad_norm": 
137.15155029296875, + "learning_rate": 4.469206349206349e-05, + "loss": 0.6739, + "step": 1711 + }, + { + "epoch": 9.782857142857143, + "grad_norm": 69.97490692138672, + "learning_rate": 4.4685714285714286e-05, + "loss": 0.4805, + "step": 1712 + }, + { + "epoch": 9.788571428571428, + "grad_norm": 32.603946685791016, + "learning_rate": 4.4679365079365085e-05, + "loss": 0.6628, + "step": 1713 + }, + { + "epoch": 9.794285714285714, + "grad_norm": 47.70781707763672, + "learning_rate": 4.467301587301588e-05, + "loss": 0.7778, + "step": 1714 + }, + { + "epoch": 9.8, + "grad_norm": 33.18388748168945, + "learning_rate": 4.466666666666667e-05, + "loss": 0.523, + "step": 1715 + }, + { + "epoch": 9.805714285714286, + "grad_norm": 273.55523681640625, + "learning_rate": 4.466031746031746e-05, + "loss": 0.6989, + "step": 1716 + }, + { + "epoch": 9.811428571428571, + "grad_norm": 90.4410171508789, + "learning_rate": 4.4653968253968256e-05, + "loss": 0.421, + "step": 1717 + }, + { + "epoch": 9.817142857142857, + "grad_norm": 52.67757797241211, + "learning_rate": 4.464761904761905e-05, + "loss": 0.4459, + "step": 1718 + }, + { + "epoch": 9.822857142857142, + "grad_norm": 37.0533561706543, + "learning_rate": 4.464126984126985e-05, + "loss": 0.7322, + "step": 1719 + }, + { + "epoch": 9.82857142857143, + "grad_norm": 58.06081771850586, + "learning_rate": 4.4634920634920634e-05, + "loss": 0.412, + "step": 1720 + }, + { + "epoch": 9.834285714285715, + "grad_norm": 38.50861358642578, + "learning_rate": 4.462857142857143e-05, + "loss": 0.4276, + "step": 1721 + }, + { + "epoch": 9.84, + "grad_norm": 37.932193756103516, + "learning_rate": 4.4622222222222226e-05, + "loss": 0.517, + "step": 1722 + }, + { + "epoch": 9.845714285714285, + "grad_norm": 39.057884216308594, + "learning_rate": 4.461587301587302e-05, + "loss": 0.4924, + "step": 1723 + }, + { + "epoch": 9.85142857142857, + "grad_norm": 30.302133560180664, + "learning_rate": 4.460952380952381e-05, + "loss": 0.4598, + "step": 1724 + }, 
+ { + "epoch": 9.857142857142858, + "grad_norm": 73.9806137084961, + "learning_rate": 4.460317460317461e-05, + "loss": 0.4182, + "step": 1725 + }, + { + "epoch": 9.862857142857143, + "grad_norm": 26.147907257080078, + "learning_rate": 4.4596825396825396e-05, + "loss": 0.6775, + "step": 1726 + }, + { + "epoch": 9.868571428571428, + "grad_norm": 34.04471206665039, + "learning_rate": 4.4590476190476196e-05, + "loss": 0.52, + "step": 1727 + }, + { + "epoch": 9.874285714285714, + "grad_norm": 46.370750427246094, + "learning_rate": 4.458412698412698e-05, + "loss": 0.6936, + "step": 1728 + }, + { + "epoch": 9.88, + "grad_norm": 35.784751892089844, + "learning_rate": 4.457777777777778e-05, + "loss": 0.3824, + "step": 1729 + }, + { + "epoch": 9.885714285714286, + "grad_norm": 81.29346466064453, + "learning_rate": 4.4571428571428574e-05, + "loss": 0.5515, + "step": 1730 + }, + { + "epoch": 9.891428571428571, + "grad_norm": 35.98829650878906, + "learning_rate": 4.4565079365079366e-05, + "loss": 0.536, + "step": 1731 + }, + { + "epoch": 9.897142857142857, + "grad_norm": 56.466915130615234, + "learning_rate": 4.455873015873016e-05, + "loss": 0.5491, + "step": 1732 + }, + { + "epoch": 9.902857142857142, + "grad_norm": 29.79550552368164, + "learning_rate": 4.455238095238096e-05, + "loss": 0.8255, + "step": 1733 + }, + { + "epoch": 9.90857142857143, + "grad_norm": 44.566951751708984, + "learning_rate": 4.4546031746031744e-05, + "loss": 0.513, + "step": 1734 + }, + { + "epoch": 9.914285714285715, + "grad_norm": 76.26630401611328, + "learning_rate": 4.4539682539682543e-05, + "loss": 0.5726, + "step": 1735 + }, + { + "epoch": 9.92, + "grad_norm": 88.24022674560547, + "learning_rate": 4.4533333333333336e-05, + "loss": 0.5026, + "step": 1736 + }, + { + "epoch": 9.925714285714285, + "grad_norm": 59.36637496948242, + "learning_rate": 4.452698412698413e-05, + "loss": 0.5303, + "step": 1737 + }, + { + "epoch": 9.93142857142857, + "grad_norm": 133.46734619140625, + "learning_rate": 
4.452063492063492e-05, + "loss": 0.5762, + "step": 1738 + }, + { + "epoch": 9.937142857142858, + "grad_norm": 73.90782928466797, + "learning_rate": 4.4514285714285714e-05, + "loss": 0.4135, + "step": 1739 + }, + { + "epoch": 9.942857142857143, + "grad_norm": 16.689319610595703, + "learning_rate": 4.450793650793651e-05, + "loss": 0.4852, + "step": 1740 + }, + { + "epoch": 9.948571428571428, + "grad_norm": 46.914302825927734, + "learning_rate": 4.4501587301587306e-05, + "loss": 0.4914, + "step": 1741 + }, + { + "epoch": 9.954285714285714, + "grad_norm": 64.44391632080078, + "learning_rate": 4.44952380952381e-05, + "loss": 0.7654, + "step": 1742 + }, + { + "epoch": 9.96, + "grad_norm": 44.79595184326172, + "learning_rate": 4.448888888888889e-05, + "loss": 0.654, + "step": 1743 + }, + { + "epoch": 9.965714285714286, + "grad_norm": 20.467912673950195, + "learning_rate": 4.4482539682539684e-05, + "loss": 0.5238, + "step": 1744 + }, + { + "epoch": 9.971428571428572, + "grad_norm": 41.49550247192383, + "learning_rate": 4.447619047619048e-05, + "loss": 0.5272, + "step": 1745 + }, + { + "epoch": 9.977142857142857, + "grad_norm": 163.62322998046875, + "learning_rate": 4.446984126984127e-05, + "loss": 0.6409, + "step": 1746 + }, + { + "epoch": 9.982857142857142, + "grad_norm": 67.88383483886719, + "learning_rate": 4.446349206349207e-05, + "loss": 0.878, + "step": 1747 + }, + { + "epoch": 9.98857142857143, + "grad_norm": 139.52902221679688, + "learning_rate": 4.445714285714286e-05, + "loss": 0.5412, + "step": 1748 + }, + { + "epoch": 9.994285714285715, + "grad_norm": 44.225887298583984, + "learning_rate": 4.4450793650793654e-05, + "loss": 0.33, + "step": 1749 + }, + { + "epoch": 10.0, + "grad_norm": 91.18606567382812, + "learning_rate": 4.4444444444444447e-05, + "loss": 0.7271, + "step": 1750 + }, + { + "epoch": 10.0, + "eval_classes": 0, + "eval_loss": 0.679156482219696, + "eval_map": 0.9103, + "eval_map_50": 0.9707, + "eval_map_75": 0.9658, + "eval_map_large": 0.9103, + 
"eval_map_medium": -1.0, + "eval_map_per_class": 0.9103, + "eval_map_small": -1.0, + "eval_mar_1": 0.7806, + "eval_mar_10": 0.9597, + "eval_mar_100": 0.9743, + "eval_mar_100_per_class": 0.9743, + "eval_mar_large": 0.9743, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 18.2727, + "eval_samples_per_second": 16.09, + "eval_steps_per_second": 2.025, + "step": 1750 + }, + { + "epoch": 10.005714285714285, + "grad_norm": 27.433137893676758, + "learning_rate": 4.443809523809524e-05, + "loss": 0.5827, + "step": 1751 + }, + { + "epoch": 10.01142857142857, + "grad_norm": 34.193050384521484, + "learning_rate": 4.443174603174604e-05, + "loss": 0.4465, + "step": 1752 + }, + { + "epoch": 10.017142857142858, + "grad_norm": 29.82825469970703, + "learning_rate": 4.4425396825396824e-05, + "loss": 0.6885, + "step": 1753 + }, + { + "epoch": 10.022857142857143, + "grad_norm": 21.08742904663086, + "learning_rate": 4.4419047619047624e-05, + "loss": 0.5118, + "step": 1754 + }, + { + "epoch": 10.028571428571428, + "grad_norm": 22.1567325592041, + "learning_rate": 4.4412698412698417e-05, + "loss": 0.481, + "step": 1755 + }, + { + "epoch": 10.034285714285714, + "grad_norm": 38.391639709472656, + "learning_rate": 4.440634920634921e-05, + "loss": 0.4155, + "step": 1756 + }, + { + "epoch": 10.04, + "grad_norm": 62.439395904541016, + "learning_rate": 4.44e-05, + "loss": 0.78, + "step": 1757 + }, + { + "epoch": 10.045714285714286, + "grad_norm": 28.28538703918457, + "learning_rate": 4.43936507936508e-05, + "loss": 0.5031, + "step": 1758 + }, + { + "epoch": 10.051428571428572, + "grad_norm": 28.75678253173828, + "learning_rate": 4.438730158730159e-05, + "loss": 0.7015, + "step": 1759 + }, + { + "epoch": 10.057142857142857, + "grad_norm": 37.00398254394531, + "learning_rate": 4.4380952380952386e-05, + "loss": 0.533, + "step": 1760 + }, + { + "epoch": 10.062857142857142, + "grad_norm": 31.764421463012695, + "learning_rate": 4.437460317460317e-05, + "loss": 0.5539, + "step": 
1761 + }, + { + "epoch": 10.06857142857143, + "grad_norm": 34.632568359375, + "learning_rate": 4.436825396825397e-05, + "loss": 0.5347, + "step": 1762 + }, + { + "epoch": 10.074285714285715, + "grad_norm": 355.4385070800781, + "learning_rate": 4.4361904761904764e-05, + "loss": 0.6434, + "step": 1763 + }, + { + "epoch": 10.08, + "grad_norm": 42.91116714477539, + "learning_rate": 4.435555555555556e-05, + "loss": 0.4943, + "step": 1764 + }, + { + "epoch": 10.085714285714285, + "grad_norm": 42.363983154296875, + "learning_rate": 4.434920634920635e-05, + "loss": 0.3786, + "step": 1765 + }, + { + "epoch": 10.09142857142857, + "grad_norm": 46.98724365234375, + "learning_rate": 4.434285714285715e-05, + "loss": 0.5811, + "step": 1766 + }, + { + "epoch": 10.097142857142858, + "grad_norm": 50.39039611816406, + "learning_rate": 4.4336507936507935e-05, + "loss": 0.3648, + "step": 1767 + }, + { + "epoch": 10.102857142857143, + "grad_norm": 68.35560607910156, + "learning_rate": 4.4330158730158734e-05, + "loss": 0.4918, + "step": 1768 + }, + { + "epoch": 10.108571428571429, + "grad_norm": 58.892364501953125, + "learning_rate": 4.432380952380953e-05, + "loss": 0.5621, + "step": 1769 + }, + { + "epoch": 10.114285714285714, + "grad_norm": 30.972572326660156, + "learning_rate": 4.431746031746032e-05, + "loss": 0.6199, + "step": 1770 + }, + { + "epoch": 10.12, + "grad_norm": 22.662992477416992, + "learning_rate": 4.431111111111111e-05, + "loss": 0.4741, + "step": 1771 + }, + { + "epoch": 10.125714285714286, + "grad_norm": 33.405879974365234, + "learning_rate": 4.4304761904761905e-05, + "loss": 0.4343, + "step": 1772 + }, + { + "epoch": 10.131428571428572, + "grad_norm": 48.99565505981445, + "learning_rate": 4.42984126984127e-05, + "loss": 0.5988, + "step": 1773 + }, + { + "epoch": 10.137142857142857, + "grad_norm": 441.27545166015625, + "learning_rate": 4.42920634920635e-05, + "loss": 0.5665, + "step": 1774 + }, + { + "epoch": 10.142857142857142, + "grad_norm": 29.71120262145996, + 
"learning_rate": 4.428571428571428e-05, + "loss": 0.7444, + "step": 1775 + }, + { + "epoch": 10.14857142857143, + "grad_norm": 26.079683303833008, + "learning_rate": 4.427936507936508e-05, + "loss": 0.51, + "step": 1776 + }, + { + "epoch": 10.154285714285715, + "grad_norm": 60.71558380126953, + "learning_rate": 4.4273015873015875e-05, + "loss": 0.4623, + "step": 1777 + }, + { + "epoch": 10.16, + "grad_norm": 66.20484924316406, + "learning_rate": 4.426666666666667e-05, + "loss": 0.7319, + "step": 1778 + }, + { + "epoch": 10.165714285714285, + "grad_norm": 50.07710647583008, + "learning_rate": 4.426031746031746e-05, + "loss": 0.7065, + "step": 1779 + }, + { + "epoch": 10.17142857142857, + "grad_norm": 64.73977661132812, + "learning_rate": 4.425396825396826e-05, + "loss": 0.6807, + "step": 1780 + }, + { + "epoch": 10.177142857142858, + "grad_norm": 89.32843017578125, + "learning_rate": 4.424761904761905e-05, + "loss": 0.4594, + "step": 1781 + }, + { + "epoch": 10.182857142857143, + "grad_norm": 52.258609771728516, + "learning_rate": 4.4241269841269845e-05, + "loss": 0.6438, + "step": 1782 + }, + { + "epoch": 10.188571428571429, + "grad_norm": 18.544471740722656, + "learning_rate": 4.423492063492064e-05, + "loss": 0.6665, + "step": 1783 + }, + { + "epoch": 10.194285714285714, + "grad_norm": 81.1020736694336, + "learning_rate": 4.422857142857143e-05, + "loss": 0.5614, + "step": 1784 + }, + { + "epoch": 10.2, + "grad_norm": 33.819732666015625, + "learning_rate": 4.422222222222222e-05, + "loss": 0.6872, + "step": 1785 + }, + { + "epoch": 10.205714285714286, + "grad_norm": 36.031288146972656, + "learning_rate": 4.4215873015873015e-05, + "loss": 0.6527, + "step": 1786 + }, + { + "epoch": 10.211428571428572, + "grad_norm": 37.90239334106445, + "learning_rate": 4.4209523809523815e-05, + "loss": 0.5108, + "step": 1787 + }, + { + "epoch": 10.217142857142857, + "grad_norm": 37.0508918762207, + "learning_rate": 4.420317460317461e-05, + "loss": 0.553, + "step": 1788 + }, + { + 
"epoch": 10.222857142857142, + "grad_norm": 78.48355102539062, + "learning_rate": 4.41968253968254e-05, + "loss": 0.83, + "step": 1789 + }, + { + "epoch": 10.228571428571428, + "grad_norm": 53.61076736450195, + "learning_rate": 4.419047619047619e-05, + "loss": 0.5672, + "step": 1790 + }, + { + "epoch": 10.234285714285715, + "grad_norm": 54.93144989013672, + "learning_rate": 4.418412698412699e-05, + "loss": 0.6038, + "step": 1791 + }, + { + "epoch": 10.24, + "grad_norm": 44.15296173095703, + "learning_rate": 4.417777777777778e-05, + "loss": 0.5362, + "step": 1792 + }, + { + "epoch": 10.245714285714286, + "grad_norm": 32.382083892822266, + "learning_rate": 4.417142857142858e-05, + "loss": 0.6964, + "step": 1793 + }, + { + "epoch": 10.251428571428571, + "grad_norm": 63.223724365234375, + "learning_rate": 4.416507936507936e-05, + "loss": 0.5144, + "step": 1794 + }, + { + "epoch": 10.257142857142856, + "grad_norm": 32.31887435913086, + "learning_rate": 4.415873015873016e-05, + "loss": 0.5586, + "step": 1795 + }, + { + "epoch": 10.262857142857143, + "grad_norm": 67.84217834472656, + "learning_rate": 4.4152380952380955e-05, + "loss": 0.3581, + "step": 1796 + }, + { + "epoch": 10.268571428571429, + "grad_norm": 74.47038269042969, + "learning_rate": 4.414603174603175e-05, + "loss": 0.3358, + "step": 1797 + }, + { + "epoch": 10.274285714285714, + "grad_norm": 31.795873641967773, + "learning_rate": 4.413968253968254e-05, + "loss": 0.611, + "step": 1798 + }, + { + "epoch": 10.28, + "grad_norm": 34.879581451416016, + "learning_rate": 4.413333333333334e-05, + "loss": 0.5509, + "step": 1799 + }, + { + "epoch": 10.285714285714286, + "grad_norm": 35.9124641418457, + "learning_rate": 4.4126984126984126e-05, + "loss": 0.4702, + "step": 1800 + }, + { + "epoch": 10.291428571428572, + "grad_norm": 17.336244583129883, + "learning_rate": 4.4120634920634925e-05, + "loss": 0.3667, + "step": 1801 + }, + { + "epoch": 10.297142857142857, + "grad_norm": 68.83108520507812, + "learning_rate": 
4.411428571428572e-05, + "loss": 0.4476, + "step": 1802 + }, + { + "epoch": 10.302857142857142, + "grad_norm": 25.292253494262695, + "learning_rate": 4.410793650793651e-05, + "loss": 0.433, + "step": 1803 + }, + { + "epoch": 10.308571428571428, + "grad_norm": 72.31684875488281, + "learning_rate": 4.41015873015873e-05, + "loss": 0.5445, + "step": 1804 + }, + { + "epoch": 10.314285714285715, + "grad_norm": 39.674320220947266, + "learning_rate": 4.4095238095238096e-05, + "loss": 0.4679, + "step": 1805 + }, + { + "epoch": 10.32, + "grad_norm": 28.290306091308594, + "learning_rate": 4.408888888888889e-05, + "loss": 0.5483, + "step": 1806 + }, + { + "epoch": 10.325714285714286, + "grad_norm": 131.22181701660156, + "learning_rate": 4.408253968253969e-05, + "loss": 0.6128, + "step": 1807 + }, + { + "epoch": 10.331428571428571, + "grad_norm": 73.4686508178711, + "learning_rate": 4.4076190476190474e-05, + "loss": 0.8075, + "step": 1808 + }, + { + "epoch": 10.337142857142856, + "grad_norm": 38.875244140625, + "learning_rate": 4.406984126984127e-05, + "loss": 0.2387, + "step": 1809 + }, + { + "epoch": 10.342857142857143, + "grad_norm": 34.1298713684082, + "learning_rate": 4.4063492063492066e-05, + "loss": 0.5822, + "step": 1810 + }, + { + "epoch": 10.348571428571429, + "grad_norm": 43.67047882080078, + "learning_rate": 4.405714285714286e-05, + "loss": 0.7023, + "step": 1811 + }, + { + "epoch": 10.354285714285714, + "grad_norm": 62.95126724243164, + "learning_rate": 4.405079365079365e-05, + "loss": 0.5263, + "step": 1812 + }, + { + "epoch": 10.36, + "grad_norm": 14.687211036682129, + "learning_rate": 4.404444444444445e-05, + "loss": 0.4211, + "step": 1813 + }, + { + "epoch": 10.365714285714287, + "grad_norm": 109.05048370361328, + "learning_rate": 4.4038095238095236e-05, + "loss": 0.9146, + "step": 1814 + }, + { + "epoch": 10.371428571428572, + "grad_norm": 39.3391227722168, + "learning_rate": 4.4031746031746036e-05, + "loss": 0.3332, + "step": 1815 + }, + { + "epoch": 
10.377142857142857, + "grad_norm": 24.407655715942383, + "learning_rate": 4.402539682539683e-05, + "loss": 0.4952, + "step": 1816 + }, + { + "epoch": 10.382857142857143, + "grad_norm": 54.404537200927734, + "learning_rate": 4.401904761904762e-05, + "loss": 0.5828, + "step": 1817 + }, + { + "epoch": 10.388571428571428, + "grad_norm": 91.14871215820312, + "learning_rate": 4.4012698412698414e-05, + "loss": 0.3805, + "step": 1818 + }, + { + "epoch": 10.394285714285715, + "grad_norm": 31.88556480407715, + "learning_rate": 4.4006349206349206e-05, + "loss": 0.5423, + "step": 1819 + }, + { + "epoch": 10.4, + "grad_norm": 26.99669075012207, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.638, + "step": 1820 + }, + { + "epoch": 10.405714285714286, + "grad_norm": 86.47724914550781, + "learning_rate": 4.39936507936508e-05, + "loss": 0.3764, + "step": 1821 + }, + { + "epoch": 10.411428571428571, + "grad_norm": 54.31696319580078, + "learning_rate": 4.398730158730159e-05, + "loss": 0.5246, + "step": 1822 + }, + { + "epoch": 10.417142857142856, + "grad_norm": 55.37803268432617, + "learning_rate": 4.3980952380952384e-05, + "loss": 0.5363, + "step": 1823 + }, + { + "epoch": 10.422857142857143, + "grad_norm": 74.20997619628906, + "learning_rate": 4.3974603174603176e-05, + "loss": 0.559, + "step": 1824 + }, + { + "epoch": 10.428571428571429, + "grad_norm": 68.69185638427734, + "learning_rate": 4.396825396825397e-05, + "loss": 0.66, + "step": 1825 + }, + { + "epoch": 10.434285714285714, + "grad_norm": 71.71404266357422, + "learning_rate": 4.396190476190477e-05, + "loss": 0.5888, + "step": 1826 + }, + { + "epoch": 10.44, + "grad_norm": 41.23540496826172, + "learning_rate": 4.3955555555555554e-05, + "loss": 0.7463, + "step": 1827 + }, + { + "epoch": 10.445714285714285, + "grad_norm": 78.00237274169922, + "learning_rate": 4.3949206349206353e-05, + "loss": 0.5385, + "step": 1828 + }, + { + "epoch": 10.451428571428572, + "grad_norm": 27.8646240234375, + "learning_rate": 
4.3942857142857146e-05, + "loss": 0.6213, + "step": 1829 + }, + { + "epoch": 10.457142857142857, + "grad_norm": 107.51541137695312, + "learning_rate": 4.393650793650794e-05, + "loss": 0.432, + "step": 1830 + }, + { + "epoch": 10.462857142857143, + "grad_norm": 105.09674835205078, + "learning_rate": 4.393015873015873e-05, + "loss": 0.6297, + "step": 1831 + }, + { + "epoch": 10.468571428571428, + "grad_norm": 50.10200500488281, + "learning_rate": 4.392380952380953e-05, + "loss": 0.3271, + "step": 1832 + }, + { + "epoch": 10.474285714285715, + "grad_norm": 103.67435455322266, + "learning_rate": 4.391746031746032e-05, + "loss": 0.6717, + "step": 1833 + }, + { + "epoch": 10.48, + "grad_norm": 15.707316398620605, + "learning_rate": 4.3911111111111116e-05, + "loss": 0.6886, + "step": 1834 + }, + { + "epoch": 10.485714285714286, + "grad_norm": 29.44092559814453, + "learning_rate": 4.39047619047619e-05, + "loss": 0.6768, + "step": 1835 + }, + { + "epoch": 10.491428571428571, + "grad_norm": 42.66861343383789, + "learning_rate": 4.38984126984127e-05, + "loss": 0.7291, + "step": 1836 + }, + { + "epoch": 10.497142857142856, + "grad_norm": 158.61050415039062, + "learning_rate": 4.3892063492063494e-05, + "loss": 0.4542, + "step": 1837 + }, + { + "epoch": 10.502857142857144, + "grad_norm": 38.50166320800781, + "learning_rate": 4.388571428571429e-05, + "loss": 0.5022, + "step": 1838 + }, + { + "epoch": 10.508571428571429, + "grad_norm": 72.28811645507812, + "learning_rate": 4.387936507936508e-05, + "loss": 0.6058, + "step": 1839 + }, + { + "epoch": 10.514285714285714, + "grad_norm": 22.055517196655273, + "learning_rate": 4.387301587301588e-05, + "loss": 0.7038, + "step": 1840 + }, + { + "epoch": 10.52, + "grad_norm": 325.0693054199219, + "learning_rate": 4.3866666666666665e-05, + "loss": 0.4803, + "step": 1841 + }, + { + "epoch": 10.525714285714285, + "grad_norm": 15.510709762573242, + "learning_rate": 4.3860317460317464e-05, + "loss": 0.399, + "step": 1842 + }, + { + "epoch": 
10.531428571428572, + "grad_norm": 31.22942543029785, + "learning_rate": 4.3853968253968257e-05, + "loss": 0.668, + "step": 1843 + }, + { + "epoch": 10.537142857142857, + "grad_norm": 46.17405700683594, + "learning_rate": 4.384761904761905e-05, + "loss": 0.4323, + "step": 1844 + }, + { + "epoch": 10.542857142857143, + "grad_norm": 338.6712646484375, + "learning_rate": 4.384126984126984e-05, + "loss": 0.5699, + "step": 1845 + }, + { + "epoch": 10.548571428571428, + "grad_norm": 30.1645565032959, + "learning_rate": 4.3834920634920634e-05, + "loss": 0.4089, + "step": 1846 + }, + { + "epoch": 10.554285714285715, + "grad_norm": 24.616436004638672, + "learning_rate": 4.382857142857143e-05, + "loss": 0.4738, + "step": 1847 + }, + { + "epoch": 10.56, + "grad_norm": 56.76235580444336, + "learning_rate": 4.3822222222222227e-05, + "loss": 0.5598, + "step": 1848 + }, + { + "epoch": 10.565714285714286, + "grad_norm": 24.1751708984375, + "learning_rate": 4.381587301587301e-05, + "loss": 0.3399, + "step": 1849 + }, + { + "epoch": 10.571428571428571, + "grad_norm": 519.3365478515625, + "learning_rate": 4.380952380952381e-05, + "loss": 0.7979, + "step": 1850 + }, + { + "epoch": 10.577142857142857, + "grad_norm": 34.48884963989258, + "learning_rate": 4.3803174603174604e-05, + "loss": 0.4607, + "step": 1851 + }, + { + "epoch": 10.582857142857144, + "grad_norm": 24.102577209472656, + "learning_rate": 4.37968253968254e-05, + "loss": 0.3344, + "step": 1852 + }, + { + "epoch": 10.588571428571429, + "grad_norm": 25.61137580871582, + "learning_rate": 4.379047619047619e-05, + "loss": 0.3993, + "step": 1853 + }, + { + "epoch": 10.594285714285714, + "grad_norm": 36.330745697021484, + "learning_rate": 4.378412698412699e-05, + "loss": 0.4345, + "step": 1854 + }, + { + "epoch": 10.6, + "grad_norm": 60.14534378051758, + "learning_rate": 4.377777777777778e-05, + "loss": 0.4862, + "step": 1855 + }, + { + "epoch": 10.605714285714285, + "grad_norm": 62.317447662353516, + "learning_rate": 
4.3771428571428574e-05, + "loss": 0.6628, + "step": 1856 + }, + { + "epoch": 10.611428571428572, + "grad_norm": 36.39304733276367, + "learning_rate": 4.376507936507937e-05, + "loss": 0.4503, + "step": 1857 + }, + { + "epoch": 10.617142857142857, + "grad_norm": 41.1817741394043, + "learning_rate": 4.375873015873016e-05, + "loss": 0.4089, + "step": 1858 + }, + { + "epoch": 10.622857142857143, + "grad_norm": 22.43792152404785, + "learning_rate": 4.375238095238096e-05, + "loss": 0.5147, + "step": 1859 + }, + { + "epoch": 10.628571428571428, + "grad_norm": 36.97702407836914, + "learning_rate": 4.3746031746031745e-05, + "loss": 0.4454, + "step": 1860 + }, + { + "epoch": 10.634285714285713, + "grad_norm": 54.35332489013672, + "learning_rate": 4.3739682539682544e-05, + "loss": 0.6532, + "step": 1861 + }, + { + "epoch": 10.64, + "grad_norm": 89.96182250976562, + "learning_rate": 4.373333333333334e-05, + "loss": 0.658, + "step": 1862 + }, + { + "epoch": 10.645714285714286, + "grad_norm": 57.741912841796875, + "learning_rate": 4.372698412698413e-05, + "loss": 0.6051, + "step": 1863 + }, + { + "epoch": 10.651428571428571, + "grad_norm": 63.035011291503906, + "learning_rate": 4.372063492063492e-05, + "loss": 0.4985, + "step": 1864 + }, + { + "epoch": 10.657142857142857, + "grad_norm": 64.25335693359375, + "learning_rate": 4.371428571428572e-05, + "loss": 0.8196, + "step": 1865 + }, + { + "epoch": 10.662857142857142, + "grad_norm": 44.35352325439453, + "learning_rate": 4.370793650793651e-05, + "loss": 0.4929, + "step": 1866 + }, + { + "epoch": 10.668571428571429, + "grad_norm": 63.985801696777344, + "learning_rate": 4.370158730158731e-05, + "loss": 0.4315, + "step": 1867 + }, + { + "epoch": 10.674285714285714, + "grad_norm": 69.80237579345703, + "learning_rate": 4.369523809523809e-05, + "loss": 0.4453, + "step": 1868 + }, + { + "epoch": 10.68, + "grad_norm": 38.57668685913086, + "learning_rate": 4.368888888888889e-05, + "loss": 0.3856, + "step": 1869 + }, + { + "epoch": 
10.685714285714285, + "grad_norm": 58.52362823486328, + "learning_rate": 4.3682539682539685e-05, + "loss": 0.6876, + "step": 1870 + }, + { + "epoch": 10.691428571428572, + "grad_norm": 36.25707244873047, + "learning_rate": 4.367619047619048e-05, + "loss": 0.4271, + "step": 1871 + }, + { + "epoch": 10.697142857142858, + "grad_norm": 100.15937042236328, + "learning_rate": 4.366984126984127e-05, + "loss": 0.604, + "step": 1872 + }, + { + "epoch": 10.702857142857143, + "grad_norm": 81.68765258789062, + "learning_rate": 4.366349206349207e-05, + "loss": 0.4773, + "step": 1873 + }, + { + "epoch": 10.708571428571428, + "grad_norm": 74.6390609741211, + "learning_rate": 4.3657142857142855e-05, + "loss": 0.4262, + "step": 1874 + }, + { + "epoch": 10.714285714285714, + "grad_norm": 69.17678833007812, + "learning_rate": 4.3650793650793655e-05, + "loss": 0.6983, + "step": 1875 + }, + { + "epoch": 10.72, + "grad_norm": 222.8771514892578, + "learning_rate": 4.364444444444445e-05, + "loss": 0.7061, + "step": 1876 + }, + { + "epoch": 10.725714285714286, + "grad_norm": 144.02078247070312, + "learning_rate": 4.363809523809524e-05, + "loss": 0.5319, + "step": 1877 + }, + { + "epoch": 10.731428571428571, + "grad_norm": 45.64491653442383, + "learning_rate": 4.363174603174603e-05, + "loss": 0.4739, + "step": 1878 + }, + { + "epoch": 10.737142857142857, + "grad_norm": 87.71896362304688, + "learning_rate": 4.3625396825396825e-05, + "loss": 0.5727, + "step": 1879 + }, + { + "epoch": 10.742857142857144, + "grad_norm": 66.23672485351562, + "learning_rate": 4.361904761904762e-05, + "loss": 0.6821, + "step": 1880 + }, + { + "epoch": 10.748571428571429, + "grad_norm": 28.323043823242188, + "learning_rate": 4.361269841269842e-05, + "loss": 0.5018, + "step": 1881 + }, + { + "epoch": 10.754285714285714, + "grad_norm": 29.00827980041504, + "learning_rate": 4.36063492063492e-05, + "loss": 0.5059, + "step": 1882 + }, + { + "epoch": 10.76, + "grad_norm": 63.22015380859375, + "learning_rate": 4.36e-05, + 
"loss": 0.4007, + "step": 1883 + }, + { + "epoch": 10.765714285714285, + "grad_norm": 96.02413177490234, + "learning_rate": 4.3593650793650795e-05, + "loss": 0.7076, + "step": 1884 + }, + { + "epoch": 10.771428571428572, + "grad_norm": 37.6485595703125, + "learning_rate": 4.358730158730159e-05, + "loss": 0.5585, + "step": 1885 + }, + { + "epoch": 10.777142857142858, + "grad_norm": 141.62635803222656, + "learning_rate": 4.358095238095238e-05, + "loss": 0.483, + "step": 1886 + }, + { + "epoch": 10.782857142857143, + "grad_norm": 65.65088653564453, + "learning_rate": 4.357460317460318e-05, + "loss": 0.7007, + "step": 1887 + }, + { + "epoch": 10.788571428571428, + "grad_norm": 486.7319641113281, + "learning_rate": 4.3568253968253966e-05, + "loss": 0.5519, + "step": 1888 + }, + { + "epoch": 10.794285714285714, + "grad_norm": 69.58808898925781, + "learning_rate": 4.3561904761904765e-05, + "loss": 0.4125, + "step": 1889 + }, + { + "epoch": 10.8, + "grad_norm": 47.438167572021484, + "learning_rate": 4.355555555555556e-05, + "loss": 0.5029, + "step": 1890 + }, + { + "epoch": 10.805714285714286, + "grad_norm": 34.23190689086914, + "learning_rate": 4.354920634920635e-05, + "loss": 0.5539, + "step": 1891 + }, + { + "epoch": 10.811428571428571, + "grad_norm": 55.843780517578125, + "learning_rate": 4.354285714285714e-05, + "loss": 0.6891, + "step": 1892 + }, + { + "epoch": 10.817142857142857, + "grad_norm": 28.279245376586914, + "learning_rate": 4.3536507936507936e-05, + "loss": 0.6202, + "step": 1893 + }, + { + "epoch": 10.822857142857142, + "grad_norm": 41.35030746459961, + "learning_rate": 4.3530158730158735e-05, + "loss": 0.6041, + "step": 1894 + }, + { + "epoch": 10.82857142857143, + "grad_norm": 131.7793426513672, + "learning_rate": 4.352380952380953e-05, + "loss": 0.7109, + "step": 1895 + }, + { + "epoch": 10.834285714285715, + "grad_norm": 71.40914916992188, + "learning_rate": 4.351746031746032e-05, + "loss": 0.5715, + "step": 1896 + }, + { + "epoch": 10.84, + 
"grad_norm": 36.117801666259766, + "learning_rate": 4.351111111111111e-05, + "loss": 0.6285, + "step": 1897 + }, + { + "epoch": 10.845714285714285, + "grad_norm": 56.4141960144043, + "learning_rate": 4.350476190476191e-05, + "loss": 0.7102, + "step": 1898 + }, + { + "epoch": 10.85142857142857, + "grad_norm": 50.0871696472168, + "learning_rate": 4.34984126984127e-05, + "loss": 0.4798, + "step": 1899 + }, + { + "epoch": 10.857142857142858, + "grad_norm": 61.65697479248047, + "learning_rate": 4.34920634920635e-05, + "loss": 0.3668, + "step": 1900 + }, + { + "epoch": 10.862857142857143, + "grad_norm": 69.15985107421875, + "learning_rate": 4.3485714285714284e-05, + "loss": 0.5221, + "step": 1901 + }, + { + "epoch": 10.868571428571428, + "grad_norm": 36.924461364746094, + "learning_rate": 4.347936507936508e-05, + "loss": 0.6449, + "step": 1902 + }, + { + "epoch": 10.874285714285714, + "grad_norm": 61.253902435302734, + "learning_rate": 4.3473015873015876e-05, + "loss": 0.5385, + "step": 1903 + }, + { + "epoch": 10.88, + "grad_norm": 65.72564697265625, + "learning_rate": 4.346666666666667e-05, + "loss": 0.6443, + "step": 1904 + }, + { + "epoch": 10.885714285714286, + "grad_norm": 57.5501823425293, + "learning_rate": 4.346031746031746e-05, + "loss": 0.6056, + "step": 1905 + }, + { + "epoch": 10.891428571428571, + "grad_norm": 89.76790618896484, + "learning_rate": 4.345396825396826e-05, + "loss": 0.5265, + "step": 1906 + }, + { + "epoch": 10.897142857142857, + "grad_norm": 34.84463119506836, + "learning_rate": 4.3447619047619046e-05, + "loss": 0.5052, + "step": 1907 + }, + { + "epoch": 10.902857142857142, + "grad_norm": 51.19764709472656, + "learning_rate": 4.3441269841269846e-05, + "loss": 0.5892, + "step": 1908 + }, + { + "epoch": 10.90857142857143, + "grad_norm": 52.49979782104492, + "learning_rate": 4.343492063492064e-05, + "loss": 0.727, + "step": 1909 + }, + { + "epoch": 10.914285714285715, + "grad_norm": 17.535512924194336, + "learning_rate": 4.342857142857143e-05, + 
"loss": 0.4278, + "step": 1910 + }, + { + "epoch": 10.92, + "grad_norm": 25.79306983947754, + "learning_rate": 4.3422222222222224e-05, + "loss": 0.5654, + "step": 1911 + }, + { + "epoch": 10.925714285714285, + "grad_norm": 117.12657928466797, + "learning_rate": 4.3415873015873016e-05, + "loss": 0.381, + "step": 1912 + }, + { + "epoch": 10.93142857142857, + "grad_norm": 38.862060546875, + "learning_rate": 4.340952380952381e-05, + "loss": 0.5139, + "step": 1913 + }, + { + "epoch": 10.937142857142858, + "grad_norm": 68.39180755615234, + "learning_rate": 4.340317460317461e-05, + "loss": 0.4431, + "step": 1914 + }, + { + "epoch": 10.942857142857143, + "grad_norm": 53.311988830566406, + "learning_rate": 4.3396825396825394e-05, + "loss": 0.4408, + "step": 1915 + }, + { + "epoch": 10.948571428571428, + "grad_norm": 76.713623046875, + "learning_rate": 4.3390476190476194e-05, + "loss": 0.3767, + "step": 1916 + }, + { + "epoch": 10.954285714285714, + "grad_norm": 26.620380401611328, + "learning_rate": 4.3384126984126986e-05, + "loss": 0.4085, + "step": 1917 + }, + { + "epoch": 10.96, + "grad_norm": 38.23472595214844, + "learning_rate": 4.337777777777778e-05, + "loss": 0.5599, + "step": 1918 + }, + { + "epoch": 10.965714285714286, + "grad_norm": 96.24976348876953, + "learning_rate": 4.337142857142857e-05, + "loss": 0.6077, + "step": 1919 + }, + { + "epoch": 10.971428571428572, + "grad_norm": 98.00028228759766, + "learning_rate": 4.336507936507937e-05, + "loss": 0.3625, + "step": 1920 + }, + { + "epoch": 10.977142857142857, + "grad_norm": 56.013404846191406, + "learning_rate": 4.335873015873016e-05, + "loss": 0.6928, + "step": 1921 + }, + { + "epoch": 10.982857142857142, + "grad_norm": 43.91138458251953, + "learning_rate": 4.3352380952380956e-05, + "loss": 0.5207, + "step": 1922 + }, + { + "epoch": 10.98857142857143, + "grad_norm": 63.15080261230469, + "learning_rate": 4.334603174603175e-05, + "loss": 0.4553, + "step": 1923 + }, + { + "epoch": 10.994285714285715, + "grad_norm": 
34.55913543701172, + "learning_rate": 4.333968253968254e-05, + "loss": 0.5642, + "step": 1924 + }, + { + "epoch": 11.0, + "grad_norm": 55.247528076171875, + "learning_rate": 4.3333333333333334e-05, + "loss": 0.5804, + "step": 1925 + }, + { + "epoch": 11.0, + "eval_classes": 0, + "eval_loss": 0.6237368583679199, + "eval_map": 0.9203, + "eval_map_50": 0.9709, + "eval_map_75": 0.9592, + "eval_map_large": 0.9205, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9203, + "eval_map_small": -1.0, + "eval_mar_1": 0.7902, + "eval_mar_10": 0.9641, + "eval_mar_100": 0.9743, + "eval_mar_100_per_class": 0.9743, + "eval_mar_large": 0.9743, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 18.5564, + "eval_samples_per_second": 15.844, + "eval_steps_per_second": 1.994, + "step": 1925 + }, + { + "epoch": 11.005714285714285, + "grad_norm": 37.32865905761719, + "learning_rate": 4.332698412698413e-05, + "loss": 0.4686, + "step": 1926 + }, + { + "epoch": 11.01142857142857, + "grad_norm": 33.08869552612305, + "learning_rate": 4.332063492063492e-05, + "loss": 0.4919, + "step": 1927 + }, + { + "epoch": 11.017142857142858, + "grad_norm": 56.82509994506836, + "learning_rate": 4.331428571428572e-05, + "loss": 0.5189, + "step": 1928 + }, + { + "epoch": 11.022857142857143, + "grad_norm": 74.70069885253906, + "learning_rate": 4.330793650793651e-05, + "loss": 0.488, + "step": 1929 + }, + { + "epoch": 11.028571428571428, + "grad_norm": 78.25180053710938, + "learning_rate": 4.3301587301587304e-05, + "loss": 0.6853, + "step": 1930 + }, + { + "epoch": 11.034285714285714, + "grad_norm": 155.81265258789062, + "learning_rate": 4.32952380952381e-05, + "loss": 0.4037, + "step": 1931 + }, + { + "epoch": 11.04, + "grad_norm": 30.16897201538086, + "learning_rate": 4.328888888888889e-05, + "loss": 0.4806, + "step": 1932 + }, + { + "epoch": 11.045714285714286, + "grad_norm": 82.40743255615234, + "learning_rate": 4.328253968253969e-05, + "loss": 0.4178, + "step": 1933 + }, + { + 
"epoch": 11.051428571428572, + "grad_norm": 428.0183410644531, + "learning_rate": 4.3276190476190475e-05, + "loss": 0.425, + "step": 1934 + }, + { + "epoch": 11.057142857142857, + "grad_norm": 158.1415252685547, + "learning_rate": 4.3269841269841274e-05, + "loss": 0.7478, + "step": 1935 + }, + { + "epoch": 11.062857142857142, + "grad_norm": 244.0629425048828, + "learning_rate": 4.3263492063492067e-05, + "loss": 0.4738, + "step": 1936 + }, + { + "epoch": 11.06857142857143, + "grad_norm": 325.82147216796875, + "learning_rate": 4.325714285714286e-05, + "loss": 0.7152, + "step": 1937 + }, + { + "epoch": 11.074285714285715, + "grad_norm": 98.61585235595703, + "learning_rate": 4.325079365079365e-05, + "loss": 0.5722, + "step": 1938 + }, + { + "epoch": 11.08, + "grad_norm": 42.40473556518555, + "learning_rate": 4.324444444444445e-05, + "loss": 0.4279, + "step": 1939 + }, + { + "epoch": 11.085714285714285, + "grad_norm": 54.572452545166016, + "learning_rate": 4.323809523809524e-05, + "loss": 0.6706, + "step": 1940 + }, + { + "epoch": 11.09142857142857, + "grad_norm": 27.586755752563477, + "learning_rate": 4.3231746031746037e-05, + "loss": 0.5558, + "step": 1941 + }, + { + "epoch": 11.097142857142858, + "grad_norm": 78.04554748535156, + "learning_rate": 4.322539682539683e-05, + "loss": 0.518, + "step": 1942 + }, + { + "epoch": 11.102857142857143, + "grad_norm": 44.25578689575195, + "learning_rate": 4.321904761904762e-05, + "loss": 0.4998, + "step": 1943 + }, + { + "epoch": 11.108571428571429, + "grad_norm": 21.83209991455078, + "learning_rate": 4.3212698412698414e-05, + "loss": 0.3801, + "step": 1944 + }, + { + "epoch": 11.114285714285714, + "grad_norm": 46.22245407104492, + "learning_rate": 4.320634920634921e-05, + "loss": 0.6038, + "step": 1945 + }, + { + "epoch": 11.12, + "grad_norm": 36.09217071533203, + "learning_rate": 4.32e-05, + "loss": 0.4138, + "step": 1946 + }, + { + "epoch": 11.125714285714286, + "grad_norm": 79.12897491455078, + "learning_rate": 
4.31936507936508e-05, + "loss": 0.4681, + "step": 1947 + }, + { + "epoch": 11.131428571428572, + "grad_norm": 33.90317153930664, + "learning_rate": 4.3187301587301585e-05, + "loss": 0.4288, + "step": 1948 + }, + { + "epoch": 11.137142857142857, + "grad_norm": 109.8990478515625, + "learning_rate": 4.3180952380952384e-05, + "loss": 0.5626, + "step": 1949 + }, + { + "epoch": 11.142857142857142, + "grad_norm": 141.09649658203125, + "learning_rate": 4.317460317460318e-05, + "loss": 0.415, + "step": 1950 + }, + { + "epoch": 11.14857142857143, + "grad_norm": 118.22587585449219, + "learning_rate": 4.316825396825397e-05, + "loss": 0.4853, + "step": 1951 + }, + { + "epoch": 11.154285714285715, + "grad_norm": 34.691768646240234, + "learning_rate": 4.316190476190476e-05, + "loss": 0.4886, + "step": 1952 + }, + { + "epoch": 11.16, + "grad_norm": 46.5521354675293, + "learning_rate": 4.315555555555556e-05, + "loss": 0.5466, + "step": 1953 + }, + { + "epoch": 11.165714285714285, + "grad_norm": 53.882667541503906, + "learning_rate": 4.314920634920635e-05, + "loss": 0.5406, + "step": 1954 + }, + { + "epoch": 11.17142857142857, + "grad_norm": 20.344602584838867, + "learning_rate": 4.314285714285715e-05, + "loss": 0.7339, + "step": 1955 + }, + { + "epoch": 11.177142857142858, + "grad_norm": 928.9378051757812, + "learning_rate": 4.313650793650793e-05, + "loss": 0.4915, + "step": 1956 + }, + { + "epoch": 11.182857142857143, + "grad_norm": 38.344974517822266, + "learning_rate": 4.313015873015873e-05, + "loss": 0.5138, + "step": 1957 + }, + { + "epoch": 11.188571428571429, + "grad_norm": 28.63566780090332, + "learning_rate": 4.3123809523809525e-05, + "loss": 0.5422, + "step": 1958 + }, + { + "epoch": 11.194285714285714, + "grad_norm": 68.49561309814453, + "learning_rate": 4.311746031746032e-05, + "loss": 0.5566, + "step": 1959 + }, + { + "epoch": 11.2, + "grad_norm": 48.883544921875, + "learning_rate": 4.311111111111111e-05, + "loss": 0.605, + "step": 1960 + }, + { + "epoch": 
11.205714285714286, + "grad_norm": 50.32197952270508, + "learning_rate": 4.310476190476191e-05, + "loss": 0.7474, + "step": 1961 + }, + { + "epoch": 11.211428571428572, + "grad_norm": 48.06432342529297, + "learning_rate": 4.30984126984127e-05, + "loss": 0.2864, + "step": 1962 + }, + { + "epoch": 11.217142857142857, + "grad_norm": 64.10684967041016, + "learning_rate": 4.3092063492063495e-05, + "loss": 0.568, + "step": 1963 + }, + { + "epoch": 11.222857142857142, + "grad_norm": 31.029804229736328, + "learning_rate": 4.308571428571429e-05, + "loss": 0.5044, + "step": 1964 + }, + { + "epoch": 11.228571428571428, + "grad_norm": 38.481319427490234, + "learning_rate": 4.307936507936508e-05, + "loss": 0.4226, + "step": 1965 + }, + { + "epoch": 11.234285714285715, + "grad_norm": 204.23287963867188, + "learning_rate": 4.307301587301587e-05, + "loss": 0.4376, + "step": 1966 + }, + { + "epoch": 11.24, + "grad_norm": 83.03377532958984, + "learning_rate": 4.3066666666666665e-05, + "loss": 0.6468, + "step": 1967 + }, + { + "epoch": 11.245714285714286, + "grad_norm": 56.4277229309082, + "learning_rate": 4.3060317460317465e-05, + "loss": 0.524, + "step": 1968 + }, + { + "epoch": 11.251428571428571, + "grad_norm": 95.98672485351562, + "learning_rate": 4.305396825396826e-05, + "loss": 0.8834, + "step": 1969 + }, + { + "epoch": 11.257142857142856, + "grad_norm": 68.86486053466797, + "learning_rate": 4.304761904761905e-05, + "loss": 0.5369, + "step": 1970 + }, + { + "epoch": 11.262857142857143, + "grad_norm": 79.95071411132812, + "learning_rate": 4.304126984126984e-05, + "loss": 0.6789, + "step": 1971 + }, + { + "epoch": 11.268571428571429, + "grad_norm": 89.56037139892578, + "learning_rate": 4.303492063492064e-05, + "loss": 0.7805, + "step": 1972 + }, + { + "epoch": 11.274285714285714, + "grad_norm": 44.71689224243164, + "learning_rate": 4.302857142857143e-05, + "loss": 0.5586, + "step": 1973 + }, + { + "epoch": 11.28, + "grad_norm": 100.0972671508789, + "learning_rate": 
4.302222222222223e-05, + "loss": 0.5374, + "step": 1974 + }, + { + "epoch": 11.285714285714286, + "grad_norm": 52.913841247558594, + "learning_rate": 4.301587301587302e-05, + "loss": 0.6111, + "step": 1975 + }, + { + "epoch": 11.291428571428572, + "grad_norm": 24.168188095092773, + "learning_rate": 4.300952380952381e-05, + "loss": 0.3927, + "step": 1976 + }, + { + "epoch": 11.297142857142857, + "grad_norm": 41.68073272705078, + "learning_rate": 4.3003174603174605e-05, + "loss": 0.6429, + "step": 1977 + }, + { + "epoch": 11.302857142857142, + "grad_norm": 99.18087005615234, + "learning_rate": 4.29968253968254e-05, + "loss": 0.5777, + "step": 1978 + }, + { + "epoch": 11.308571428571428, + "grad_norm": 72.45071411132812, + "learning_rate": 4.299047619047619e-05, + "loss": 0.5482, + "step": 1979 + }, + { + "epoch": 11.314285714285715, + "grad_norm": 43.584842681884766, + "learning_rate": 4.298412698412699e-05, + "loss": 0.5588, + "step": 1980 + }, + { + "epoch": 11.32, + "grad_norm": 72.10374450683594, + "learning_rate": 4.2977777777777776e-05, + "loss": 0.4757, + "step": 1981 + }, + { + "epoch": 11.325714285714286, + "grad_norm": 54.02776336669922, + "learning_rate": 4.2971428571428575e-05, + "loss": 0.4067, + "step": 1982 + }, + { + "epoch": 11.331428571428571, + "grad_norm": 165.2854766845703, + "learning_rate": 4.296507936507937e-05, + "loss": 0.6324, + "step": 1983 + }, + { + "epoch": 11.337142857142856, + "grad_norm": 40.24265670776367, + "learning_rate": 4.295873015873016e-05, + "loss": 0.5337, + "step": 1984 + }, + { + "epoch": 11.342857142857143, + "grad_norm": 44.01199722290039, + "learning_rate": 4.295238095238095e-05, + "loss": 0.6425, + "step": 1985 + }, + { + "epoch": 11.348571428571429, + "grad_norm": 30.16360855102539, + "learning_rate": 4.294603174603175e-05, + "loss": 0.569, + "step": 1986 + }, + { + "epoch": 11.354285714285714, + "grad_norm": 54.55186462402344, + "learning_rate": 4.293968253968254e-05, + "loss": 0.5003, + "step": 1987 + }, + { + 
"epoch": 11.36, + "grad_norm": 84.72821807861328, + "learning_rate": 4.293333333333334e-05, + "loss": 0.6783, + "step": 1988 + }, + { + "epoch": 11.365714285714287, + "grad_norm": 33.0731201171875, + "learning_rate": 4.2926984126984124e-05, + "loss": 0.5685, + "step": 1989 + }, + { + "epoch": 11.371428571428572, + "grad_norm": 67.03597259521484, + "learning_rate": 4.292063492063492e-05, + "loss": 0.4879, + "step": 1990 + }, + { + "epoch": 11.377142857142857, + "grad_norm": 26.01388168334961, + "learning_rate": 4.2914285714285716e-05, + "loss": 0.4475, + "step": 1991 + }, + { + "epoch": 11.382857142857143, + "grad_norm": 41.41946792602539, + "learning_rate": 4.290793650793651e-05, + "loss": 0.5839, + "step": 1992 + }, + { + "epoch": 11.388571428571428, + "grad_norm": 22.868900299072266, + "learning_rate": 4.29015873015873e-05, + "loss": 0.3814, + "step": 1993 + }, + { + "epoch": 11.394285714285715, + "grad_norm": 38.04800033569336, + "learning_rate": 4.28952380952381e-05, + "loss": 0.5012, + "step": 1994 + }, + { + "epoch": 11.4, + "grad_norm": 29.78988265991211, + "learning_rate": 4.2888888888888886e-05, + "loss": 0.4224, + "step": 1995 + }, + { + "epoch": 11.405714285714286, + "grad_norm": 34.21943664550781, + "learning_rate": 4.2882539682539686e-05, + "loss": 0.4692, + "step": 1996 + }, + { + "epoch": 11.411428571428571, + "grad_norm": 36.694095611572266, + "learning_rate": 4.287619047619048e-05, + "loss": 0.6359, + "step": 1997 + }, + { + "epoch": 11.417142857142856, + "grad_norm": 50.67527770996094, + "learning_rate": 4.286984126984127e-05, + "loss": 0.872, + "step": 1998 + }, + { + "epoch": 11.422857142857143, + "grad_norm": 46.49637222290039, + "learning_rate": 4.2863492063492064e-05, + "loss": 0.529, + "step": 1999 + }, + { + "epoch": 11.428571428571429, + "grad_norm": 60.049251556396484, + "learning_rate": 4.2857142857142856e-05, + "loss": 0.6069, + "step": 2000 + }, + { + "epoch": 11.434285714285714, + "grad_norm": 50.15530014038086, + "learning_rate": 
4.2850793650793656e-05, + "loss": 0.5928, + "step": 2001 + }, + { + "epoch": 11.44, + "grad_norm": 56.090023040771484, + "learning_rate": 4.284444444444445e-05, + "loss": 0.4419, + "step": 2002 + }, + { + "epoch": 11.445714285714285, + "grad_norm": 27.48720932006836, + "learning_rate": 4.283809523809524e-05, + "loss": 0.4542, + "step": 2003 + }, + { + "epoch": 11.451428571428572, + "grad_norm": 73.39603424072266, + "learning_rate": 4.2831746031746034e-05, + "loss": 0.7161, + "step": 2004 + }, + { + "epoch": 11.457142857142857, + "grad_norm": 33.92833709716797, + "learning_rate": 4.2825396825396826e-05, + "loss": 0.4452, + "step": 2005 + }, + { + "epoch": 11.462857142857143, + "grad_norm": 51.888221740722656, + "learning_rate": 4.281904761904762e-05, + "loss": 0.7835, + "step": 2006 + }, + { + "epoch": 11.468571428571428, + "grad_norm": 31.269601821899414, + "learning_rate": 4.281269841269842e-05, + "loss": 0.8202, + "step": 2007 + }, + { + "epoch": 11.474285714285715, + "grad_norm": 29.58742332458496, + "learning_rate": 4.280634920634921e-05, + "loss": 0.8376, + "step": 2008 + }, + { + "epoch": 11.48, + "grad_norm": 28.278118133544922, + "learning_rate": 4.2800000000000004e-05, + "loss": 0.6554, + "step": 2009 + }, + { + "epoch": 11.485714285714286, + "grad_norm": 64.48429107666016, + "learning_rate": 4.2793650793650796e-05, + "loss": 0.5049, + "step": 2010 + }, + { + "epoch": 11.491428571428571, + "grad_norm": 45.5100212097168, + "learning_rate": 4.278730158730159e-05, + "loss": 0.3134, + "step": 2011 + }, + { + "epoch": 11.497142857142856, + "grad_norm": 57.239131927490234, + "learning_rate": 4.278095238095238e-05, + "loss": 0.5187, + "step": 2012 + }, + { + "epoch": 11.502857142857144, + "grad_norm": 42.058956146240234, + "learning_rate": 4.277460317460318e-05, + "loss": 0.5187, + "step": 2013 + }, + { + "epoch": 11.508571428571429, + "grad_norm": 126.79985809326172, + "learning_rate": 4.276825396825397e-05, + "loss": 0.6129, + "step": 2014 + }, + { + "epoch": 
11.514285714285714, + "grad_norm": 126.11183166503906, + "learning_rate": 4.2761904761904766e-05, + "loss": 0.6724, + "step": 2015 + }, + { + "epoch": 11.52, + "grad_norm": 48.38181686401367, + "learning_rate": 4.275555555555556e-05, + "loss": 0.5613, + "step": 2016 + }, + { + "epoch": 11.525714285714285, + "grad_norm": 72.9981689453125, + "learning_rate": 4.274920634920635e-05, + "loss": 0.6188, + "step": 2017 + }, + { + "epoch": 11.531428571428572, + "grad_norm": 190.55735778808594, + "learning_rate": 4.2742857142857144e-05, + "loss": 0.6706, + "step": 2018 + }, + { + "epoch": 11.537142857142857, + "grad_norm": 220.0579071044922, + "learning_rate": 4.2736507936507943e-05, + "loss": 0.3876, + "step": 2019 + }, + { + "epoch": 11.542857142857143, + "grad_norm": 67.05650329589844, + "learning_rate": 4.273015873015873e-05, + "loss": 0.5308, + "step": 2020 + }, + { + "epoch": 11.548571428571428, + "grad_norm": 45.68679428100586, + "learning_rate": 4.272380952380953e-05, + "loss": 0.548, + "step": 2021 + }, + { + "epoch": 11.554285714285715, + "grad_norm": 19.767457962036133, + "learning_rate": 4.2717460317460315e-05, + "loss": 0.4076, + "step": 2022 + }, + { + "epoch": 11.56, + "grad_norm": 54.95707321166992, + "learning_rate": 4.2711111111111114e-05, + "loss": 0.4922, + "step": 2023 + }, + { + "epoch": 11.565714285714286, + "grad_norm": 31.493915557861328, + "learning_rate": 4.270476190476191e-05, + "loss": 0.555, + "step": 2024 + }, + { + "epoch": 11.571428571428571, + "grad_norm": 44.43305206298828, + "learning_rate": 4.26984126984127e-05, + "loss": 0.6991, + "step": 2025 + }, + { + "epoch": 11.577142857142857, + "grad_norm": 19.636838912963867, + "learning_rate": 4.269206349206349e-05, + "loss": 0.487, + "step": 2026 + }, + { + "epoch": 11.582857142857144, + "grad_norm": 21.483842849731445, + "learning_rate": 4.268571428571429e-05, + "loss": 0.4997, + "step": 2027 + }, + { + "epoch": 11.588571428571429, + "grad_norm": 34.854068756103516, + "learning_rate": 
4.267936507936508e-05, + "loss": 0.3773, + "step": 2028 + }, + { + "epoch": 11.594285714285714, + "grad_norm": 74.83104705810547, + "learning_rate": 4.267301587301588e-05, + "loss": 0.3825, + "step": 2029 + }, + { + "epoch": 11.6, + "grad_norm": 66.73103332519531, + "learning_rate": 4.266666666666667e-05, + "loss": 0.4521, + "step": 2030 + }, + { + "epoch": 11.605714285714285, + "grad_norm": 70.25010681152344, + "learning_rate": 4.266031746031746e-05, + "loss": 0.3803, + "step": 2031 + }, + { + "epoch": 11.611428571428572, + "grad_norm": 11.714632034301758, + "learning_rate": 4.2653968253968255e-05, + "loss": 0.4515, + "step": 2032 + }, + { + "epoch": 11.617142857142857, + "grad_norm": 50.63631820678711, + "learning_rate": 4.264761904761905e-05, + "loss": 0.4352, + "step": 2033 + }, + { + "epoch": 11.622857142857143, + "grad_norm": 52.2682991027832, + "learning_rate": 4.264126984126984e-05, + "loss": 0.7108, + "step": 2034 + }, + { + "epoch": 11.628571428571428, + "grad_norm": 39.13172149658203, + "learning_rate": 4.263492063492064e-05, + "loss": 0.7079, + "step": 2035 + }, + { + "epoch": 11.634285714285713, + "grad_norm": 39.47657775878906, + "learning_rate": 4.262857142857143e-05, + "loss": 0.7292, + "step": 2036 + }, + { + "epoch": 11.64, + "grad_norm": 17.30119514465332, + "learning_rate": 4.2622222222222224e-05, + "loss": 0.5236, + "step": 2037 + }, + { + "epoch": 11.645714285714286, + "grad_norm": 48.4094123840332, + "learning_rate": 4.261587301587302e-05, + "loss": 0.7339, + "step": 2038 + }, + { + "epoch": 11.651428571428571, + "grad_norm": 39.617584228515625, + "learning_rate": 4.260952380952381e-05, + "loss": 0.5657, + "step": 2039 + }, + { + "epoch": 11.657142857142857, + "grad_norm": 20.779735565185547, + "learning_rate": 4.260317460317461e-05, + "loss": 0.4882, + "step": 2040 + }, + { + "epoch": 11.662857142857142, + "grad_norm": 41.407501220703125, + "learning_rate": 4.25968253968254e-05, + "loss": 0.3647, + "step": 2041 + }, + { + "epoch": 
11.668571428571429, + "grad_norm": 64.75984954833984, + "learning_rate": 4.2590476190476194e-05, + "loss": 0.4312, + "step": 2042 + }, + { + "epoch": 11.674285714285714, + "grad_norm": 47.528480529785156, + "learning_rate": 4.258412698412699e-05, + "loss": 0.3838, + "step": 2043 + }, + { + "epoch": 11.68, + "grad_norm": 32.28666305541992, + "learning_rate": 4.257777777777778e-05, + "loss": 0.4923, + "step": 2044 + }, + { + "epoch": 11.685714285714285, + "grad_norm": 89.13447570800781, + "learning_rate": 4.257142857142857e-05, + "loss": 0.4379, + "step": 2045 + }, + { + "epoch": 11.691428571428572, + "grad_norm": 28.47506332397461, + "learning_rate": 4.256507936507937e-05, + "loss": 0.4225, + "step": 2046 + }, + { + "epoch": 11.697142857142858, + "grad_norm": 47.36610794067383, + "learning_rate": 4.255873015873016e-05, + "loss": 0.5541, + "step": 2047 + }, + { + "epoch": 11.702857142857143, + "grad_norm": 56.977779388427734, + "learning_rate": 4.255238095238096e-05, + "loss": 0.6693, + "step": 2048 + }, + { + "epoch": 11.708571428571428, + "grad_norm": 27.516395568847656, + "learning_rate": 4.254603174603175e-05, + "loss": 0.6076, + "step": 2049 + }, + { + "epoch": 11.714285714285714, + "grad_norm": 329.2275085449219, + "learning_rate": 4.253968253968254e-05, + "loss": 0.4606, + "step": 2050 + }, + { + "epoch": 11.72, + "grad_norm": 63.27556228637695, + "learning_rate": 4.2533333333333335e-05, + "loss": 0.35, + "step": 2051 + }, + { + "epoch": 11.725714285714286, + "grad_norm": 24.467205047607422, + "learning_rate": 4.252698412698413e-05, + "loss": 0.4779, + "step": 2052 + }, + { + "epoch": 11.731428571428571, + "grad_norm": 27.03794288635254, + "learning_rate": 4.252063492063492e-05, + "loss": 0.519, + "step": 2053 + }, + { + "epoch": 11.737142857142857, + "grad_norm": 44.75577163696289, + "learning_rate": 4.251428571428572e-05, + "loss": 0.5261, + "step": 2054 + }, + { + "epoch": 11.742857142857144, + "grad_norm": 46.69427490234375, + "learning_rate": 
4.2507936507936505e-05, + "loss": 0.3524, + "step": 2055 + }, + { + "epoch": 11.748571428571429, + "grad_norm": 661.8193969726562, + "learning_rate": 4.2501587301587305e-05, + "loss": 0.5148, + "step": 2056 + }, + { + "epoch": 11.754285714285714, + "grad_norm": 37.70705795288086, + "learning_rate": 4.24952380952381e-05, + "loss": 0.7424, + "step": 2057 + }, + { + "epoch": 11.76, + "grad_norm": 60.31504440307617, + "learning_rate": 4.248888888888889e-05, + "loss": 0.6803, + "step": 2058 + }, + { + "epoch": 11.765714285714285, + "grad_norm": 53.602027893066406, + "learning_rate": 4.248253968253968e-05, + "loss": 0.3222, + "step": 2059 + }, + { + "epoch": 11.771428571428572, + "grad_norm": 16.7213134765625, + "learning_rate": 4.247619047619048e-05, + "loss": 0.4194, + "step": 2060 + }, + { + "epoch": 11.777142857142858, + "grad_norm": 31.727859497070312, + "learning_rate": 4.246984126984127e-05, + "loss": 0.6186, + "step": 2061 + }, + { + "epoch": 11.782857142857143, + "grad_norm": 68.51502990722656, + "learning_rate": 4.246349206349207e-05, + "loss": 0.6848, + "step": 2062 + }, + { + "epoch": 11.788571428571428, + "grad_norm": 111.31018829345703, + "learning_rate": 4.245714285714285e-05, + "loss": 0.6621, + "step": 2063 + }, + { + "epoch": 11.794285714285714, + "grad_norm": 47.58015441894531, + "learning_rate": 4.245079365079365e-05, + "loss": 0.5018, + "step": 2064 + }, + { + "epoch": 11.8, + "grad_norm": 55.297119140625, + "learning_rate": 4.2444444444444445e-05, + "loss": 0.6728, + "step": 2065 + }, + { + "epoch": 11.805714285714286, + "grad_norm": 76.49820709228516, + "learning_rate": 4.243809523809524e-05, + "loss": 0.3881, + "step": 2066 + }, + { + "epoch": 11.811428571428571, + "grad_norm": 24.38382339477539, + "learning_rate": 4.243174603174603e-05, + "loss": 0.4981, + "step": 2067 + }, + { + "epoch": 11.817142857142857, + "grad_norm": 22.08475112915039, + "learning_rate": 4.242539682539683e-05, + "loss": 0.5994, + "step": 2068 + }, + { + "epoch": 
11.822857142857142, + "grad_norm": 41.89626693725586, + "learning_rate": 4.241904761904762e-05, + "loss": 0.4922, + "step": 2069 + }, + { + "epoch": 11.82857142857143, + "grad_norm": 40.99779510498047, + "learning_rate": 4.2412698412698415e-05, + "loss": 0.406, + "step": 2070 + }, + { + "epoch": 11.834285714285715, + "grad_norm": 36.55776596069336, + "learning_rate": 4.240634920634921e-05, + "loss": 0.4732, + "step": 2071 + }, + { + "epoch": 11.84, + "grad_norm": 88.96212768554688, + "learning_rate": 4.24e-05, + "loss": 0.4956, + "step": 2072 + }, + { + "epoch": 11.845714285714285, + "grad_norm": 28.359384536743164, + "learning_rate": 4.239365079365079e-05, + "loss": 0.4471, + "step": 2073 + }, + { + "epoch": 11.85142857142857, + "grad_norm": 149.3414764404297, + "learning_rate": 4.2387301587301586e-05, + "loss": 0.6434, + "step": 2074 + }, + { + "epoch": 11.857142857142858, + "grad_norm": 46.888206481933594, + "learning_rate": 4.2380952380952385e-05, + "loss": 0.4271, + "step": 2075 + }, + { + "epoch": 11.862857142857143, + "grad_norm": 49.115962982177734, + "learning_rate": 4.237460317460318e-05, + "loss": 0.5293, + "step": 2076 + }, + { + "epoch": 11.868571428571428, + "grad_norm": 54.932952880859375, + "learning_rate": 4.236825396825397e-05, + "loss": 0.85, + "step": 2077 + }, + { + "epoch": 11.874285714285714, + "grad_norm": 50.6089973449707, + "learning_rate": 4.236190476190476e-05, + "loss": 0.756, + "step": 2078 + }, + { + "epoch": 11.88, + "grad_norm": 31.927751541137695, + "learning_rate": 4.235555555555556e-05, + "loss": 0.5294, + "step": 2079 + }, + { + "epoch": 11.885714285714286, + "grad_norm": 38.52204513549805, + "learning_rate": 4.234920634920635e-05, + "loss": 0.534, + "step": 2080 + }, + { + "epoch": 11.891428571428571, + "grad_norm": 27.282697677612305, + "learning_rate": 4.234285714285715e-05, + "loss": 0.455, + "step": 2081 + }, + { + "epoch": 11.897142857142857, + "grad_norm": 48.41023635864258, + "learning_rate": 4.233650793650794e-05, + 
"loss": 0.5042, + "step": 2082 + }, + { + "epoch": 11.902857142857142, + "grad_norm": 32.89781188964844, + "learning_rate": 4.233015873015873e-05, + "loss": 0.5412, + "step": 2083 + }, + { + "epoch": 11.90857142857143, + "grad_norm": 238.75128173828125, + "learning_rate": 4.2323809523809526e-05, + "loss": 0.5629, + "step": 2084 + }, + { + "epoch": 11.914285714285715, + "grad_norm": 32.940555572509766, + "learning_rate": 4.231746031746032e-05, + "loss": 0.4948, + "step": 2085 + }, + { + "epoch": 11.92, + "grad_norm": 68.61351013183594, + "learning_rate": 4.231111111111111e-05, + "loss": 0.6385, + "step": 2086 + }, + { + "epoch": 11.925714285714285, + "grad_norm": 164.97157287597656, + "learning_rate": 4.230476190476191e-05, + "loss": 0.7415, + "step": 2087 + }, + { + "epoch": 11.93142857142857, + "grad_norm": 61.077117919921875, + "learning_rate": 4.2298412698412696e-05, + "loss": 0.7373, + "step": 2088 + }, + { + "epoch": 11.937142857142858, + "grad_norm": 35.24998474121094, + "learning_rate": 4.2292063492063496e-05, + "loss": 0.6198, + "step": 2089 + }, + { + "epoch": 11.942857142857143, + "grad_norm": 52.322322845458984, + "learning_rate": 4.228571428571429e-05, + "loss": 0.6625, + "step": 2090 + }, + { + "epoch": 11.948571428571428, + "grad_norm": 26.97150993347168, + "learning_rate": 4.227936507936508e-05, + "loss": 0.4099, + "step": 2091 + }, + { + "epoch": 11.954285714285714, + "grad_norm": 48.867034912109375, + "learning_rate": 4.2273015873015874e-05, + "loss": 0.5592, + "step": 2092 + }, + { + "epoch": 11.96, + "grad_norm": 32.61470031738281, + "learning_rate": 4.226666666666667e-05, + "loss": 0.4504, + "step": 2093 + }, + { + "epoch": 11.965714285714286, + "grad_norm": 83.98099517822266, + "learning_rate": 4.226031746031746e-05, + "loss": 0.5111, + "step": 2094 + }, + { + "epoch": 11.971428571428572, + "grad_norm": 338.13092041015625, + "learning_rate": 4.225396825396826e-05, + "loss": 0.3596, + "step": 2095 + }, + { + "epoch": 11.977142857142857, + 
"grad_norm": 47.3306884765625, + "learning_rate": 4.2247619047619044e-05, + "loss": 0.4337, + "step": 2096 + }, + { + "epoch": 11.982857142857142, + "grad_norm": 23.791034698486328, + "learning_rate": 4.2241269841269844e-05, + "loss": 0.5078, + "step": 2097 + }, + { + "epoch": 11.98857142857143, + "grad_norm": 34.635372161865234, + "learning_rate": 4.2234920634920636e-05, + "loss": 0.6245, + "step": 2098 + }, + { + "epoch": 11.994285714285715, + "grad_norm": 47.677650451660156, + "learning_rate": 4.222857142857143e-05, + "loss": 0.482, + "step": 2099 + }, + { + "epoch": 12.0, + "grad_norm": 19.956443786621094, + "learning_rate": 4.222222222222222e-05, + "loss": 0.4969, + "step": 2100 + }, + { + "epoch": 12.0, + "eval_classes": 0, + "eval_loss": 0.6676629781723022, + "eval_map": 0.9106, + "eval_map_50": 0.9664, + "eval_map_75": 0.9573, + "eval_map_large": 0.9107, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9106, + "eval_map_small": -1.0, + "eval_mar_1": 0.7819, + "eval_mar_10": 0.9613, + "eval_mar_100": 0.9717, + "eval_mar_100_per_class": 0.9717, + "eval_mar_large": 0.9717, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 17.7214, + "eval_samples_per_second": 16.59, + "eval_steps_per_second": 2.088, + "step": 2100 + }, + { + "epoch": 12.005714285714285, + "grad_norm": 19.098283767700195, + "learning_rate": 4.221587301587302e-05, + "loss": 0.4616, + "step": 2101 + }, + { + "epoch": 12.01142857142857, + "grad_norm": 116.86248779296875, + "learning_rate": 4.220952380952381e-05, + "loss": 0.498, + "step": 2102 + }, + { + "epoch": 12.017142857142858, + "grad_norm": 67.87767791748047, + "learning_rate": 4.2203174603174606e-05, + "loss": 0.4618, + "step": 2103 + }, + { + "epoch": 12.022857142857143, + "grad_norm": 44.77392578125, + "learning_rate": 4.21968253968254e-05, + "loss": 0.5808, + "step": 2104 + }, + { + "epoch": 12.028571428571428, + "grad_norm": 230.97474670410156, + "learning_rate": 4.219047619047619e-05, + "loss": 0.5865, + 
"step": 2105 + }, + { + "epoch": 12.034285714285714, + "grad_norm": 88.32511138916016, + "learning_rate": 4.2184126984126984e-05, + "loss": 0.5504, + "step": 2106 + }, + { + "epoch": 12.04, + "grad_norm": 30.843040466308594, + "learning_rate": 4.217777777777778e-05, + "loss": 0.5666, + "step": 2107 + }, + { + "epoch": 12.045714285714286, + "grad_norm": 36.712955474853516, + "learning_rate": 4.2171428571428576e-05, + "loss": 0.5809, + "step": 2108 + }, + { + "epoch": 12.051428571428572, + "grad_norm": 380.5534362792969, + "learning_rate": 4.216507936507937e-05, + "loss": 0.5338, + "step": 2109 + }, + { + "epoch": 12.057142857142857, + "grad_norm": 22.827157974243164, + "learning_rate": 4.215873015873016e-05, + "loss": 0.6134, + "step": 2110 + }, + { + "epoch": 12.062857142857142, + "grad_norm": 65.1633071899414, + "learning_rate": 4.2152380952380954e-05, + "loss": 0.4525, + "step": 2111 + }, + { + "epoch": 12.06857142857143, + "grad_norm": 40.37752151489258, + "learning_rate": 4.214603174603175e-05, + "loss": 0.599, + "step": 2112 + }, + { + "epoch": 12.074285714285715, + "grad_norm": 36.74299621582031, + "learning_rate": 4.213968253968254e-05, + "loss": 0.5092, + "step": 2113 + }, + { + "epoch": 12.08, + "grad_norm": 32.371429443359375, + "learning_rate": 4.213333333333334e-05, + "loss": 0.5349, + "step": 2114 + }, + { + "epoch": 12.085714285714285, + "grad_norm": 27.217981338500977, + "learning_rate": 4.212698412698413e-05, + "loss": 0.4384, + "step": 2115 + }, + { + "epoch": 12.09142857142857, + "grad_norm": 27.24367904663086, + "learning_rate": 4.2120634920634924e-05, + "loss": 0.4383, + "step": 2116 + }, + { + "epoch": 12.097142857142858, + "grad_norm": 71.84082794189453, + "learning_rate": 4.211428571428572e-05, + "loss": 0.5462, + "step": 2117 + }, + { + "epoch": 12.102857142857143, + "grad_norm": 27.53964614868164, + "learning_rate": 4.210793650793651e-05, + "loss": 0.4367, + "step": 2118 + }, + { + "epoch": 12.108571428571429, + "grad_norm": 
71.42292785644531, + "learning_rate": 4.21015873015873e-05, + "loss": 0.5036, + "step": 2119 + }, + { + "epoch": 12.114285714285714, + "grad_norm": 55.60530471801758, + "learning_rate": 4.20952380952381e-05, + "loss": 0.615, + "step": 2120 + }, + { + "epoch": 12.12, + "grad_norm": 66.84548950195312, + "learning_rate": 4.208888888888889e-05, + "loss": 0.5791, + "step": 2121 + }, + { + "epoch": 12.125714285714286, + "grad_norm": 143.91664123535156, + "learning_rate": 4.208253968253969e-05, + "loss": 0.4915, + "step": 2122 + }, + { + "epoch": 12.131428571428572, + "grad_norm": 31.189228057861328, + "learning_rate": 4.207619047619048e-05, + "loss": 0.8509, + "step": 2123 + }, + { + "epoch": 12.137142857142857, + "grad_norm": 24.511716842651367, + "learning_rate": 4.206984126984127e-05, + "loss": 0.5659, + "step": 2124 + }, + { + "epoch": 12.142857142857142, + "grad_norm": 20.814542770385742, + "learning_rate": 4.2063492063492065e-05, + "loss": 0.5101, + "step": 2125 + }, + { + "epoch": 12.14857142857143, + "grad_norm": 49.04802703857422, + "learning_rate": 4.2057142857142864e-05, + "loss": 0.3948, + "step": 2126 + }, + { + "epoch": 12.154285714285715, + "grad_norm": 64.11085510253906, + "learning_rate": 4.205079365079365e-05, + "loss": 0.4462, + "step": 2127 + }, + { + "epoch": 12.16, + "grad_norm": 36.154884338378906, + "learning_rate": 4.204444444444445e-05, + "loss": 0.6394, + "step": 2128 + }, + { + "epoch": 12.165714285714285, + "grad_norm": 53.460052490234375, + "learning_rate": 4.2038095238095235e-05, + "loss": 0.4813, + "step": 2129 + }, + { + "epoch": 12.17142857142857, + "grad_norm": 70.51872253417969, + "learning_rate": 4.2031746031746034e-05, + "loss": 0.4803, + "step": 2130 + }, + { + "epoch": 12.177142857142858, + "grad_norm": 65.65107727050781, + "learning_rate": 4.202539682539683e-05, + "loss": 0.4508, + "step": 2131 + }, + { + "epoch": 12.182857142857143, + "grad_norm": 51.026554107666016, + "learning_rate": 4.201904761904762e-05, + "loss": 0.322, + 
"step": 2132 + }, + { + "epoch": 12.188571428571429, + "grad_norm": 39.102333068847656, + "learning_rate": 4.201269841269841e-05, + "loss": 0.4516, + "step": 2133 + }, + { + "epoch": 12.194285714285714, + "grad_norm": 38.93904113769531, + "learning_rate": 4.200634920634921e-05, + "loss": 0.5626, + "step": 2134 + }, + { + "epoch": 12.2, + "grad_norm": 24.30509376525879, + "learning_rate": 4.2e-05, + "loss": 0.3237, + "step": 2135 + }, + { + "epoch": 12.205714285714286, + "grad_norm": 15.547541618347168, + "learning_rate": 4.19936507936508e-05, + "loss": 0.5393, + "step": 2136 + }, + { + "epoch": 12.211428571428572, + "grad_norm": 53.673561096191406, + "learning_rate": 4.198730158730159e-05, + "loss": 0.6376, + "step": 2137 + }, + { + "epoch": 12.217142857142857, + "grad_norm": 81.23772430419922, + "learning_rate": 4.198095238095238e-05, + "loss": 0.5826, + "step": 2138 + }, + { + "epoch": 12.222857142857142, + "grad_norm": 28.7341251373291, + "learning_rate": 4.1974603174603175e-05, + "loss": 0.5006, + "step": 2139 + }, + { + "epoch": 12.228571428571428, + "grad_norm": 38.085105895996094, + "learning_rate": 4.196825396825397e-05, + "loss": 0.5555, + "step": 2140 + }, + { + "epoch": 12.234285714285715, + "grad_norm": 41.27619934082031, + "learning_rate": 4.196190476190476e-05, + "loss": 0.4308, + "step": 2141 + }, + { + "epoch": 12.24, + "grad_norm": 31.69409942626953, + "learning_rate": 4.195555555555556e-05, + "loss": 0.6899, + "step": 2142 + }, + { + "epoch": 12.245714285714286, + "grad_norm": 23.529006958007812, + "learning_rate": 4.194920634920635e-05, + "loss": 0.4078, + "step": 2143 + }, + { + "epoch": 12.251428571428571, + "grad_norm": 77.69281005859375, + "learning_rate": 4.1942857142857145e-05, + "loss": 0.4954, + "step": 2144 + }, + { + "epoch": 12.257142857142856, + "grad_norm": 62.58024215698242, + "learning_rate": 4.193650793650794e-05, + "loss": 0.4758, + "step": 2145 + }, + { + "epoch": 12.262857142857143, + "grad_norm": 68.62194061279297, + 
"learning_rate": 4.193015873015873e-05, + "loss": 0.5499, + "step": 2146 + }, + { + "epoch": 12.268571428571429, + "grad_norm": 41.420654296875, + "learning_rate": 4.192380952380953e-05, + "loss": 0.4927, + "step": 2147 + }, + { + "epoch": 12.274285714285714, + "grad_norm": 51.051979064941406, + "learning_rate": 4.191746031746032e-05, + "loss": 0.4567, + "step": 2148 + }, + { + "epoch": 12.28, + "grad_norm": 37.83919906616211, + "learning_rate": 4.1911111111111115e-05, + "loss": 0.627, + "step": 2149 + }, + { + "epoch": 12.285714285714286, + "grad_norm": 56.637603759765625, + "learning_rate": 4.190476190476191e-05, + "loss": 0.4963, + "step": 2150 + }, + { + "epoch": 12.291428571428572, + "grad_norm": 58.017276763916016, + "learning_rate": 4.18984126984127e-05, + "loss": 0.592, + "step": 2151 + }, + { + "epoch": 12.297142857142857, + "grad_norm": 321.4501037597656, + "learning_rate": 4.189206349206349e-05, + "loss": 0.5086, + "step": 2152 + }, + { + "epoch": 12.302857142857142, + "grad_norm": 28.185853958129883, + "learning_rate": 4.188571428571429e-05, + "loss": 0.458, + "step": 2153 + }, + { + "epoch": 12.308571428571428, + "grad_norm": 28.150747299194336, + "learning_rate": 4.187936507936508e-05, + "loss": 0.3269, + "step": 2154 + }, + { + "epoch": 12.314285714285715, + "grad_norm": 52.02998733520508, + "learning_rate": 4.187301587301588e-05, + "loss": 0.6794, + "step": 2155 + }, + { + "epoch": 12.32, + "grad_norm": 110.56523132324219, + "learning_rate": 4.186666666666667e-05, + "loss": 0.3929, + "step": 2156 + }, + { + "epoch": 12.325714285714286, + "grad_norm": 23.185405731201172, + "learning_rate": 4.186031746031746e-05, + "loss": 0.6975, + "step": 2157 + }, + { + "epoch": 12.331428571428571, + "grad_norm": 235.8870086669922, + "learning_rate": 4.1853968253968255e-05, + "loss": 0.4131, + "step": 2158 + }, + { + "epoch": 12.337142857142856, + "grad_norm": 53.238712310791016, + "learning_rate": 4.1847619047619055e-05, + "loss": 0.4683, + "step": 2159 + }, + { + 
"epoch": 12.342857142857143, + "grad_norm": 38.866641998291016, + "learning_rate": 4.184126984126984e-05, + "loss": 0.4686, + "step": 2160 + }, + { + "epoch": 12.348571428571429, + "grad_norm": 29.63631820678711, + "learning_rate": 4.183492063492064e-05, + "loss": 0.6512, + "step": 2161 + }, + { + "epoch": 12.354285714285714, + "grad_norm": 34.041786193847656, + "learning_rate": 4.1828571428571426e-05, + "loss": 0.4923, + "step": 2162 + }, + { + "epoch": 12.36, + "grad_norm": 51.90822219848633, + "learning_rate": 4.1822222222222225e-05, + "loss": 0.4309, + "step": 2163 + }, + { + "epoch": 12.365714285714287, + "grad_norm": 54.88132858276367, + "learning_rate": 4.181587301587302e-05, + "loss": 0.4355, + "step": 2164 + }, + { + "epoch": 12.371428571428572, + "grad_norm": 37.65031433105469, + "learning_rate": 4.180952380952381e-05, + "loss": 0.4204, + "step": 2165 + }, + { + "epoch": 12.377142857142857, + "grad_norm": 45.69845199584961, + "learning_rate": 4.18031746031746e-05, + "loss": 0.3058, + "step": 2166 + }, + { + "epoch": 12.382857142857143, + "grad_norm": 33.428993225097656, + "learning_rate": 4.17968253968254e-05, + "loss": 0.3404, + "step": 2167 + }, + { + "epoch": 12.388571428571428, + "grad_norm": 28.256479263305664, + "learning_rate": 4.179047619047619e-05, + "loss": 0.865, + "step": 2168 + }, + { + "epoch": 12.394285714285715, + "grad_norm": 71.89823150634766, + "learning_rate": 4.178412698412699e-05, + "loss": 0.7565, + "step": 2169 + }, + { + "epoch": 12.4, + "grad_norm": 43.25562286376953, + "learning_rate": 4.177777777777778e-05, + "loss": 0.4256, + "step": 2170 + }, + { + "epoch": 12.405714285714286, + "grad_norm": 31.406131744384766, + "learning_rate": 4.177142857142857e-05, + "loss": 0.6001, + "step": 2171 + }, + { + "epoch": 12.411428571428571, + "grad_norm": 70.84765625, + "learning_rate": 4.1765079365079366e-05, + "loss": 0.4689, + "step": 2172 + }, + { + "epoch": 12.417142857142856, + "grad_norm": 48.25562286376953, + "learning_rate": 
4.175873015873016e-05, + "loss": 0.4392, + "step": 2173 + }, + { + "epoch": 12.422857142857143, + "grad_norm": 35.83698272705078, + "learning_rate": 4.175238095238095e-05, + "loss": 0.5675, + "step": 2174 + }, + { + "epoch": 12.428571428571429, + "grad_norm": 72.83888244628906, + "learning_rate": 4.174603174603175e-05, + "loss": 0.4522, + "step": 2175 + }, + { + "epoch": 12.434285714285714, + "grad_norm": 117.34611511230469, + "learning_rate": 4.1739682539682536e-05, + "loss": 0.5442, + "step": 2176 + }, + { + "epoch": 12.44, + "grad_norm": 81.17318725585938, + "learning_rate": 4.1733333333333336e-05, + "loss": 0.4339, + "step": 2177 + }, + { + "epoch": 12.445714285714285, + "grad_norm": 55.67113494873047, + "learning_rate": 4.172698412698413e-05, + "loss": 0.4434, + "step": 2178 + }, + { + "epoch": 12.451428571428572, + "grad_norm": 27.9365177154541, + "learning_rate": 4.172063492063492e-05, + "loss": 0.46, + "step": 2179 + }, + { + "epoch": 12.457142857142857, + "grad_norm": 61.951812744140625, + "learning_rate": 4.1714285714285714e-05, + "loss": 0.5254, + "step": 2180 + }, + { + "epoch": 12.462857142857143, + "grad_norm": 30.21892738342285, + "learning_rate": 4.170793650793651e-05, + "loss": 0.526, + "step": 2181 + }, + { + "epoch": 12.468571428571428, + "grad_norm": 41.746585845947266, + "learning_rate": 4.1701587301587306e-05, + "loss": 0.4023, + "step": 2182 + }, + { + "epoch": 12.474285714285715, + "grad_norm": 27.280019760131836, + "learning_rate": 4.16952380952381e-05, + "loss": 0.4215, + "step": 2183 + }, + { + "epoch": 12.48, + "grad_norm": 33.64419937133789, + "learning_rate": 4.168888888888889e-05, + "loss": 0.414, + "step": 2184 + }, + { + "epoch": 12.485714285714286, + "grad_norm": 87.38959503173828, + "learning_rate": 4.1682539682539684e-05, + "loss": 0.6313, + "step": 2185 + }, + { + "epoch": 12.491428571428571, + "grad_norm": 44.78113555908203, + "learning_rate": 4.167619047619048e-05, + "loss": 0.4985, + "step": 2186 + }, + { + "epoch": 
12.497142857142856, + "grad_norm": 52.0896110534668, + "learning_rate": 4.166984126984127e-05, + "loss": 0.581, + "step": 2187 + }, + { + "epoch": 12.502857142857144, + "grad_norm": 97.08638000488281, + "learning_rate": 4.166349206349207e-05, + "loss": 0.78, + "step": 2188 + }, + { + "epoch": 12.508571428571429, + "grad_norm": 60.737979888916016, + "learning_rate": 4.165714285714286e-05, + "loss": 0.6149, + "step": 2189 + }, + { + "epoch": 12.514285714285714, + "grad_norm": 117.92765045166016, + "learning_rate": 4.1650793650793654e-05, + "loss": 0.5493, + "step": 2190 + }, + { + "epoch": 12.52, + "grad_norm": 27.144393920898438, + "learning_rate": 4.1644444444444446e-05, + "loss": 0.6426, + "step": 2191 + }, + { + "epoch": 12.525714285714285, + "grad_norm": 53.552608489990234, + "learning_rate": 4.1638095238095246e-05, + "loss": 0.6946, + "step": 2192 + }, + { + "epoch": 12.531428571428572, + "grad_norm": 41.25876998901367, + "learning_rate": 4.163174603174603e-05, + "loss": 0.6304, + "step": 2193 + }, + { + "epoch": 12.537142857142857, + "grad_norm": 36.41542053222656, + "learning_rate": 4.162539682539683e-05, + "loss": 0.4749, + "step": 2194 + }, + { + "epoch": 12.542857142857143, + "grad_norm": 48.75900650024414, + "learning_rate": 4.161904761904762e-05, + "loss": 0.5893, + "step": 2195 + }, + { + "epoch": 12.548571428571428, + "grad_norm": 59.34621810913086, + "learning_rate": 4.1612698412698416e-05, + "loss": 0.4529, + "step": 2196 + }, + { + "epoch": 12.554285714285715, + "grad_norm": 44.70014953613281, + "learning_rate": 4.160634920634921e-05, + "loss": 0.3575, + "step": 2197 + }, + { + "epoch": 12.56, + "grad_norm": 38.56593704223633, + "learning_rate": 4.16e-05, + "loss": 0.6915, + "step": 2198 + }, + { + "epoch": 12.565714285714286, + "grad_norm": 85.35823822021484, + "learning_rate": 4.1593650793650794e-05, + "loss": 0.4099, + "step": 2199 + }, + { + "epoch": 12.571428571428571, + "grad_norm": 47.486751556396484, + "learning_rate": 
4.1587301587301594e-05, + "loss": 0.583, + "step": 2200 + }, + { + "epoch": 12.577142857142857, + "grad_norm": 53.076847076416016, + "learning_rate": 4.158095238095238e-05, + "loss": 0.5741, + "step": 2201 + }, + { + "epoch": 12.582857142857144, + "grad_norm": 58.12879943847656, + "learning_rate": 4.157460317460318e-05, + "loss": 0.4875, + "step": 2202 + }, + { + "epoch": 12.588571428571429, + "grad_norm": 34.542964935302734, + "learning_rate": 4.156825396825397e-05, + "loss": 0.4918, + "step": 2203 + }, + { + "epoch": 12.594285714285714, + "grad_norm": 30.032142639160156, + "learning_rate": 4.1561904761904764e-05, + "loss": 0.4003, + "step": 2204 + }, + { + "epoch": 12.6, + "grad_norm": 95.49871063232422, + "learning_rate": 4.155555555555556e-05, + "loss": 0.4535, + "step": 2205 + }, + { + "epoch": 12.605714285714285, + "grad_norm": 61.23542404174805, + "learning_rate": 4.154920634920635e-05, + "loss": 0.6763, + "step": 2206 + }, + { + "epoch": 12.611428571428572, + "grad_norm": 48.1504020690918, + "learning_rate": 4.154285714285714e-05, + "loss": 0.5599, + "step": 2207 + }, + { + "epoch": 12.617142857142857, + "grad_norm": 56.297393798828125, + "learning_rate": 4.153650793650794e-05, + "loss": 0.3831, + "step": 2208 + }, + { + "epoch": 12.622857142857143, + "grad_norm": 35.83076477050781, + "learning_rate": 4.153015873015873e-05, + "loss": 0.5612, + "step": 2209 + }, + { + "epoch": 12.628571428571428, + "grad_norm": 37.802852630615234, + "learning_rate": 4.152380952380953e-05, + "loss": 0.31, + "step": 2210 + }, + { + "epoch": 12.634285714285713, + "grad_norm": 39.768943786621094, + "learning_rate": 4.151746031746032e-05, + "loss": 0.4237, + "step": 2211 + }, + { + "epoch": 12.64, + "grad_norm": 19.604061126708984, + "learning_rate": 4.151111111111111e-05, + "loss": 0.5153, + "step": 2212 + }, + { + "epoch": 12.645714285714286, + "grad_norm": 71.32669830322266, + "learning_rate": 4.1504761904761905e-05, + "loss": 0.6764, + "step": 2213 + }, + { + "epoch": 
12.651428571428571, + "grad_norm": 120.34733581542969, + "learning_rate": 4.1498412698412704e-05, + "loss": 0.4144, + "step": 2214 + }, + { + "epoch": 12.657142857142857, + "grad_norm": 87.2412338256836, + "learning_rate": 4.149206349206349e-05, + "loss": 0.3574, + "step": 2215 + }, + { + "epoch": 12.662857142857142, + "grad_norm": 40.069541931152344, + "learning_rate": 4.148571428571429e-05, + "loss": 0.5624, + "step": 2216 + }, + { + "epoch": 12.668571428571429, + "grad_norm": 27.889135360717773, + "learning_rate": 4.147936507936508e-05, + "loss": 0.536, + "step": 2217 + }, + { + "epoch": 12.674285714285714, + "grad_norm": 43.12520980834961, + "learning_rate": 4.1473015873015875e-05, + "loss": 0.6002, + "step": 2218 + }, + { + "epoch": 12.68, + "grad_norm": 44.10758590698242, + "learning_rate": 4.146666666666667e-05, + "loss": 0.5036, + "step": 2219 + }, + { + "epoch": 12.685714285714285, + "grad_norm": 23.964990615844727, + "learning_rate": 4.146031746031746e-05, + "loss": 0.3921, + "step": 2220 + }, + { + "epoch": 12.691428571428572, + "grad_norm": 52.23629379272461, + "learning_rate": 4.145396825396826e-05, + "loss": 0.4159, + "step": 2221 + }, + { + "epoch": 12.697142857142858, + "grad_norm": 20.55379295349121, + "learning_rate": 4.144761904761905e-05, + "loss": 0.4629, + "step": 2222 + }, + { + "epoch": 12.702857142857143, + "grad_norm": 62.3278694152832, + "learning_rate": 4.1441269841269845e-05, + "loss": 0.5655, + "step": 2223 + }, + { + "epoch": 12.708571428571428, + "grad_norm": 86.1930923461914, + "learning_rate": 4.143492063492064e-05, + "loss": 0.3896, + "step": 2224 + }, + { + "epoch": 12.714285714285714, + "grad_norm": 71.3177719116211, + "learning_rate": 4.1428571428571437e-05, + "loss": 0.563, + "step": 2225 + }, + { + "epoch": 12.72, + "grad_norm": 63.88593292236328, + "learning_rate": 4.142222222222222e-05, + "loss": 0.5526, + "step": 2226 + }, + { + "epoch": 12.725714285714286, + "grad_norm": 59.57171630859375, + "learning_rate": 
4.141587301587302e-05, + "loss": 0.5307, + "step": 2227 + }, + { + "epoch": 12.731428571428571, + "grad_norm": 19.78388023376465, + "learning_rate": 4.140952380952381e-05, + "loss": 0.3957, + "step": 2228 + }, + { + "epoch": 12.737142857142857, + "grad_norm": 32.11157989501953, + "learning_rate": 4.140317460317461e-05, + "loss": 0.591, + "step": 2229 + }, + { + "epoch": 12.742857142857144, + "grad_norm": 36.756797790527344, + "learning_rate": 4.13968253968254e-05, + "loss": 0.4532, + "step": 2230 + }, + { + "epoch": 12.748571428571429, + "grad_norm": 19.290258407592773, + "learning_rate": 4.139047619047619e-05, + "loss": 0.5087, + "step": 2231 + }, + { + "epoch": 12.754285714285714, + "grad_norm": 97.21000671386719, + "learning_rate": 4.1384126984126985e-05, + "loss": 0.4911, + "step": 2232 + }, + { + "epoch": 12.76, + "grad_norm": 648.7294921875, + "learning_rate": 4.1377777777777784e-05, + "loss": 0.4904, + "step": 2233 + }, + { + "epoch": 12.765714285714285, + "grad_norm": 35.48090744018555, + "learning_rate": 4.137142857142857e-05, + "loss": 0.5012, + "step": 2234 + }, + { + "epoch": 12.771428571428572, + "grad_norm": 43.22107696533203, + "learning_rate": 4.136507936507937e-05, + "loss": 0.4422, + "step": 2235 + }, + { + "epoch": 12.777142857142858, + "grad_norm": 113.79402160644531, + "learning_rate": 4.135873015873016e-05, + "loss": 0.5195, + "step": 2236 + }, + { + "epoch": 12.782857142857143, + "grad_norm": 85.96818542480469, + "learning_rate": 4.1352380952380955e-05, + "loss": 0.4418, + "step": 2237 + }, + { + "epoch": 12.788571428571428, + "grad_norm": 18.308883666992188, + "learning_rate": 4.134603174603175e-05, + "loss": 0.5449, + "step": 2238 + }, + { + "epoch": 12.794285714285714, + "grad_norm": 29.693681716918945, + "learning_rate": 4.133968253968254e-05, + "loss": 0.4464, + "step": 2239 + }, + { + "epoch": 12.8, + "grad_norm": 211.36032104492188, + "learning_rate": 4.133333333333333e-05, + "loss": 0.5757, + "step": 2240 + }, + { + "epoch": 
12.805714285714286, + "grad_norm": 35.264339447021484, + "learning_rate": 4.132698412698413e-05, + "loss": 0.4297, + "step": 2241 + }, + { + "epoch": 12.811428571428571, + "grad_norm": 35.70240020751953, + "learning_rate": 4.132063492063492e-05, + "loss": 0.464, + "step": 2242 + }, + { + "epoch": 12.817142857142857, + "grad_norm": 16.335657119750977, + "learning_rate": 4.131428571428572e-05, + "loss": 0.4074, + "step": 2243 + }, + { + "epoch": 12.822857142857142, + "grad_norm": 41.2120475769043, + "learning_rate": 4.130793650793651e-05, + "loss": 0.401, + "step": 2244 + }, + { + "epoch": 12.82857142857143, + "grad_norm": 25.838830947875977, + "learning_rate": 4.13015873015873e-05, + "loss": 0.5388, + "step": 2245 + }, + { + "epoch": 12.834285714285715, + "grad_norm": 38.36637496948242, + "learning_rate": 4.1295238095238095e-05, + "loss": 0.4832, + "step": 2246 + }, + { + "epoch": 12.84, + "grad_norm": 142.09210205078125, + "learning_rate": 4.1288888888888895e-05, + "loss": 0.5239, + "step": 2247 + }, + { + "epoch": 12.845714285714285, + "grad_norm": 101.1116943359375, + "learning_rate": 4.128253968253968e-05, + "loss": 0.5064, + "step": 2248 + }, + { + "epoch": 12.85142857142857, + "grad_norm": 38.06106948852539, + "learning_rate": 4.127619047619048e-05, + "loss": 0.3722, + "step": 2249 + }, + { + "epoch": 12.857142857142858, + "grad_norm": 40.42740249633789, + "learning_rate": 4.126984126984127e-05, + "loss": 0.4188, + "step": 2250 + }, + { + "epoch": 12.862857142857143, + "grad_norm": 25.689695358276367, + "learning_rate": 4.1263492063492065e-05, + "loss": 0.7067, + "step": 2251 + }, + { + "epoch": 12.868571428571428, + "grad_norm": 44.955421447753906, + "learning_rate": 4.125714285714286e-05, + "loss": 0.5671, + "step": 2252 + }, + { + "epoch": 12.874285714285714, + "grad_norm": 49.81996536254883, + "learning_rate": 4.125079365079365e-05, + "loss": 0.5818, + "step": 2253 + }, + { + "epoch": 12.88, + "grad_norm": 34.02114486694336, + "learning_rate": 
4.124444444444444e-05, + "loss": 0.4568, + "step": 2254 + }, + { + "epoch": 12.885714285714286, + "grad_norm": 26.276948928833008, + "learning_rate": 4.123809523809524e-05, + "loss": 0.4356, + "step": 2255 + }, + { + "epoch": 12.891428571428571, + "grad_norm": 78.74541473388672, + "learning_rate": 4.1231746031746035e-05, + "loss": 0.541, + "step": 2256 + }, + { + "epoch": 12.897142857142857, + "grad_norm": 20.253076553344727, + "learning_rate": 4.122539682539683e-05, + "loss": 0.3611, + "step": 2257 + }, + { + "epoch": 12.902857142857142, + "grad_norm": 28.861141204833984, + "learning_rate": 4.121904761904762e-05, + "loss": 0.4468, + "step": 2258 + }, + { + "epoch": 12.90857142857143, + "grad_norm": 53.7813606262207, + "learning_rate": 4.121269841269841e-05, + "loss": 0.5349, + "step": 2259 + }, + { + "epoch": 12.914285714285715, + "grad_norm": 93.12281799316406, + "learning_rate": 4.120634920634921e-05, + "loss": 0.3874, + "step": 2260 + }, + { + "epoch": 12.92, + "grad_norm": 72.68656158447266, + "learning_rate": 4.12e-05, + "loss": 0.4197, + "step": 2261 + }, + { + "epoch": 12.925714285714285, + "grad_norm": 48.06651306152344, + "learning_rate": 4.11936507936508e-05, + "loss": 0.5674, + "step": 2262 + }, + { + "epoch": 12.93142857142857, + "grad_norm": 129.9513702392578, + "learning_rate": 4.118730158730159e-05, + "loss": 0.3995, + "step": 2263 + }, + { + "epoch": 12.937142857142858, + "grad_norm": 79.52442932128906, + "learning_rate": 4.118095238095238e-05, + "loss": 0.4851, + "step": 2264 + }, + { + "epoch": 12.942857142857143, + "grad_norm": 27.562313079833984, + "learning_rate": 4.1174603174603176e-05, + "loss": 0.502, + "step": 2265 + }, + { + "epoch": 12.948571428571428, + "grad_norm": 47.59757995605469, + "learning_rate": 4.1168253968253975e-05, + "loss": 0.4192, + "step": 2266 + }, + { + "epoch": 12.954285714285714, + "grad_norm": 251.0091552734375, + "learning_rate": 4.116190476190476e-05, + "loss": 0.8031, + "step": 2267 + }, + { + "epoch": 12.96, + 
"grad_norm": 35.71852111816406, + "learning_rate": 4.115555555555556e-05, + "loss": 0.4427, + "step": 2268 + }, + { + "epoch": 12.965714285714286, + "grad_norm": 109.42433166503906, + "learning_rate": 4.1149206349206346e-05, + "loss": 0.5537, + "step": 2269 + }, + { + "epoch": 12.971428571428572, + "grad_norm": 69.72895050048828, + "learning_rate": 4.1142857142857146e-05, + "loss": 0.5632, + "step": 2270 + }, + { + "epoch": 12.977142857142857, + "grad_norm": 46.306617736816406, + "learning_rate": 4.113650793650794e-05, + "loss": 0.2779, + "step": 2271 + }, + { + "epoch": 12.982857142857142, + "grad_norm": 55.917381286621094, + "learning_rate": 4.113015873015873e-05, + "loss": 0.6122, + "step": 2272 + }, + { + "epoch": 12.98857142857143, + "grad_norm": 30.848764419555664, + "learning_rate": 4.1123809523809524e-05, + "loss": 0.4483, + "step": 2273 + }, + { + "epoch": 12.994285714285715, + "grad_norm": 43.35200500488281, + "learning_rate": 4.111746031746032e-05, + "loss": 0.4658, + "step": 2274 + }, + { + "epoch": 13.0, + "grad_norm": 48.12958526611328, + "learning_rate": 4.111111111111111e-05, + "loss": 0.4433, + "step": 2275 + }, + { + "epoch": 13.0, + "eval_classes": 0, + "eval_loss": 0.6659702658653259, + "eval_map": 0.9143, + "eval_map_50": 0.9608, + "eval_map_75": 0.9477, + "eval_map_large": 0.9144, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9143, + "eval_map_small": -1.0, + "eval_mar_1": 0.7806, + "eval_mar_10": 0.9721, + "eval_mar_100": 0.9819, + "eval_mar_100_per_class": 0.9819, + "eval_mar_large": 0.9819, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 15.677, + "eval_samples_per_second": 18.754, + "eval_steps_per_second": 2.36, + "step": 2275 + }, + { + "epoch": 13.005714285714285, + "grad_norm": 85.56483459472656, + "learning_rate": 4.110476190476191e-05, + "loss": 0.445, + "step": 2276 + }, + { + "epoch": 13.01142857142857, + "grad_norm": 27.930274963378906, + "learning_rate": 4.10984126984127e-05, + "loss": 0.7734, + 
"step": 2277 + }, + { + "epoch": 13.017142857142858, + "grad_norm": 51.549617767333984, + "learning_rate": 4.1092063492063494e-05, + "loss": 0.4166, + "step": 2278 + }, + { + "epoch": 13.022857142857143, + "grad_norm": 81.78390502929688, + "learning_rate": 4.1085714285714286e-05, + "loss": 0.4473, + "step": 2279 + }, + { + "epoch": 13.028571428571428, + "grad_norm": 53.621219635009766, + "learning_rate": 4.107936507936508e-05, + "loss": 0.4609, + "step": 2280 + }, + { + "epoch": 13.034285714285714, + "grad_norm": 101.24864959716797, + "learning_rate": 4.107301587301587e-05, + "loss": 0.5958, + "step": 2281 + }, + { + "epoch": 13.04, + "grad_norm": 45.11786651611328, + "learning_rate": 4.106666666666667e-05, + "loss": 0.5586, + "step": 2282 + }, + { + "epoch": 13.045714285714286, + "grad_norm": 25.740419387817383, + "learning_rate": 4.106031746031746e-05, + "loss": 0.4583, + "step": 2283 + }, + { + "epoch": 13.051428571428572, + "grad_norm": 31.131811141967773, + "learning_rate": 4.1053968253968256e-05, + "loss": 0.4216, + "step": 2284 + }, + { + "epoch": 13.057142857142857, + "grad_norm": 139.30738830566406, + "learning_rate": 4.104761904761905e-05, + "loss": 0.5814, + "step": 2285 + }, + { + "epoch": 13.062857142857142, + "grad_norm": 34.93601608276367, + "learning_rate": 4.104126984126984e-05, + "loss": 0.4903, + "step": 2286 + }, + { + "epoch": 13.06857142857143, + "grad_norm": 57.43114471435547, + "learning_rate": 4.1034920634920634e-05, + "loss": 0.6388, + "step": 2287 + }, + { + "epoch": 13.074285714285715, + "grad_norm": 32.30340576171875, + "learning_rate": 4.1028571428571434e-05, + "loss": 0.6437, + "step": 2288 + }, + { + "epoch": 13.08, + "grad_norm": 28.308734893798828, + "learning_rate": 4.1022222222222226e-05, + "loss": 0.495, + "step": 2289 + }, + { + "epoch": 13.085714285714285, + "grad_norm": 39.46627426147461, + "learning_rate": 4.101587301587302e-05, + "loss": 0.4642, + "step": 2290 + }, + { + "epoch": 13.09142857142857, + "grad_norm": 
91.99317932128906, + "learning_rate": 4.100952380952381e-05, + "loss": 0.5092, + "step": 2291 + }, + { + "epoch": 13.097142857142858, + "grad_norm": 85.90660858154297, + "learning_rate": 4.1003174603174604e-05, + "loss": 0.3799, + "step": 2292 + }, + { + "epoch": 13.102857142857143, + "grad_norm": 66.8467788696289, + "learning_rate": 4.09968253968254e-05, + "loss": 0.2814, + "step": 2293 + }, + { + "epoch": 13.108571428571429, + "grad_norm": 30.89203643798828, + "learning_rate": 4.099047619047619e-05, + "loss": 0.2674, + "step": 2294 + }, + { + "epoch": 13.114285714285714, + "grad_norm": 48.26861572265625, + "learning_rate": 4.098412698412699e-05, + "loss": 0.478, + "step": 2295 + }, + { + "epoch": 13.12, + "grad_norm": 72.69939422607422, + "learning_rate": 4.097777777777778e-05, + "loss": 0.4094, + "step": 2296 + }, + { + "epoch": 13.125714285714286, + "grad_norm": 49.83537673950195, + "learning_rate": 4.0971428571428574e-05, + "loss": 0.4477, + "step": 2297 + }, + { + "epoch": 13.131428571428572, + "grad_norm": 30.371856689453125, + "learning_rate": 4.096507936507937e-05, + "loss": 0.6477, + "step": 2298 + }, + { + "epoch": 13.137142857142857, + "grad_norm": 75.71834564208984, + "learning_rate": 4.0958730158730166e-05, + "loss": 0.4407, + "step": 2299 + }, + { + "epoch": 13.142857142857142, + "grad_norm": 42.12094497680664, + "learning_rate": 4.095238095238095e-05, + "loss": 0.5816, + "step": 2300 + }, + { + "epoch": 13.14857142857143, + "grad_norm": 66.28294372558594, + "learning_rate": 4.094603174603175e-05, + "loss": 0.5022, + "step": 2301 + }, + { + "epoch": 13.154285714285715, + "grad_norm": 62.495208740234375, + "learning_rate": 4.093968253968254e-05, + "loss": 0.5326, + "step": 2302 + }, + { + "epoch": 13.16, + "grad_norm": 26.98307228088379, + "learning_rate": 4.093333333333334e-05, + "loss": 0.3825, + "step": 2303 + }, + { + "epoch": 13.165714285714285, + "grad_norm": 52.273460388183594, + "learning_rate": 4.092698412698413e-05, + "loss": 0.4312, + 
"step": 2304 + }, + { + "epoch": 13.17142857142857, + "grad_norm": 54.64181900024414, + "learning_rate": 4.092063492063492e-05, + "loss": 0.4302, + "step": 2305 + }, + { + "epoch": 13.177142857142858, + "grad_norm": 72.7457046508789, + "learning_rate": 4.0914285714285715e-05, + "loss": 0.4043, + "step": 2306 + }, + { + "epoch": 13.182857142857143, + "grad_norm": 96.02617645263672, + "learning_rate": 4.0907936507936514e-05, + "loss": 0.3691, + "step": 2307 + }, + { + "epoch": 13.188571428571429, + "grad_norm": 30.374874114990234, + "learning_rate": 4.09015873015873e-05, + "loss": 0.4787, + "step": 2308 + }, + { + "epoch": 13.194285714285714, + "grad_norm": 41.272945404052734, + "learning_rate": 4.08952380952381e-05, + "loss": 0.4312, + "step": 2309 + }, + { + "epoch": 13.2, + "grad_norm": 68.13615417480469, + "learning_rate": 4.088888888888889e-05, + "loss": 0.6399, + "step": 2310 + }, + { + "epoch": 13.205714285714286, + "grad_norm": 26.37989044189453, + "learning_rate": 4.0882539682539685e-05, + "loss": 0.4583, + "step": 2311 + }, + { + "epoch": 13.211428571428572, + "grad_norm": 210.23959350585938, + "learning_rate": 4.087619047619048e-05, + "loss": 0.4212, + "step": 2312 + }, + { + "epoch": 13.217142857142857, + "grad_norm": 47.81938934326172, + "learning_rate": 4.086984126984127e-05, + "loss": 0.5077, + "step": 2313 + }, + { + "epoch": 13.222857142857142, + "grad_norm": 27.632001876831055, + "learning_rate": 4.086349206349206e-05, + "loss": 0.4903, + "step": 2314 + }, + { + "epoch": 13.228571428571428, + "grad_norm": 38.60955810546875, + "learning_rate": 4.085714285714286e-05, + "loss": 0.7757, + "step": 2315 + }, + { + "epoch": 13.234285714285715, + "grad_norm": 35.93268966674805, + "learning_rate": 4.085079365079365e-05, + "loss": 0.8017, + "step": 2316 + }, + { + "epoch": 13.24, + "grad_norm": 45.96718215942383, + "learning_rate": 4.084444444444445e-05, + "loss": 0.599, + "step": 2317 + }, + { + "epoch": 13.245714285714286, + "grad_norm": 54.23589324951172, 
+ "learning_rate": 4.083809523809524e-05, + "loss": 0.3001, + "step": 2318 + }, + { + "epoch": 13.251428571428571, + "grad_norm": 63.831703186035156, + "learning_rate": 4.083174603174603e-05, + "loss": 0.5831, + "step": 2319 + }, + { + "epoch": 13.257142857142856, + "grad_norm": 38.45108413696289, + "learning_rate": 4.0825396825396825e-05, + "loss": 0.3537, + "step": 2320 + }, + { + "epoch": 13.262857142857143, + "grad_norm": 40.974998474121094, + "learning_rate": 4.0819047619047624e-05, + "loss": 0.4593, + "step": 2321 + }, + { + "epoch": 13.268571428571429, + "grad_norm": 35.33351135253906, + "learning_rate": 4.081269841269841e-05, + "loss": 0.3977, + "step": 2322 + }, + { + "epoch": 13.274285714285714, + "grad_norm": 28.143640518188477, + "learning_rate": 4.080634920634921e-05, + "loss": 0.4476, + "step": 2323 + }, + { + "epoch": 13.28, + "grad_norm": 50.97539520263672, + "learning_rate": 4.08e-05, + "loss": 0.4968, + "step": 2324 + }, + { + "epoch": 13.285714285714286, + "grad_norm": 33.4579963684082, + "learning_rate": 4.0793650793650795e-05, + "loss": 0.3779, + "step": 2325 + }, + { + "epoch": 13.291428571428572, + "grad_norm": 113.8723373413086, + "learning_rate": 4.078730158730159e-05, + "loss": 0.6165, + "step": 2326 + }, + { + "epoch": 13.297142857142857, + "grad_norm": 80.13993835449219, + "learning_rate": 4.078095238095238e-05, + "loss": 0.5253, + "step": 2327 + }, + { + "epoch": 13.302857142857142, + "grad_norm": 125.358154296875, + "learning_rate": 4.077460317460318e-05, + "loss": 0.3868, + "step": 2328 + }, + { + "epoch": 13.308571428571428, + "grad_norm": 24.944622039794922, + "learning_rate": 4.076825396825397e-05, + "loss": 0.4771, + "step": 2329 + }, + { + "epoch": 13.314285714285715, + "grad_norm": 44.7222900390625, + "learning_rate": 4.0761904761904765e-05, + "loss": 0.5108, + "step": 2330 + }, + { + "epoch": 13.32, + "grad_norm": 68.44735717773438, + "learning_rate": 4.075555555555556e-05, + "loss": 0.4781, + "step": 2331 + }, + { + "epoch": 
13.325714285714286, + "grad_norm": 95.55236053466797, + "learning_rate": 4.074920634920635e-05, + "loss": 0.4965, + "step": 2332 + }, + { + "epoch": 13.331428571428571, + "grad_norm": 29.676864624023438, + "learning_rate": 4.074285714285714e-05, + "loss": 0.5067, + "step": 2333 + }, + { + "epoch": 13.337142857142856, + "grad_norm": 54.00707244873047, + "learning_rate": 4.073650793650794e-05, + "loss": 0.393, + "step": 2334 + }, + { + "epoch": 13.342857142857143, + "grad_norm": 79.81952667236328, + "learning_rate": 4.073015873015873e-05, + "loss": 0.4672, + "step": 2335 + }, + { + "epoch": 13.348571428571429, + "grad_norm": 74.93247985839844, + "learning_rate": 4.072380952380953e-05, + "loss": 0.3498, + "step": 2336 + }, + { + "epoch": 13.354285714285714, + "grad_norm": 25.939695358276367, + "learning_rate": 4.071746031746032e-05, + "loss": 0.4535, + "step": 2337 + }, + { + "epoch": 13.36, + "grad_norm": 38.312496185302734, + "learning_rate": 4.071111111111111e-05, + "loss": 0.4496, + "step": 2338 + }, + { + "epoch": 13.365714285714287, + "grad_norm": 63.252864837646484, + "learning_rate": 4.0704761904761905e-05, + "loss": 0.3789, + "step": 2339 + }, + { + "epoch": 13.371428571428572, + "grad_norm": 38.034812927246094, + "learning_rate": 4.0698412698412705e-05, + "loss": 0.379, + "step": 2340 + }, + { + "epoch": 13.377142857142857, + "grad_norm": 107.1825180053711, + "learning_rate": 4.069206349206349e-05, + "loss": 0.4296, + "step": 2341 + }, + { + "epoch": 13.382857142857143, + "grad_norm": 48.01869201660156, + "learning_rate": 4.068571428571429e-05, + "loss": 0.5187, + "step": 2342 + }, + { + "epoch": 13.388571428571428, + "grad_norm": 98.60651397705078, + "learning_rate": 4.067936507936508e-05, + "loss": 0.7235, + "step": 2343 + }, + { + "epoch": 13.394285714285715, + "grad_norm": 38.27113723754883, + "learning_rate": 4.0673015873015875e-05, + "loss": 0.742, + "step": 2344 + }, + { + "epoch": 13.4, + "grad_norm": 24.036109924316406, + "learning_rate": 
4.066666666666667e-05, + "loss": 0.5235, + "step": 2345 + }, + { + "epoch": 13.405714285714286, + "grad_norm": 27.38574981689453, + "learning_rate": 4.066031746031746e-05, + "loss": 0.6086, + "step": 2346 + }, + { + "epoch": 13.411428571428571, + "grad_norm": 42.157020568847656, + "learning_rate": 4.065396825396825e-05, + "loss": 0.3587, + "step": 2347 + }, + { + "epoch": 13.417142857142856, + "grad_norm": 46.96993637084961, + "learning_rate": 4.064761904761905e-05, + "loss": 0.3667, + "step": 2348 + }, + { + "epoch": 13.422857142857143, + "grad_norm": 115.64402770996094, + "learning_rate": 4.064126984126984e-05, + "loss": 0.3629, + "step": 2349 + }, + { + "epoch": 13.428571428571429, + "grad_norm": 25.6961727142334, + "learning_rate": 4.063492063492064e-05, + "loss": 0.5351, + "step": 2350 + }, + { + "epoch": 13.434285714285714, + "grad_norm": 31.624475479125977, + "learning_rate": 4.062857142857143e-05, + "loss": 0.3819, + "step": 2351 + }, + { + "epoch": 13.44, + "grad_norm": 90.46236419677734, + "learning_rate": 4.062222222222222e-05, + "loss": 0.6151, + "step": 2352 + }, + { + "epoch": 13.445714285714285, + "grad_norm": 100.17254638671875, + "learning_rate": 4.0615873015873016e-05, + "loss": 0.3905, + "step": 2353 + }, + { + "epoch": 13.451428571428572, + "grad_norm": 219.26023864746094, + "learning_rate": 4.0609523809523815e-05, + "loss": 0.5057, + "step": 2354 + }, + { + "epoch": 13.457142857142857, + "grad_norm": 52.1915283203125, + "learning_rate": 4.06031746031746e-05, + "loss": 0.3385, + "step": 2355 + }, + { + "epoch": 13.462857142857143, + "grad_norm": 30.643356323242188, + "learning_rate": 4.05968253968254e-05, + "loss": 0.9016, + "step": 2356 + }, + { + "epoch": 13.468571428571428, + "grad_norm": 61.73093795776367, + "learning_rate": 4.059047619047619e-05, + "loss": 0.4592, + "step": 2357 + }, + { + "epoch": 13.474285714285715, + "grad_norm": 102.5331039428711, + "learning_rate": 4.0584126984126986e-05, + "loss": 0.6305, + "step": 2358 + }, + { + 
"epoch": 13.48, + "grad_norm": 35.591346740722656, + "learning_rate": 4.057777777777778e-05, + "loss": 0.3772, + "step": 2359 + }, + { + "epoch": 13.485714285714286, + "grad_norm": 235.7429962158203, + "learning_rate": 4.057142857142857e-05, + "loss": 0.5206, + "step": 2360 + }, + { + "epoch": 13.491428571428571, + "grad_norm": 82.47492218017578, + "learning_rate": 4.0565079365079364e-05, + "loss": 0.3429, + "step": 2361 + }, + { + "epoch": 13.497142857142856, + "grad_norm": 38.43523406982422, + "learning_rate": 4.055873015873016e-05, + "loss": 0.7014, + "step": 2362 + }, + { + "epoch": 13.502857142857144, + "grad_norm": 76.04268646240234, + "learning_rate": 4.0552380952380956e-05, + "loss": 0.478, + "step": 2363 + }, + { + "epoch": 13.508571428571429, + "grad_norm": 33.30377960205078, + "learning_rate": 4.054603174603175e-05, + "loss": 0.3771, + "step": 2364 + }, + { + "epoch": 13.514285714285714, + "grad_norm": 33.07572555541992, + "learning_rate": 4.053968253968254e-05, + "loss": 0.442, + "step": 2365 + }, + { + "epoch": 13.52, + "grad_norm": 47.697723388671875, + "learning_rate": 4.0533333333333334e-05, + "loss": 0.3184, + "step": 2366 + }, + { + "epoch": 13.525714285714285, + "grad_norm": 66.57059478759766, + "learning_rate": 4.052698412698413e-05, + "loss": 0.3617, + "step": 2367 + }, + { + "epoch": 13.531428571428572, + "grad_norm": 17.434370040893555, + "learning_rate": 4.052063492063492e-05, + "loss": 0.4504, + "step": 2368 + }, + { + "epoch": 13.537142857142857, + "grad_norm": 22.381181716918945, + "learning_rate": 4.051428571428572e-05, + "loss": 0.4599, + "step": 2369 + }, + { + "epoch": 13.542857142857143, + "grad_norm": 65.71564483642578, + "learning_rate": 4.050793650793651e-05, + "loss": 0.2784, + "step": 2370 + }, + { + "epoch": 13.548571428571428, + "grad_norm": 64.0141372680664, + "learning_rate": 4.0501587301587304e-05, + "loss": 0.3171, + "step": 2371 + }, + { + "epoch": 13.554285714285715, + "grad_norm": 124.51643371582031, + "learning_rate": 
4.0495238095238096e-05, + "loss": 0.6788, + "step": 2372 + }, + { + "epoch": 13.56, + "grad_norm": 30.546184539794922, + "learning_rate": 4.0488888888888896e-05, + "loss": 0.6532, + "step": 2373 + }, + { + "epoch": 13.565714285714286, + "grad_norm": 104.8770523071289, + "learning_rate": 4.048253968253968e-05, + "loss": 0.3769, + "step": 2374 + }, + { + "epoch": 13.571428571428571, + "grad_norm": 52.30247116088867, + "learning_rate": 4.047619047619048e-05, + "loss": 0.624, + "step": 2375 + }, + { + "epoch": 13.577142857142857, + "grad_norm": 37.756839752197266, + "learning_rate": 4.0469841269841274e-05, + "loss": 0.592, + "step": 2376 + }, + { + "epoch": 13.582857142857144, + "grad_norm": 64.60394287109375, + "learning_rate": 4.0463492063492066e-05, + "loss": 0.4058, + "step": 2377 + }, + { + "epoch": 13.588571428571429, + "grad_norm": 53.130348205566406, + "learning_rate": 4.045714285714286e-05, + "loss": 0.4623, + "step": 2378 + }, + { + "epoch": 13.594285714285714, + "grad_norm": 29.630857467651367, + "learning_rate": 4.045079365079365e-05, + "loss": 0.4196, + "step": 2379 + }, + { + "epoch": 13.6, + "grad_norm": 46.71171188354492, + "learning_rate": 4.0444444444444444e-05, + "loss": 0.5538, + "step": 2380 + }, + { + "epoch": 13.605714285714285, + "grad_norm": 38.45140838623047, + "learning_rate": 4.0438095238095244e-05, + "loss": 0.3889, + "step": 2381 + }, + { + "epoch": 13.611428571428572, + "grad_norm": 762.2544555664062, + "learning_rate": 4.043174603174603e-05, + "loss": 0.345, + "step": 2382 + }, + { + "epoch": 13.617142857142857, + "grad_norm": 403.2573547363281, + "learning_rate": 4.042539682539683e-05, + "loss": 0.3458, + "step": 2383 + }, + { + "epoch": 13.622857142857143, + "grad_norm": 40.29979705810547, + "learning_rate": 4.041904761904762e-05, + "loss": 0.2905, + "step": 2384 + }, + { + "epoch": 13.628571428571428, + "grad_norm": 56.68416213989258, + "learning_rate": 4.0412698412698414e-05, + "loss": 0.4291, + "step": 2385 + }, + { + "epoch": 
13.634285714285713, + "grad_norm": 25.499067306518555, + "learning_rate": 4.040634920634921e-05, + "loss": 0.4255, + "step": 2386 + }, + { + "epoch": 13.64, + "grad_norm": 34.191986083984375, + "learning_rate": 4.0400000000000006e-05, + "loss": 0.5086, + "step": 2387 + }, + { + "epoch": 13.645714285714286, + "grad_norm": 21.267122268676758, + "learning_rate": 4.039365079365079e-05, + "loss": 0.5265, + "step": 2388 + }, + { + "epoch": 13.651428571428571, + "grad_norm": 33.64370346069336, + "learning_rate": 4.038730158730159e-05, + "loss": 0.5539, + "step": 2389 + }, + { + "epoch": 13.657142857142857, + "grad_norm": 43.58633804321289, + "learning_rate": 4.038095238095238e-05, + "loss": 0.3569, + "step": 2390 + }, + { + "epoch": 13.662857142857142, + "grad_norm": 57.552650451660156, + "learning_rate": 4.037460317460318e-05, + "loss": 0.331, + "step": 2391 + }, + { + "epoch": 13.668571428571429, + "grad_norm": 17.721899032592773, + "learning_rate": 4.036825396825397e-05, + "loss": 0.5111, + "step": 2392 + }, + { + "epoch": 13.674285714285714, + "grad_norm": 49.80946731567383, + "learning_rate": 4.036190476190476e-05, + "loss": 0.6951, + "step": 2393 + }, + { + "epoch": 13.68, + "grad_norm": 64.67133331298828, + "learning_rate": 4.0355555555555555e-05, + "loss": 0.4337, + "step": 2394 + }, + { + "epoch": 13.685714285714285, + "grad_norm": 32.472862243652344, + "learning_rate": 4.0349206349206354e-05, + "loss": 0.4215, + "step": 2395 + }, + { + "epoch": 13.691428571428572, + "grad_norm": 22.962915420532227, + "learning_rate": 4.034285714285715e-05, + "loss": 0.498, + "step": 2396 + }, + { + "epoch": 13.697142857142858, + "grad_norm": 287.5697021484375, + "learning_rate": 4.033650793650794e-05, + "loss": 0.4389, + "step": 2397 + }, + { + "epoch": 13.702857142857143, + "grad_norm": 145.85488891601562, + "learning_rate": 4.033015873015873e-05, + "loss": 0.3825, + "step": 2398 + }, + { + "epoch": 13.708571428571428, + "grad_norm": 22.51424217224121, + "learning_rate": 
4.0323809523809525e-05, + "loss": 0.4401, + "step": 2399 + }, + { + "epoch": 13.714285714285714, + "grad_norm": 54.225807189941406, + "learning_rate": 4.031746031746032e-05, + "loss": 0.4892, + "step": 2400 + }, + { + "epoch": 13.72, + "grad_norm": 58.608455657958984, + "learning_rate": 4.031111111111111e-05, + "loss": 0.6965, + "step": 2401 + }, + { + "epoch": 13.725714285714286, + "grad_norm": 97.2522964477539, + "learning_rate": 4.030476190476191e-05, + "loss": 0.5691, + "step": 2402 + }, + { + "epoch": 13.731428571428571, + "grad_norm": 44.312068939208984, + "learning_rate": 4.02984126984127e-05, + "loss": 0.3953, + "step": 2403 + }, + { + "epoch": 13.737142857142857, + "grad_norm": 31.484203338623047, + "learning_rate": 4.0292063492063495e-05, + "loss": 0.6223, + "step": 2404 + }, + { + "epoch": 13.742857142857144, + "grad_norm": 37.49378204345703, + "learning_rate": 4.028571428571429e-05, + "loss": 0.4558, + "step": 2405 + }, + { + "epoch": 13.748571428571429, + "grad_norm": 23.190092086791992, + "learning_rate": 4.027936507936509e-05, + "loss": 0.4926, + "step": 2406 + }, + { + "epoch": 13.754285714285714, + "grad_norm": 51.90544509887695, + "learning_rate": 4.027301587301587e-05, + "loss": 0.5645, + "step": 2407 + }, + { + "epoch": 13.76, + "grad_norm": 56.01433181762695, + "learning_rate": 4.026666666666667e-05, + "loss": 0.4741, + "step": 2408 + }, + { + "epoch": 13.765714285714285, + "grad_norm": 26.729398727416992, + "learning_rate": 4.0260317460317465e-05, + "loss": 0.3693, + "step": 2409 + }, + { + "epoch": 13.771428571428572, + "grad_norm": 66.43302917480469, + "learning_rate": 4.025396825396826e-05, + "loss": 0.4163, + "step": 2410 + }, + { + "epoch": 13.777142857142858, + "grad_norm": 73.76277923583984, + "learning_rate": 4.024761904761905e-05, + "loss": 0.3351, + "step": 2411 + }, + { + "epoch": 13.782857142857143, + "grad_norm": 24.860280990600586, + "learning_rate": 4.024126984126984e-05, + "loss": 0.37, + "step": 2412 + }, + { + "epoch": 
13.788571428571428, + "grad_norm": 24.26219367980957, + "learning_rate": 4.0234920634920635e-05, + "loss": 0.5209, + "step": 2413 + }, + { + "epoch": 13.794285714285714, + "grad_norm": 75.13851165771484, + "learning_rate": 4.0228571428571434e-05, + "loss": 0.4331, + "step": 2414 + }, + { + "epoch": 13.8, + "grad_norm": 76.9747314453125, + "learning_rate": 4.022222222222222e-05, + "loss": 0.6054, + "step": 2415 + }, + { + "epoch": 13.805714285714286, + "grad_norm": 67.45930480957031, + "learning_rate": 4.021587301587302e-05, + "loss": 0.5507, + "step": 2416 + }, + { + "epoch": 13.811428571428571, + "grad_norm": 60.373104095458984, + "learning_rate": 4.020952380952381e-05, + "loss": 0.3903, + "step": 2417 + }, + { + "epoch": 13.817142857142857, + "grad_norm": 35.6954345703125, + "learning_rate": 4.0203174603174605e-05, + "loss": 0.5955, + "step": 2418 + }, + { + "epoch": 13.822857142857142, + "grad_norm": 29.355072021484375, + "learning_rate": 4.01968253968254e-05, + "loss": 0.5763, + "step": 2419 + }, + { + "epoch": 13.82857142857143, + "grad_norm": 22.599504470825195, + "learning_rate": 4.01904761904762e-05, + "loss": 0.6903, + "step": 2420 + }, + { + "epoch": 13.834285714285715, + "grad_norm": 44.9334831237793, + "learning_rate": 4.018412698412698e-05, + "loss": 0.3831, + "step": 2421 + }, + { + "epoch": 13.84, + "grad_norm": 24.91976547241211, + "learning_rate": 4.017777777777778e-05, + "loss": 0.6801, + "step": 2422 + }, + { + "epoch": 13.845714285714285, + "grad_norm": 42.77363204956055, + "learning_rate": 4.017142857142857e-05, + "loss": 0.3493, + "step": 2423 + }, + { + "epoch": 13.85142857142857, + "grad_norm": 57.37465286254883, + "learning_rate": 4.016507936507937e-05, + "loss": 0.437, + "step": 2424 + }, + { + "epoch": 13.857142857142858, + "grad_norm": 47.02690887451172, + "learning_rate": 4.015873015873016e-05, + "loss": 0.427, + "step": 2425 + }, + { + "epoch": 13.862857142857143, + "grad_norm": 76.19194793701172, + "learning_rate": 
4.015238095238095e-05, + "loss": 0.558, + "step": 2426 + }, + { + "epoch": 13.868571428571428, + "grad_norm": 29.325809478759766, + "learning_rate": 4.0146031746031746e-05, + "loss": 0.4203, + "step": 2427 + }, + { + "epoch": 13.874285714285714, + "grad_norm": 19.02168846130371, + "learning_rate": 4.0139682539682545e-05, + "loss": 0.4868, + "step": 2428 + }, + { + "epoch": 13.88, + "grad_norm": 26.71924591064453, + "learning_rate": 4.013333333333333e-05, + "loss": 0.5272, + "step": 2429 + }, + { + "epoch": 13.885714285714286, + "grad_norm": 34.36274337768555, + "learning_rate": 4.012698412698413e-05, + "loss": 0.5817, + "step": 2430 + }, + { + "epoch": 13.891428571428571, + "grad_norm": 41.352142333984375, + "learning_rate": 4.012063492063492e-05, + "loss": 0.3324, + "step": 2431 + }, + { + "epoch": 13.897142857142857, + "grad_norm": 52.86463165283203, + "learning_rate": 4.0114285714285715e-05, + "loss": 0.4886, + "step": 2432 + }, + { + "epoch": 13.902857142857142, + "grad_norm": 49.43160629272461, + "learning_rate": 4.010793650793651e-05, + "loss": 0.7166, + "step": 2433 + }, + { + "epoch": 13.90857142857143, + "grad_norm": 17.74382209777832, + "learning_rate": 4.01015873015873e-05, + "loss": 0.3946, + "step": 2434 + }, + { + "epoch": 13.914285714285715, + "grad_norm": 68.80399322509766, + "learning_rate": 4.00952380952381e-05, + "loss": 0.4132, + "step": 2435 + }, + { + "epoch": 13.92, + "grad_norm": 280.85858154296875, + "learning_rate": 4.008888888888889e-05, + "loss": 0.3927, + "step": 2436 + }, + { + "epoch": 13.925714285714285, + "grad_norm": 455.3421630859375, + "learning_rate": 4.0082539682539685e-05, + "loss": 0.418, + "step": 2437 + }, + { + "epoch": 13.93142857142857, + "grad_norm": 37.32456970214844, + "learning_rate": 4.007619047619048e-05, + "loss": 0.411, + "step": 2438 + }, + { + "epoch": 13.937142857142858, + "grad_norm": 86.37480926513672, + "learning_rate": 4.006984126984127e-05, + "loss": 0.4207, + "step": 2439 + }, + { + "epoch": 
13.942857142857143, + "grad_norm": 148.76866149902344, + "learning_rate": 4.006349206349206e-05, + "loss": 0.538, + "step": 2440 + }, + { + "epoch": 13.948571428571428, + "grad_norm": 67.73564147949219, + "learning_rate": 4.005714285714286e-05, + "loss": 0.4706, + "step": 2441 + }, + { + "epoch": 13.954285714285714, + "grad_norm": 54.38566589355469, + "learning_rate": 4.0050793650793655e-05, + "loss": 0.7123, + "step": 2442 + }, + { + "epoch": 13.96, + "grad_norm": 50.352149963378906, + "learning_rate": 4.004444444444445e-05, + "loss": 0.6072, + "step": 2443 + }, + { + "epoch": 13.965714285714286, + "grad_norm": 234.63272094726562, + "learning_rate": 4.003809523809524e-05, + "loss": 0.4392, + "step": 2444 + }, + { + "epoch": 13.971428571428572, + "grad_norm": 58.83427047729492, + "learning_rate": 4.003174603174603e-05, + "loss": 0.4459, + "step": 2445 + }, + { + "epoch": 13.977142857142857, + "grad_norm": 28.319311141967773, + "learning_rate": 4.0025396825396826e-05, + "loss": 0.4224, + "step": 2446 + }, + { + "epoch": 13.982857142857142, + "grad_norm": 21.747053146362305, + "learning_rate": 4.0019047619047625e-05, + "loss": 0.675, + "step": 2447 + }, + { + "epoch": 13.98857142857143, + "grad_norm": 74.70608520507812, + "learning_rate": 4.001269841269841e-05, + "loss": 0.5733, + "step": 2448 + }, + { + "epoch": 13.994285714285715, + "grad_norm": 71.20796966552734, + "learning_rate": 4.000634920634921e-05, + "loss": 0.4269, + "step": 2449 + }, + { + "epoch": 14.0, + "grad_norm": 24.509675979614258, + "learning_rate": 4e-05, + "loss": 0.446, + "step": 2450 + }, + { + "epoch": 14.0, + "eval_classes": 0, + "eval_loss": 0.6337568759918213, + "eval_map": 0.9186, + "eval_map_50": 0.96, + "eval_map_75": 0.9528, + "eval_map_large": 0.9186, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9186, + "eval_map_small": -1.0, + "eval_mar_1": 0.786, + "eval_mar_10": 0.9676, + "eval_mar_100": 0.9752, + "eval_mar_100_per_class": 0.9752, + "eval_mar_large": 0.9752, + 
"eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 15.4814, + "eval_samples_per_second": 18.991, + "eval_steps_per_second": 2.39, + "step": 2450 + }, + { + "epoch": 14.005714285714285, + "grad_norm": 36.81268310546875, + "learning_rate": 3.9993650793650796e-05, + "loss": 0.3741, + "step": 2451 + }, + { + "epoch": 14.01142857142857, + "grad_norm": 35.905799865722656, + "learning_rate": 3.998730158730159e-05, + "loss": 0.4541, + "step": 2452 + }, + { + "epoch": 14.017142857142858, + "grad_norm": 23.208505630493164, + "learning_rate": 3.998095238095239e-05, + "loss": 0.3826, + "step": 2453 + }, + { + "epoch": 14.022857142857143, + "grad_norm": 22.888587951660156, + "learning_rate": 3.9974603174603174e-05, + "loss": 0.5514, + "step": 2454 + }, + { + "epoch": 14.028571428571428, + "grad_norm": 41.91596603393555, + "learning_rate": 3.996825396825397e-05, + "loss": 0.3492, + "step": 2455 + }, + { + "epoch": 14.034285714285714, + "grad_norm": 34.92245864868164, + "learning_rate": 3.996190476190476e-05, + "loss": 0.4104, + "step": 2456 + }, + { + "epoch": 14.04, + "grad_norm": 29.703365325927734, + "learning_rate": 3.995555555555556e-05, + "loss": 0.4348, + "step": 2457 + }, + { + "epoch": 14.045714285714286, + "grad_norm": 60.400856018066406, + "learning_rate": 3.994920634920635e-05, + "loss": 0.515, + "step": 2458 + }, + { + "epoch": 14.051428571428572, + "grad_norm": 55.89780807495117, + "learning_rate": 3.9942857142857144e-05, + "loss": 0.5957, + "step": 2459 + }, + { + "epoch": 14.057142857142857, + "grad_norm": 40.55846405029297, + "learning_rate": 3.9936507936507936e-05, + "loss": 0.457, + "step": 2460 + }, + { + "epoch": 14.062857142857142, + "grad_norm": 109.13560485839844, + "learning_rate": 3.9930158730158736e-05, + "loss": 0.3506, + "step": 2461 + }, + { + "epoch": 14.06857142857143, + "grad_norm": 79.74500274658203, + "learning_rate": 3.992380952380952e-05, + "loss": 0.36, + "step": 2462 + }, + { + "epoch": 14.074285714285715, + "grad_norm": 
20.207691192626953, + "learning_rate": 3.991746031746032e-05, + "loss": 0.6307, + "step": 2463 + }, + { + "epoch": 14.08, + "grad_norm": 68.04793548583984, + "learning_rate": 3.9911111111111114e-05, + "loss": 0.4574, + "step": 2464 + }, + { + "epoch": 14.085714285714285, + "grad_norm": 261.51654052734375, + "learning_rate": 3.9904761904761906e-05, + "loss": 0.4338, + "step": 2465 + }, + { + "epoch": 14.09142857142857, + "grad_norm": 40.91068649291992, + "learning_rate": 3.98984126984127e-05, + "loss": 0.5558, + "step": 2466 + }, + { + "epoch": 14.097142857142858, + "grad_norm": 30.04355239868164, + "learning_rate": 3.989206349206349e-05, + "loss": 0.3277, + "step": 2467 + }, + { + "epoch": 14.102857142857143, + "grad_norm": 37.32410430908203, + "learning_rate": 3.9885714285714284e-05, + "loss": 0.5457, + "step": 2468 + }, + { + "epoch": 14.108571428571429, + "grad_norm": 32.55274963378906, + "learning_rate": 3.9879365079365084e-05, + "loss": 0.463, + "step": 2469 + }, + { + "epoch": 14.114285714285714, + "grad_norm": 19.690839767456055, + "learning_rate": 3.9873015873015876e-05, + "loss": 0.4994, + "step": 2470 + }, + { + "epoch": 14.12, + "grad_norm": 38.527259826660156, + "learning_rate": 3.986666666666667e-05, + "loss": 0.4435, + "step": 2471 + }, + { + "epoch": 14.125714285714286, + "grad_norm": 41.78245544433594, + "learning_rate": 3.986031746031746e-05, + "loss": 0.465, + "step": 2472 + }, + { + "epoch": 14.131428571428572, + "grad_norm": 34.67498016357422, + "learning_rate": 3.9853968253968254e-05, + "loss": 0.44, + "step": 2473 + }, + { + "epoch": 14.137142857142857, + "grad_norm": 27.37482261657715, + "learning_rate": 3.9847619047619054e-05, + "loss": 0.3372, + "step": 2474 + }, + { + "epoch": 14.142857142857142, + "grad_norm": 156.32009887695312, + "learning_rate": 3.984126984126984e-05, + "loss": 0.5525, + "step": 2475 + }, + { + "epoch": 14.14857142857143, + "grad_norm": 45.042030334472656, + "learning_rate": 3.983492063492064e-05, + "loss": 0.3852, + 
"step": 2476 + }, + { + "epoch": 14.154285714285715, + "grad_norm": 93.20071411132812, + "learning_rate": 3.982857142857143e-05, + "loss": 0.4532, + "step": 2477 + }, + { + "epoch": 14.16, + "grad_norm": 54.58212661743164, + "learning_rate": 3.9822222222222224e-05, + "loss": 0.3697, + "step": 2478 + }, + { + "epoch": 14.165714285714285, + "grad_norm": 74.4084243774414, + "learning_rate": 3.981587301587302e-05, + "loss": 0.4921, + "step": 2479 + }, + { + "epoch": 14.17142857142857, + "grad_norm": 70.5937728881836, + "learning_rate": 3.9809523809523816e-05, + "loss": 0.7412, + "step": 2480 + }, + { + "epoch": 14.177142857142858, + "grad_norm": 85.14163208007812, + "learning_rate": 3.98031746031746e-05, + "loss": 0.6297, + "step": 2481 + }, + { + "epoch": 14.182857142857143, + "grad_norm": 29.404062271118164, + "learning_rate": 3.97968253968254e-05, + "loss": 0.615, + "step": 2482 + }, + { + "epoch": 14.188571428571429, + "grad_norm": 284.3392028808594, + "learning_rate": 3.9790476190476194e-05, + "loss": 0.4211, + "step": 2483 + }, + { + "epoch": 14.194285714285714, + "grad_norm": 46.706485748291016, + "learning_rate": 3.978412698412699e-05, + "loss": 0.3809, + "step": 2484 + }, + { + "epoch": 14.2, + "grad_norm": 40.72541046142578, + "learning_rate": 3.977777777777778e-05, + "loss": 0.4838, + "step": 2485 + }, + { + "epoch": 14.205714285714286, + "grad_norm": 24.260883331298828, + "learning_rate": 3.977142857142857e-05, + "loss": 0.4957, + "step": 2486 + }, + { + "epoch": 14.211428571428572, + "grad_norm": 29.26984977722168, + "learning_rate": 3.9765079365079365e-05, + "loss": 0.429, + "step": 2487 + }, + { + "epoch": 14.217142857142857, + "grad_norm": 101.20543670654297, + "learning_rate": 3.9758730158730164e-05, + "loss": 0.4955, + "step": 2488 + }, + { + "epoch": 14.222857142857142, + "grad_norm": 28.793743133544922, + "learning_rate": 3.975238095238095e-05, + "loss": 0.5254, + "step": 2489 + }, + { + "epoch": 14.228571428571428, + "grad_norm": 84.4249267578125, 
+ "learning_rate": 3.974603174603175e-05, + "loss": 0.3372, + "step": 2490 + }, + { + "epoch": 14.234285714285715, + "grad_norm": 65.11558532714844, + "learning_rate": 3.973968253968254e-05, + "loss": 0.5795, + "step": 2491 + }, + { + "epoch": 14.24, + "grad_norm": 51.24958419799805, + "learning_rate": 3.9733333333333335e-05, + "loss": 0.5393, + "step": 2492 + }, + { + "epoch": 14.245714285714286, + "grad_norm": 37.17159652709961, + "learning_rate": 3.972698412698413e-05, + "loss": 0.5153, + "step": 2493 + }, + { + "epoch": 14.251428571428571, + "grad_norm": 28.747060775756836, + "learning_rate": 3.972063492063493e-05, + "loss": 0.5044, + "step": 2494 + }, + { + "epoch": 14.257142857142856, + "grad_norm": 29.33413314819336, + "learning_rate": 3.971428571428571e-05, + "loss": 0.602, + "step": 2495 + }, + { + "epoch": 14.262857142857143, + "grad_norm": 70.5453872680664, + "learning_rate": 3.970793650793651e-05, + "loss": 0.5681, + "step": 2496 + }, + { + "epoch": 14.268571428571429, + "grad_norm": 44.95606994628906, + "learning_rate": 3.97015873015873e-05, + "loss": 0.3336, + "step": 2497 + }, + { + "epoch": 14.274285714285714, + "grad_norm": 47.51212692260742, + "learning_rate": 3.96952380952381e-05, + "loss": 0.5591, + "step": 2498 + }, + { + "epoch": 14.28, + "grad_norm": 39.193973541259766, + "learning_rate": 3.968888888888889e-05, + "loss": 0.3975, + "step": 2499 + }, + { + "epoch": 14.285714285714286, + "grad_norm": 151.0368194580078, + "learning_rate": 3.968253968253968e-05, + "loss": 0.4476, + "step": 2500 + }, + { + "epoch": 14.291428571428572, + "grad_norm": 27.16161346435547, + "learning_rate": 3.9676190476190475e-05, + "loss": 0.3779, + "step": 2501 + }, + { + "epoch": 14.297142857142857, + "grad_norm": 35.30384826660156, + "learning_rate": 3.9669841269841275e-05, + "loss": 0.5124, + "step": 2502 + }, + { + "epoch": 14.302857142857142, + "grad_norm": 26.686216354370117, + "learning_rate": 3.966349206349206e-05, + "loss": 0.4631, + "step": 2503 + }, + { + 
"epoch": 14.308571428571428, + "grad_norm": 26.295207977294922, + "learning_rate": 3.965714285714286e-05, + "loss": 0.4499, + "step": 2504 + }, + { + "epoch": 14.314285714285715, + "grad_norm": 33.198848724365234, + "learning_rate": 3.965079365079365e-05, + "loss": 0.408, + "step": 2505 + }, + { + "epoch": 14.32, + "grad_norm": 29.399959564208984, + "learning_rate": 3.9644444444444445e-05, + "loss": 0.4149, + "step": 2506 + }, + { + "epoch": 14.325714285714286, + "grad_norm": 142.16004943847656, + "learning_rate": 3.963809523809524e-05, + "loss": 0.3737, + "step": 2507 + }, + { + "epoch": 14.331428571428571, + "grad_norm": 22.166606903076172, + "learning_rate": 3.963174603174603e-05, + "loss": 0.5191, + "step": 2508 + }, + { + "epoch": 14.337142857142856, + "grad_norm": 45.35384750366211, + "learning_rate": 3.962539682539683e-05, + "loss": 0.3282, + "step": 2509 + }, + { + "epoch": 14.342857142857143, + "grad_norm": 34.01889419555664, + "learning_rate": 3.961904761904762e-05, + "loss": 0.4813, + "step": 2510 + }, + { + "epoch": 14.348571428571429, + "grad_norm": 40.84413146972656, + "learning_rate": 3.9612698412698415e-05, + "loss": 0.6772, + "step": 2511 + }, + { + "epoch": 14.354285714285714, + "grad_norm": 29.83700180053711, + "learning_rate": 3.960634920634921e-05, + "loss": 0.3248, + "step": 2512 + }, + { + "epoch": 14.36, + "grad_norm": 51.366172790527344, + "learning_rate": 3.960000000000001e-05, + "loss": 0.3728, + "step": 2513 + }, + { + "epoch": 14.365714285714287, + "grad_norm": 54.66666793823242, + "learning_rate": 3.959365079365079e-05, + "loss": 0.319, + "step": 2514 + }, + { + "epoch": 14.371428571428572, + "grad_norm": 91.00899505615234, + "learning_rate": 3.958730158730159e-05, + "loss": 0.5796, + "step": 2515 + }, + { + "epoch": 14.377142857142857, + "grad_norm": 48.039817810058594, + "learning_rate": 3.9580952380952385e-05, + "loss": 0.4446, + "step": 2516 + }, + { + "epoch": 14.382857142857143, + "grad_norm": 38.14234924316406, + 
"learning_rate": 3.957460317460318e-05, + "loss": 0.5097, + "step": 2517 + }, + { + "epoch": 14.388571428571428, + "grad_norm": 43.835147857666016, + "learning_rate": 3.956825396825397e-05, + "loss": 0.4423, + "step": 2518 + }, + { + "epoch": 14.394285714285715, + "grad_norm": 46.32112121582031, + "learning_rate": 3.956190476190476e-05, + "loss": 0.4711, + "step": 2519 + }, + { + "epoch": 14.4, + "grad_norm": 204.66468811035156, + "learning_rate": 3.9555555555555556e-05, + "loss": 0.6653, + "step": 2520 + }, + { + "epoch": 14.405714285714286, + "grad_norm": 56.643836975097656, + "learning_rate": 3.9549206349206355e-05, + "loss": 0.5713, + "step": 2521 + }, + { + "epoch": 14.411428571428571, + "grad_norm": 56.983455657958984, + "learning_rate": 3.954285714285714e-05, + "loss": 0.4003, + "step": 2522 + }, + { + "epoch": 14.417142857142856, + "grad_norm": 100.86315155029297, + "learning_rate": 3.953650793650794e-05, + "loss": 0.3291, + "step": 2523 + }, + { + "epoch": 14.422857142857143, + "grad_norm": 77.1080093383789, + "learning_rate": 3.953015873015873e-05, + "loss": 0.3666, + "step": 2524 + }, + { + "epoch": 14.428571428571429, + "grad_norm": 35.71822738647461, + "learning_rate": 3.9523809523809526e-05, + "loss": 0.3828, + "step": 2525 + }, + { + "epoch": 14.434285714285714, + "grad_norm": 60.82173538208008, + "learning_rate": 3.951746031746032e-05, + "loss": 0.5301, + "step": 2526 + }, + { + "epoch": 14.44, + "grad_norm": 37.55204391479492, + "learning_rate": 3.951111111111112e-05, + "loss": 0.3292, + "step": 2527 + }, + { + "epoch": 14.445714285714285, + "grad_norm": 93.9327621459961, + "learning_rate": 3.9504761904761903e-05, + "loss": 0.5047, + "step": 2528 + }, + { + "epoch": 14.451428571428572, + "grad_norm": 46.62969970703125, + "learning_rate": 3.94984126984127e-05, + "loss": 0.3249, + "step": 2529 + }, + { + "epoch": 14.457142857142857, + "grad_norm": 72.3116683959961, + "learning_rate": 3.949206349206349e-05, + "loss": 0.4743, + "step": 2530 + }, + { + 
"epoch": 14.462857142857143, + "grad_norm": 55.59286117553711, + "learning_rate": 3.948571428571429e-05, + "loss": 0.537, + "step": 2531 + }, + { + "epoch": 14.468571428571428, + "grad_norm": 44.294151306152344, + "learning_rate": 3.947936507936508e-05, + "loss": 0.4722, + "step": 2532 + }, + { + "epoch": 14.474285714285715, + "grad_norm": 50.04436111450195, + "learning_rate": 3.947301587301587e-05, + "loss": 0.6354, + "step": 2533 + }, + { + "epoch": 14.48, + "grad_norm": 82.75171661376953, + "learning_rate": 3.9466666666666666e-05, + "loss": 0.4525, + "step": 2534 + }, + { + "epoch": 14.485714285714286, + "grad_norm": 65.42272186279297, + "learning_rate": 3.9460317460317465e-05, + "loss": 0.3971, + "step": 2535 + }, + { + "epoch": 14.491428571428571, + "grad_norm": 219.6083984375, + "learning_rate": 3.945396825396825e-05, + "loss": 0.46, + "step": 2536 + }, + { + "epoch": 14.497142857142856, + "grad_norm": 32.19956588745117, + "learning_rate": 3.944761904761905e-05, + "loss": 0.4132, + "step": 2537 + }, + { + "epoch": 14.502857142857144, + "grad_norm": 42.3463134765625, + "learning_rate": 3.944126984126984e-05, + "loss": 0.4181, + "step": 2538 + }, + { + "epoch": 14.508571428571429, + "grad_norm": 28.293188095092773, + "learning_rate": 3.9434920634920636e-05, + "loss": 0.3657, + "step": 2539 + }, + { + "epoch": 14.514285714285714, + "grad_norm": 76.0169906616211, + "learning_rate": 3.942857142857143e-05, + "loss": 0.4506, + "step": 2540 + }, + { + "epoch": 14.52, + "grad_norm": 35.1695442199707, + "learning_rate": 3.942222222222222e-05, + "loss": 0.4247, + "step": 2541 + }, + { + "epoch": 14.525714285714285, + "grad_norm": 75.5692367553711, + "learning_rate": 3.9415873015873014e-05, + "loss": 0.542, + "step": 2542 + }, + { + "epoch": 14.531428571428572, + "grad_norm": 32.895774841308594, + "learning_rate": 3.940952380952381e-05, + "loss": 0.4416, + "step": 2543 + }, + { + "epoch": 14.537142857142857, + "grad_norm": 65.54524230957031, + "learning_rate": 
3.9403174603174606e-05, + "loss": 0.3774, + "step": 2544 + }, + { + "epoch": 14.542857142857143, + "grad_norm": 31.3699893951416, + "learning_rate": 3.93968253968254e-05, + "loss": 0.4871, + "step": 2545 + }, + { + "epoch": 14.548571428571428, + "grad_norm": 37.57272720336914, + "learning_rate": 3.939047619047619e-05, + "loss": 0.4938, + "step": 2546 + }, + { + "epoch": 14.554285714285715, + "grad_norm": 81.45382690429688, + "learning_rate": 3.9384126984126984e-05, + "loss": 0.4315, + "step": 2547 + }, + { + "epoch": 14.56, + "grad_norm": 54.47109603881836, + "learning_rate": 3.937777777777778e-05, + "loss": 0.4103, + "step": 2548 + }, + { + "epoch": 14.565714285714286, + "grad_norm": 62.30387496948242, + "learning_rate": 3.9371428571428576e-05, + "loss": 0.3809, + "step": 2549 + }, + { + "epoch": 14.571428571428571, + "grad_norm": 38.18788528442383, + "learning_rate": 3.936507936507937e-05, + "loss": 0.3779, + "step": 2550 + }, + { + "epoch": 14.577142857142857, + "grad_norm": 110.54651641845703, + "learning_rate": 3.935873015873016e-05, + "loss": 0.4439, + "step": 2551 + }, + { + "epoch": 14.582857142857144, + "grad_norm": 64.4066162109375, + "learning_rate": 3.9352380952380954e-05, + "loss": 0.4098, + "step": 2552 + }, + { + "epoch": 14.588571428571429, + "grad_norm": 86.1274185180664, + "learning_rate": 3.9346031746031746e-05, + "loss": 0.3765, + "step": 2553 + }, + { + "epoch": 14.594285714285714, + "grad_norm": 41.547847747802734, + "learning_rate": 3.9339682539682546e-05, + "loss": 0.4719, + "step": 2554 + }, + { + "epoch": 14.6, + "grad_norm": 43.89437484741211, + "learning_rate": 3.933333333333333e-05, + "loss": 0.3632, + "step": 2555 + }, + { + "epoch": 14.605714285714285, + "grad_norm": 64.66832733154297, + "learning_rate": 3.932698412698413e-05, + "loss": 0.4349, + "step": 2556 + }, + { + "epoch": 14.611428571428572, + "grad_norm": 62.122745513916016, + "learning_rate": 3.9320634920634924e-05, + "loss": 0.3536, + "step": 2557 + }, + { + "epoch": 
14.617142857142857, + "grad_norm": 51.276641845703125, + "learning_rate": 3.9314285714285716e-05, + "loss": 0.4425, + "step": 2558 + }, + { + "epoch": 14.622857142857143, + "grad_norm": 45.557525634765625, + "learning_rate": 3.930793650793651e-05, + "loss": 0.4156, + "step": 2559 + }, + { + "epoch": 14.628571428571428, + "grad_norm": 48.128719329833984, + "learning_rate": 3.930158730158731e-05, + "loss": 0.4061, + "step": 2560 + }, + { + "epoch": 14.634285714285713, + "grad_norm": 35.2762565612793, + "learning_rate": 3.9295238095238094e-05, + "loss": 0.4133, + "step": 2561 + }, + { + "epoch": 14.64, + "grad_norm": 50.81557846069336, + "learning_rate": 3.9288888888888894e-05, + "loss": 0.3663, + "step": 2562 + }, + { + "epoch": 14.645714285714286, + "grad_norm": 115.70230865478516, + "learning_rate": 3.928253968253968e-05, + "loss": 0.4082, + "step": 2563 + }, + { + "epoch": 14.651428571428571, + "grad_norm": 40.494483947753906, + "learning_rate": 3.927619047619048e-05, + "loss": 0.396, + "step": 2564 + }, + { + "epoch": 14.657142857142857, + "grad_norm": 83.41320037841797, + "learning_rate": 3.926984126984127e-05, + "loss": 0.6076, + "step": 2565 + }, + { + "epoch": 14.662857142857142, + "grad_norm": 57.01734924316406, + "learning_rate": 3.9263492063492064e-05, + "loss": 0.3043, + "step": 2566 + }, + { + "epoch": 14.668571428571429, + "grad_norm": 89.8864974975586, + "learning_rate": 3.925714285714286e-05, + "loss": 0.3443, + "step": 2567 + }, + { + "epoch": 14.674285714285714, + "grad_norm": 39.41927719116211, + "learning_rate": 3.9250793650793656e-05, + "loss": 0.5277, + "step": 2568 + }, + { + "epoch": 14.68, + "grad_norm": 67.12992095947266, + "learning_rate": 3.924444444444444e-05, + "loss": 0.582, + "step": 2569 + }, + { + "epoch": 14.685714285714285, + "grad_norm": 29.401214599609375, + "learning_rate": 3.923809523809524e-05, + "loss": 0.379, + "step": 2570 + }, + { + "epoch": 14.691428571428572, + "grad_norm": 37.86322021484375, + "learning_rate": 
3.9231746031746034e-05, + "loss": 0.5949, + "step": 2571 + }, + { + "epoch": 14.697142857142858, + "grad_norm": 20.0744686126709, + "learning_rate": 3.922539682539683e-05, + "loss": 0.3908, + "step": 2572 + }, + { + "epoch": 14.702857142857143, + "grad_norm": 48.950862884521484, + "learning_rate": 3.921904761904762e-05, + "loss": 0.4258, + "step": 2573 + }, + { + "epoch": 14.708571428571428, + "grad_norm": 319.82684326171875, + "learning_rate": 3.921269841269841e-05, + "loss": 0.4514, + "step": 2574 + }, + { + "epoch": 14.714285714285714, + "grad_norm": 31.71880531311035, + "learning_rate": 3.9206349206349205e-05, + "loss": 0.493, + "step": 2575 + }, + { + "epoch": 14.72, + "grad_norm": 59.9763298034668, + "learning_rate": 3.9200000000000004e-05, + "loss": 0.4182, + "step": 2576 + }, + { + "epoch": 14.725714285714286, + "grad_norm": 94.82324981689453, + "learning_rate": 3.91936507936508e-05, + "loss": 0.3055, + "step": 2577 + }, + { + "epoch": 14.731428571428571, + "grad_norm": 37.82186508178711, + "learning_rate": 3.918730158730159e-05, + "loss": 0.4299, + "step": 2578 + }, + { + "epoch": 14.737142857142857, + "grad_norm": 39.15916442871094, + "learning_rate": 3.918095238095238e-05, + "loss": 0.5593, + "step": 2579 + }, + { + "epoch": 14.742857142857144, + "grad_norm": 35.86741638183594, + "learning_rate": 3.9174603174603175e-05, + "loss": 0.3602, + "step": 2580 + }, + { + "epoch": 14.748571428571429, + "grad_norm": 37.261138916015625, + "learning_rate": 3.916825396825397e-05, + "loss": 0.4795, + "step": 2581 + }, + { + "epoch": 14.754285714285714, + "grad_norm": 33.5512809753418, + "learning_rate": 3.916190476190477e-05, + "loss": 0.3735, + "step": 2582 + }, + { + "epoch": 14.76, + "grad_norm": 49.997344970703125, + "learning_rate": 3.915555555555556e-05, + "loss": 0.3427, + "step": 2583 + }, + { + "epoch": 14.765714285714285, + "grad_norm": 29.823055267333984, + "learning_rate": 3.914920634920635e-05, + "loss": 0.4592, + "step": 2584 + }, + { + "epoch": 
14.771428571428572, + "grad_norm": 33.76132583618164, + "learning_rate": 3.9142857142857145e-05, + "loss": 0.3816, + "step": 2585 + }, + { + "epoch": 14.777142857142858, + "grad_norm": 24.215843200683594, + "learning_rate": 3.913650793650794e-05, + "loss": 0.3551, + "step": 2586 + }, + { + "epoch": 14.782857142857143, + "grad_norm": 1256.5478515625, + "learning_rate": 3.913015873015874e-05, + "loss": 0.4059, + "step": 2587 + }, + { + "epoch": 14.788571428571428, + "grad_norm": 36.900177001953125, + "learning_rate": 3.912380952380952e-05, + "loss": 0.4487, + "step": 2588 + }, + { + "epoch": 14.794285714285714, + "grad_norm": 633.5703735351562, + "learning_rate": 3.911746031746032e-05, + "loss": 0.3842, + "step": 2589 + }, + { + "epoch": 14.8, + "grad_norm": 92.28624725341797, + "learning_rate": 3.9111111111111115e-05, + "loss": 0.4321, + "step": 2590 + }, + { + "epoch": 14.805714285714286, + "grad_norm": 21.139535903930664, + "learning_rate": 3.910476190476191e-05, + "loss": 0.3397, + "step": 2591 + }, + { + "epoch": 14.811428571428571, + "grad_norm": 33.3880500793457, + "learning_rate": 3.90984126984127e-05, + "loss": 0.5109, + "step": 2592 + }, + { + "epoch": 14.817142857142857, + "grad_norm": 60.31396484375, + "learning_rate": 3.90920634920635e-05, + "loss": 0.4734, + "step": 2593 + }, + { + "epoch": 14.822857142857142, + "grad_norm": 35.14171600341797, + "learning_rate": 3.9085714285714285e-05, + "loss": 0.4481, + "step": 2594 + }, + { + "epoch": 14.82857142857143, + "grad_norm": 40.16318130493164, + "learning_rate": 3.9079365079365085e-05, + "loss": 0.3135, + "step": 2595 + }, + { + "epoch": 14.834285714285715, + "grad_norm": 45.186973571777344, + "learning_rate": 3.907301587301587e-05, + "loss": 0.468, + "step": 2596 + }, + { + "epoch": 14.84, + "grad_norm": 55.854042053222656, + "learning_rate": 3.906666666666667e-05, + "loss": 0.4176, + "step": 2597 + }, + { + "epoch": 14.845714285714285, + "grad_norm": 44.230403900146484, + "learning_rate": 
3.906031746031746e-05, + "loss": 0.2856, + "step": 2598 + }, + { + "epoch": 14.85142857142857, + "grad_norm": 43.87007141113281, + "learning_rate": 3.9053968253968255e-05, + "loss": 0.4488, + "step": 2599 + }, + { + "epoch": 14.857142857142858, + "grad_norm": 39.99993896484375, + "learning_rate": 3.904761904761905e-05, + "loss": 0.4659, + "step": 2600 + }, + { + "epoch": 14.862857142857143, + "grad_norm": 38.486351013183594, + "learning_rate": 3.904126984126985e-05, + "loss": 0.4924, + "step": 2601 + }, + { + "epoch": 14.868571428571428, + "grad_norm": 58.42378616333008, + "learning_rate": 3.903492063492063e-05, + "loss": 0.3247, + "step": 2602 + }, + { + "epoch": 14.874285714285714, + "grad_norm": 26.708703994750977, + "learning_rate": 3.902857142857143e-05, + "loss": 0.3187, + "step": 2603 + }, + { + "epoch": 14.88, + "grad_norm": 54.7596549987793, + "learning_rate": 3.9022222222222225e-05, + "loss": 0.311, + "step": 2604 + }, + { + "epoch": 14.885714285714286, + "grad_norm": 53.620750427246094, + "learning_rate": 3.901587301587302e-05, + "loss": 0.3484, + "step": 2605 + }, + { + "epoch": 14.891428571428571, + "grad_norm": 28.88561248779297, + "learning_rate": 3.900952380952381e-05, + "loss": 0.3119, + "step": 2606 + }, + { + "epoch": 14.897142857142857, + "grad_norm": 26.30694580078125, + "learning_rate": 3.90031746031746e-05, + "loss": 0.3085, + "step": 2607 + }, + { + "epoch": 14.902857142857142, + "grad_norm": 37.05280685424805, + "learning_rate": 3.8996825396825396e-05, + "loss": 0.4501, + "step": 2608 + }, + { + "epoch": 14.90857142857143, + "grad_norm": 61.25300598144531, + "learning_rate": 3.8990476190476195e-05, + "loss": 0.6067, + "step": 2609 + }, + { + "epoch": 14.914285714285715, + "grad_norm": 79.2222900390625, + "learning_rate": 3.898412698412698e-05, + "loss": 0.3847, + "step": 2610 + }, + { + "epoch": 14.92, + "grad_norm": 79.72723388671875, + "learning_rate": 3.897777777777778e-05, + "loss": 0.4162, + "step": 2611 + }, + { + "epoch": 
14.925714285714285, + "grad_norm": 38.832340240478516, + "learning_rate": 3.897142857142857e-05, + "loss": 0.4211, + "step": 2612 + }, + { + "epoch": 14.93142857142857, + "grad_norm": 69.52618408203125, + "learning_rate": 3.8965079365079366e-05, + "loss": 0.4996, + "step": 2613 + }, + { + "epoch": 14.937142857142858, + "grad_norm": 56.463932037353516, + "learning_rate": 3.895873015873016e-05, + "loss": 0.4568, + "step": 2614 + }, + { + "epoch": 14.942857142857143, + "grad_norm": 30.429393768310547, + "learning_rate": 3.895238095238096e-05, + "loss": 0.3387, + "step": 2615 + }, + { + "epoch": 14.948571428571428, + "grad_norm": 660.6868896484375, + "learning_rate": 3.894603174603175e-05, + "loss": 0.3617, + "step": 2616 + }, + { + "epoch": 14.954285714285714, + "grad_norm": 27.04292106628418, + "learning_rate": 3.893968253968254e-05, + "loss": 0.6176, + "step": 2617 + }, + { + "epoch": 14.96, + "grad_norm": 30.616981506347656, + "learning_rate": 3.8933333333333336e-05, + "loss": 0.4639, + "step": 2618 + }, + { + "epoch": 14.965714285714286, + "grad_norm": 47.954803466796875, + "learning_rate": 3.892698412698413e-05, + "loss": 0.3035, + "step": 2619 + }, + { + "epoch": 14.971428571428572, + "grad_norm": 77.13597106933594, + "learning_rate": 3.892063492063492e-05, + "loss": 0.3525, + "step": 2620 + }, + { + "epoch": 14.977142857142857, + "grad_norm": 94.75045013427734, + "learning_rate": 3.8914285714285713e-05, + "loss": 0.2897, + "step": 2621 + }, + { + "epoch": 14.982857142857142, + "grad_norm": 64.9283218383789, + "learning_rate": 3.890793650793651e-05, + "loss": 0.4123, + "step": 2622 + }, + { + "epoch": 14.98857142857143, + "grad_norm": 51.37266540527344, + "learning_rate": 3.8901587301587305e-05, + "loss": 0.6112, + "step": 2623 + }, + { + "epoch": 14.994285714285715, + "grad_norm": 54.290157318115234, + "learning_rate": 3.88952380952381e-05, + "loss": 0.2984, + "step": 2624 + }, + { + "epoch": 15.0, + "grad_norm": 107.26709747314453, + "learning_rate": 
3.888888888888889e-05, + "loss": 0.3927, + "step": 2625 + }, + { + "epoch": 15.0, + "eval_classes": 0, + "eval_loss": 0.6049719452857971, + "eval_map": 0.9119, + "eval_map_50": 0.9623, + "eval_map_75": 0.9503, + "eval_map_large": 0.912, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9119, + "eval_map_small": -1.0, + "eval_mar_1": 0.781, + "eval_mar_10": 0.9657, + "eval_mar_100": 0.9695, + "eval_mar_100_per_class": 0.9695, + "eval_mar_large": 0.9695, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.7442, + "eval_samples_per_second": 21.391, + "eval_steps_per_second": 2.692, + "step": 2625 + }, + { + "epoch": 15.005714285714285, + "grad_norm": 75.1425552368164, + "learning_rate": 3.888253968253969e-05, + "loss": 0.4931, + "step": 2626 + }, + { + "epoch": 15.01142857142857, + "grad_norm": 37.94574737548828, + "learning_rate": 3.8876190476190476e-05, + "loss": 0.2928, + "step": 2627 + }, + { + "epoch": 15.017142857142858, + "grad_norm": 24.948440551757812, + "learning_rate": 3.8869841269841275e-05, + "loss": 0.3102, + "step": 2628 + }, + { + "epoch": 15.022857142857143, + "grad_norm": 28.053247451782227, + "learning_rate": 3.886349206349206e-05, + "loss": 0.3639, + "step": 2629 + }, + { + "epoch": 15.028571428571428, + "grad_norm": 34.28338623046875, + "learning_rate": 3.885714285714286e-05, + "loss": 0.2893, + "step": 2630 + }, + { + "epoch": 15.034285714285714, + "grad_norm": 223.83596801757812, + "learning_rate": 3.885079365079365e-05, + "loss": 0.4726, + "step": 2631 + }, + { + "epoch": 15.04, + "grad_norm": 40.889259338378906, + "learning_rate": 3.8844444444444446e-05, + "loss": 0.485, + "step": 2632 + }, + { + "epoch": 15.045714285714286, + "grad_norm": 44.691932678222656, + "learning_rate": 3.883809523809524e-05, + "loss": 0.3396, + "step": 2633 + }, + { + "epoch": 15.051428571428572, + "grad_norm": 35.61066818237305, + "learning_rate": 3.883174603174604e-05, + "loss": 0.4293, + "step": 2634 + }, + { + "epoch": 15.057142857142857, 
+ "grad_norm": 260.98980712890625, + "learning_rate": 3.8825396825396824e-05, + "loss": 0.3622, + "step": 2635 + }, + { + "epoch": 15.062857142857142, + "grad_norm": 49.225555419921875, + "learning_rate": 3.881904761904762e-05, + "loss": 0.2627, + "step": 2636 + }, + { + "epoch": 15.06857142857143, + "grad_norm": 70.27244567871094, + "learning_rate": 3.8812698412698416e-05, + "loss": 0.4287, + "step": 2637 + }, + { + "epoch": 15.074285714285715, + "grad_norm": 42.6537971496582, + "learning_rate": 3.880634920634921e-05, + "loss": 0.315, + "step": 2638 + }, + { + "epoch": 15.08, + "grad_norm": 47.73801040649414, + "learning_rate": 3.88e-05, + "loss": 0.3974, + "step": 2639 + }, + { + "epoch": 15.085714285714285, + "grad_norm": 102.1975326538086, + "learning_rate": 3.8793650793650794e-05, + "loss": 0.4949, + "step": 2640 + }, + { + "epoch": 15.09142857142857, + "grad_norm": 236.9488525390625, + "learning_rate": 3.8787301587301586e-05, + "loss": 0.322, + "step": 2641 + }, + { + "epoch": 15.097142857142858, + "grad_norm": 187.68399047851562, + "learning_rate": 3.8780952380952386e-05, + "loss": 0.3594, + "step": 2642 + }, + { + "epoch": 15.102857142857143, + "grad_norm": 142.1114044189453, + "learning_rate": 3.877460317460317e-05, + "loss": 0.4132, + "step": 2643 + }, + { + "epoch": 15.108571428571429, + "grad_norm": 33.342689514160156, + "learning_rate": 3.876825396825397e-05, + "loss": 0.5165, + "step": 2644 + }, + { + "epoch": 15.114285714285714, + "grad_norm": 34.99799728393555, + "learning_rate": 3.8761904761904764e-05, + "loss": 0.3033, + "step": 2645 + }, + { + "epoch": 15.12, + "grad_norm": 33.82915496826172, + "learning_rate": 3.8755555555555556e-05, + "loss": 0.4889, + "step": 2646 + }, + { + "epoch": 15.125714285714286, + "grad_norm": 30.21843147277832, + "learning_rate": 3.874920634920635e-05, + "loss": 0.3653, + "step": 2647 + }, + { + "epoch": 15.131428571428572, + "grad_norm": 35.87263107299805, + "learning_rate": 3.874285714285715e-05, + "loss": 0.5984, + 
"step": 2648 + }, + { + "epoch": 15.137142857142857, + "grad_norm": 30.697744369506836, + "learning_rate": 3.8736507936507934e-05, + "loss": 0.4492, + "step": 2649 + }, + { + "epoch": 15.142857142857142, + "grad_norm": 49.59101486206055, + "learning_rate": 3.8730158730158734e-05, + "loss": 0.5365, + "step": 2650 + }, + { + "epoch": 15.14857142857143, + "grad_norm": 34.50474166870117, + "learning_rate": 3.8723809523809526e-05, + "loss": 0.5607, + "step": 2651 + }, + { + "epoch": 15.154285714285715, + "grad_norm": 39.87806701660156, + "learning_rate": 3.871746031746032e-05, + "loss": 0.5181, + "step": 2652 + }, + { + "epoch": 15.16, + "grad_norm": 240.49522399902344, + "learning_rate": 3.871111111111111e-05, + "loss": 0.4354, + "step": 2653 + }, + { + "epoch": 15.165714285714285, + "grad_norm": 39.07673645019531, + "learning_rate": 3.8704761904761904e-05, + "loss": 0.5012, + "step": 2654 + }, + { + "epoch": 15.17142857142857, + "grad_norm": 78.40770721435547, + "learning_rate": 3.8698412698412704e-05, + "loss": 0.2838, + "step": 2655 + }, + { + "epoch": 15.177142857142858, + "grad_norm": 44.3741455078125, + "learning_rate": 3.8692063492063496e-05, + "loss": 0.5775, + "step": 2656 + }, + { + "epoch": 15.182857142857143, + "grad_norm": 24.08373260498047, + "learning_rate": 3.868571428571429e-05, + "loss": 0.4942, + "step": 2657 + }, + { + "epoch": 15.188571428571429, + "grad_norm": 34.060638427734375, + "learning_rate": 3.867936507936508e-05, + "loss": 0.3849, + "step": 2658 + }, + { + "epoch": 15.194285714285714, + "grad_norm": 287.13433837890625, + "learning_rate": 3.8673015873015874e-05, + "loss": 0.473, + "step": 2659 + }, + { + "epoch": 15.2, + "grad_norm": 91.75950622558594, + "learning_rate": 3.866666666666667e-05, + "loss": 0.4938, + "step": 2660 + }, + { + "epoch": 15.205714285714286, + "grad_norm": 40.372276306152344, + "learning_rate": 3.8660317460317466e-05, + "loss": 0.4256, + "step": 2661 + }, + { + "epoch": 15.211428571428572, + "grad_norm": 
58.20750045776367, + "learning_rate": 3.865396825396825e-05, + "loss": 0.4944, + "step": 2662 + }, + { + "epoch": 15.217142857142857, + "grad_norm": 48.70400619506836, + "learning_rate": 3.864761904761905e-05, + "loss": 0.5066, + "step": 2663 + }, + { + "epoch": 15.222857142857142, + "grad_norm": 59.96915817260742, + "learning_rate": 3.8641269841269844e-05, + "loss": 0.4956, + "step": 2664 + }, + { + "epoch": 15.228571428571428, + "grad_norm": 37.78099060058594, + "learning_rate": 3.863492063492064e-05, + "loss": 0.5086, + "step": 2665 + }, + { + "epoch": 15.234285714285715, + "grad_norm": 28.727447509765625, + "learning_rate": 3.862857142857143e-05, + "loss": 0.4821, + "step": 2666 + }, + { + "epoch": 15.24, + "grad_norm": 57.22956085205078, + "learning_rate": 3.862222222222223e-05, + "loss": 0.6978, + "step": 2667 + }, + { + "epoch": 15.245714285714286, + "grad_norm": 25.5422420501709, + "learning_rate": 3.8615873015873015e-05, + "loss": 0.6043, + "step": 2668 + }, + { + "epoch": 15.251428571428571, + "grad_norm": 607.6710815429688, + "learning_rate": 3.8609523809523814e-05, + "loss": 0.4465, + "step": 2669 + }, + { + "epoch": 15.257142857142856, + "grad_norm": 31.48773765563965, + "learning_rate": 3.860317460317461e-05, + "loss": 0.4792, + "step": 2670 + }, + { + "epoch": 15.262857142857143, + "grad_norm": 33.4849739074707, + "learning_rate": 3.85968253968254e-05, + "loss": 0.7286, + "step": 2671 + }, + { + "epoch": 15.268571428571429, + "grad_norm": 29.74656105041504, + "learning_rate": 3.859047619047619e-05, + "loss": 0.4638, + "step": 2672 + }, + { + "epoch": 15.274285714285714, + "grad_norm": 43.694854736328125, + "learning_rate": 3.8584126984126985e-05, + "loss": 0.5248, + "step": 2673 + }, + { + "epoch": 15.28, + "grad_norm": 42.586055755615234, + "learning_rate": 3.857777777777778e-05, + "loss": 0.5272, + "step": 2674 + }, + { + "epoch": 15.285714285714286, + "grad_norm": 20.127805709838867, + "learning_rate": 3.857142857142858e-05, + "loss": 0.4056, + 
"step": 2675 + }, + { + "epoch": 15.291428571428572, + "grad_norm": 37.68088912963867, + "learning_rate": 3.856507936507936e-05, + "loss": 0.4232, + "step": 2676 + }, + { + "epoch": 15.297142857142857, + "grad_norm": 37.87181091308594, + "learning_rate": 3.855873015873016e-05, + "loss": 0.3896, + "step": 2677 + }, + { + "epoch": 15.302857142857142, + "grad_norm": 100.7476806640625, + "learning_rate": 3.8552380952380955e-05, + "loss": 0.5037, + "step": 2678 + }, + { + "epoch": 15.308571428571428, + "grad_norm": 48.78540802001953, + "learning_rate": 3.854603174603175e-05, + "loss": 0.6045, + "step": 2679 + }, + { + "epoch": 15.314285714285715, + "grad_norm": 81.76539611816406, + "learning_rate": 3.853968253968254e-05, + "loss": 0.6549, + "step": 2680 + }, + { + "epoch": 15.32, + "grad_norm": 115.28096771240234, + "learning_rate": 3.853333333333334e-05, + "loss": 0.5465, + "step": 2681 + }, + { + "epoch": 15.325714285714286, + "grad_norm": 52.533226013183594, + "learning_rate": 3.8526984126984125e-05, + "loss": 0.5013, + "step": 2682 + }, + { + "epoch": 15.331428571428571, + "grad_norm": 39.55607604980469, + "learning_rate": 3.8520634920634925e-05, + "loss": 0.4071, + "step": 2683 + }, + { + "epoch": 15.337142857142856, + "grad_norm": 37.944557189941406, + "learning_rate": 3.851428571428571e-05, + "loss": 0.6956, + "step": 2684 + }, + { + "epoch": 15.342857142857143, + "grad_norm": 48.310096740722656, + "learning_rate": 3.850793650793651e-05, + "loss": 0.538, + "step": 2685 + }, + { + "epoch": 15.348571428571429, + "grad_norm": 53.69296646118164, + "learning_rate": 3.85015873015873e-05, + "loss": 0.5177, + "step": 2686 + }, + { + "epoch": 15.354285714285714, + "grad_norm": 57.018463134765625, + "learning_rate": 3.8495238095238095e-05, + "loss": 0.3286, + "step": 2687 + }, + { + "epoch": 15.36, + "grad_norm": 44.31338119506836, + "learning_rate": 3.848888888888889e-05, + "loss": 0.3722, + "step": 2688 + }, + { + "epoch": 15.365714285714287, + "grad_norm": 
73.21768188476562, + "learning_rate": 3.848253968253969e-05, + "loss": 0.4856, + "step": 2689 + }, + { + "epoch": 15.371428571428572, + "grad_norm": 47.26458740234375, + "learning_rate": 3.847619047619048e-05, + "loss": 0.4554, + "step": 2690 + }, + { + "epoch": 15.377142857142857, + "grad_norm": 33.08348083496094, + "learning_rate": 3.846984126984127e-05, + "loss": 0.6845, + "step": 2691 + }, + { + "epoch": 15.382857142857143, + "grad_norm": 79.1163558959961, + "learning_rate": 3.8463492063492065e-05, + "loss": 0.695, + "step": 2692 + }, + { + "epoch": 15.388571428571428, + "grad_norm": 40.303707122802734, + "learning_rate": 3.845714285714286e-05, + "loss": 0.5144, + "step": 2693 + }, + { + "epoch": 15.394285714285715, + "grad_norm": 65.41890716552734, + "learning_rate": 3.845079365079366e-05, + "loss": 0.503, + "step": 2694 + }, + { + "epoch": 15.4, + "grad_norm": 20.049118041992188, + "learning_rate": 3.844444444444444e-05, + "loss": 0.4207, + "step": 2695 + }, + { + "epoch": 15.405714285714286, + "grad_norm": 85.63333129882812, + "learning_rate": 3.843809523809524e-05, + "loss": 0.6394, + "step": 2696 + }, + { + "epoch": 15.411428571428571, + "grad_norm": 52.71570587158203, + "learning_rate": 3.8431746031746035e-05, + "loss": 0.5849, + "step": 2697 + }, + { + "epoch": 15.417142857142856, + "grad_norm": 81.16397094726562, + "learning_rate": 3.842539682539683e-05, + "loss": 0.3905, + "step": 2698 + }, + { + "epoch": 15.422857142857143, + "grad_norm": 30.789758682250977, + "learning_rate": 3.841904761904762e-05, + "loss": 0.4725, + "step": 2699 + }, + { + "epoch": 15.428571428571429, + "grad_norm": 36.64466094970703, + "learning_rate": 3.841269841269842e-05, + "loss": 0.3179, + "step": 2700 + }, + { + "epoch": 15.434285714285714, + "grad_norm": 62.083412170410156, + "learning_rate": 3.8406349206349206e-05, + "loss": 0.3341, + "step": 2701 + }, + { + "epoch": 15.44, + "grad_norm": 47.09940719604492, + "learning_rate": 3.8400000000000005e-05, + "loss": 0.4432, + 
"step": 2702 + }, + { + "epoch": 15.445714285714285, + "grad_norm": 30.16719627380371, + "learning_rate": 3.839365079365079e-05, + "loss": 0.3751, + "step": 2703 + }, + { + "epoch": 15.451428571428572, + "grad_norm": 305.1468505859375, + "learning_rate": 3.838730158730159e-05, + "loss": 0.3978, + "step": 2704 + }, + { + "epoch": 15.457142857142857, + "grad_norm": 33.10639190673828, + "learning_rate": 3.838095238095238e-05, + "loss": 0.3757, + "step": 2705 + }, + { + "epoch": 15.462857142857143, + "grad_norm": 36.49113845825195, + "learning_rate": 3.8374603174603176e-05, + "loss": 0.2957, + "step": 2706 + }, + { + "epoch": 15.468571428571428, + "grad_norm": 61.94491958618164, + "learning_rate": 3.836825396825397e-05, + "loss": 0.36, + "step": 2707 + }, + { + "epoch": 15.474285714285715, + "grad_norm": 372.0150146484375, + "learning_rate": 3.836190476190477e-05, + "loss": 0.4366, + "step": 2708 + }, + { + "epoch": 15.48, + "grad_norm": 517.32861328125, + "learning_rate": 3.8355555555555553e-05, + "loss": 0.4392, + "step": 2709 + }, + { + "epoch": 15.485714285714286, + "grad_norm": 45.069026947021484, + "learning_rate": 3.834920634920635e-05, + "loss": 0.3724, + "step": 2710 + }, + { + "epoch": 15.491428571428571, + "grad_norm": 43.171451568603516, + "learning_rate": 3.8342857142857146e-05, + "loss": 0.4524, + "step": 2711 + }, + { + "epoch": 15.497142857142856, + "grad_norm": 64.76368713378906, + "learning_rate": 3.833650793650794e-05, + "loss": 0.4767, + "step": 2712 + }, + { + "epoch": 15.502857142857144, + "grad_norm": 47.70409393310547, + "learning_rate": 3.833015873015873e-05, + "loss": 0.3534, + "step": 2713 + }, + { + "epoch": 15.508571428571429, + "grad_norm": 62.24884033203125, + "learning_rate": 3.8323809523809523e-05, + "loss": 0.3876, + "step": 2714 + }, + { + "epoch": 15.514285714285714, + "grad_norm": 46.99068069458008, + "learning_rate": 3.8317460317460316e-05, + "loss": 0.7459, + "step": 2715 + }, + { + "epoch": 15.52, + "grad_norm": 
192.08668518066406, + "learning_rate": 3.8311111111111115e-05, + "loss": 0.3793, + "step": 2716 + }, + { + "epoch": 15.525714285714285, + "grad_norm": 45.692264556884766, + "learning_rate": 3.83047619047619e-05, + "loss": 0.2748, + "step": 2717 + }, + { + "epoch": 15.531428571428572, + "grad_norm": 48.88400650024414, + "learning_rate": 3.82984126984127e-05, + "loss": 0.3597, + "step": 2718 + }, + { + "epoch": 15.537142857142857, + "grad_norm": 36.55814743041992, + "learning_rate": 3.8292063492063493e-05, + "loss": 0.4922, + "step": 2719 + }, + { + "epoch": 15.542857142857143, + "grad_norm": 28.92718505859375, + "learning_rate": 3.8285714285714286e-05, + "loss": 0.5391, + "step": 2720 + }, + { + "epoch": 15.548571428571428, + "grad_norm": 34.36704635620117, + "learning_rate": 3.827936507936508e-05, + "loss": 0.4521, + "step": 2721 + }, + { + "epoch": 15.554285714285715, + "grad_norm": 563.7392578125, + "learning_rate": 3.827301587301588e-05, + "loss": 0.4306, + "step": 2722 + }, + { + "epoch": 15.56, + "grad_norm": 194.91851806640625, + "learning_rate": 3.8266666666666664e-05, + "loss": 0.4852, + "step": 2723 + }, + { + "epoch": 15.565714285714286, + "grad_norm": 47.08349609375, + "learning_rate": 3.826031746031746e-05, + "loss": 0.4089, + "step": 2724 + }, + { + "epoch": 15.571428571428571, + "grad_norm": 203.97264099121094, + "learning_rate": 3.8253968253968256e-05, + "loss": 0.3607, + "step": 2725 + }, + { + "epoch": 15.577142857142857, + "grad_norm": 21.412168502807617, + "learning_rate": 3.824761904761905e-05, + "loss": 0.3815, + "step": 2726 + }, + { + "epoch": 15.582857142857144, + "grad_norm": 26.144681930541992, + "learning_rate": 3.824126984126984e-05, + "loss": 0.4553, + "step": 2727 + }, + { + "epoch": 15.588571428571429, + "grad_norm": 39.761959075927734, + "learning_rate": 3.8234920634920634e-05, + "loss": 0.3699, + "step": 2728 + }, + { + "epoch": 15.594285714285714, + "grad_norm": 22.443368911743164, + "learning_rate": 3.822857142857143e-05, + 
"loss": 0.3112, + "step": 2729 + }, + { + "epoch": 15.6, + "grad_norm": 40.048221588134766, + "learning_rate": 3.8222222222222226e-05, + "loss": 0.3681, + "step": 2730 + }, + { + "epoch": 15.605714285714285, + "grad_norm": 37.674468994140625, + "learning_rate": 3.821587301587302e-05, + "loss": 0.3342, + "step": 2731 + }, + { + "epoch": 15.611428571428572, + "grad_norm": 33.92021560668945, + "learning_rate": 3.820952380952381e-05, + "loss": 0.583, + "step": 2732 + }, + { + "epoch": 15.617142857142857, + "grad_norm": 64.13064575195312, + "learning_rate": 3.820317460317461e-05, + "loss": 0.5907, + "step": 2733 + }, + { + "epoch": 15.622857142857143, + "grad_norm": 39.63349151611328, + "learning_rate": 3.8196825396825396e-05, + "loss": 0.4675, + "step": 2734 + }, + { + "epoch": 15.628571428571428, + "grad_norm": 43.180381774902344, + "learning_rate": 3.8190476190476196e-05, + "loss": 0.3661, + "step": 2735 + }, + { + "epoch": 15.634285714285713, + "grad_norm": 31.936859130859375, + "learning_rate": 3.818412698412698e-05, + "loss": 0.3476, + "step": 2736 + }, + { + "epoch": 15.64, + "grad_norm": 55.57992935180664, + "learning_rate": 3.817777777777778e-05, + "loss": 0.3881, + "step": 2737 + }, + { + "epoch": 15.645714285714286, + "grad_norm": 33.439857482910156, + "learning_rate": 3.8171428571428574e-05, + "loss": 0.3601, + "step": 2738 + }, + { + "epoch": 15.651428571428571, + "grad_norm": 30.789459228515625, + "learning_rate": 3.8165079365079366e-05, + "loss": 0.4528, + "step": 2739 + }, + { + "epoch": 15.657142857142857, + "grad_norm": 41.949378967285156, + "learning_rate": 3.815873015873016e-05, + "loss": 0.5464, + "step": 2740 + }, + { + "epoch": 15.662857142857142, + "grad_norm": 28.893184661865234, + "learning_rate": 3.815238095238096e-05, + "loss": 0.3825, + "step": 2741 + }, + { + "epoch": 15.668571428571429, + "grad_norm": 79.07247161865234, + "learning_rate": 3.8146031746031744e-05, + "loss": 0.617, + "step": 2742 + }, + { + "epoch": 15.674285714285714, + 
"grad_norm": 30.040111541748047, + "learning_rate": 3.8139682539682544e-05, + "loss": 0.4614, + "step": 2743 + }, + { + "epoch": 15.68, + "grad_norm": 71.13114929199219, + "learning_rate": 3.8133333333333336e-05, + "loss": 0.4146, + "step": 2744 + }, + { + "epoch": 15.685714285714285, + "grad_norm": 40.686546325683594, + "learning_rate": 3.812698412698413e-05, + "loss": 0.3792, + "step": 2745 + }, + { + "epoch": 15.691428571428572, + "grad_norm": 62.28633499145508, + "learning_rate": 3.812063492063492e-05, + "loss": 0.5746, + "step": 2746 + }, + { + "epoch": 15.697142857142858, + "grad_norm": 1480.721435546875, + "learning_rate": 3.8114285714285714e-05, + "loss": 0.4081, + "step": 2747 + }, + { + "epoch": 15.702857142857143, + "grad_norm": 30.005130767822266, + "learning_rate": 3.810793650793651e-05, + "loss": 0.4535, + "step": 2748 + }, + { + "epoch": 15.708571428571428, + "grad_norm": 61.335205078125, + "learning_rate": 3.8101587301587306e-05, + "loss": 0.5528, + "step": 2749 + }, + { + "epoch": 15.714285714285714, + "grad_norm": 44.220176696777344, + "learning_rate": 3.809523809523809e-05, + "loss": 0.4153, + "step": 2750 + }, + { + "epoch": 15.72, + "grad_norm": 55.29134750366211, + "learning_rate": 3.808888888888889e-05, + "loss": 0.4407, + "step": 2751 + }, + { + "epoch": 15.725714285714286, + "grad_norm": 19.705604553222656, + "learning_rate": 3.8082539682539684e-05, + "loss": 0.3805, + "step": 2752 + }, + { + "epoch": 15.731428571428571, + "grad_norm": 51.96522521972656, + "learning_rate": 3.807619047619048e-05, + "loss": 0.432, + "step": 2753 + }, + { + "epoch": 15.737142857142857, + "grad_norm": 71.69025421142578, + "learning_rate": 3.806984126984127e-05, + "loss": 0.3856, + "step": 2754 + }, + { + "epoch": 15.742857142857144, + "grad_norm": 34.365291595458984, + "learning_rate": 3.806349206349207e-05, + "loss": 0.4562, + "step": 2755 + }, + { + "epoch": 15.748571428571429, + "grad_norm": 56.59248352050781, + "learning_rate": 3.8057142857142855e-05, + 
"loss": 0.3469, + "step": 2756 + }, + { + "epoch": 15.754285714285714, + "grad_norm": 44.549007415771484, + "learning_rate": 3.8050793650793654e-05, + "loss": 0.5243, + "step": 2757 + }, + { + "epoch": 15.76, + "grad_norm": 834.0654296875, + "learning_rate": 3.804444444444445e-05, + "loss": 0.455, + "step": 2758 + }, + { + "epoch": 15.765714285714285, + "grad_norm": 38.67654037475586, + "learning_rate": 3.803809523809524e-05, + "loss": 0.3957, + "step": 2759 + }, + { + "epoch": 15.771428571428572, + "grad_norm": 27.433307647705078, + "learning_rate": 3.803174603174603e-05, + "loss": 0.6249, + "step": 2760 + }, + { + "epoch": 15.777142857142858, + "grad_norm": 55.26726531982422, + "learning_rate": 3.8025396825396825e-05, + "loss": 0.4457, + "step": 2761 + }, + { + "epoch": 15.782857142857143, + "grad_norm": 131.99993896484375, + "learning_rate": 3.8019047619047624e-05, + "loss": 0.3892, + "step": 2762 + }, + { + "epoch": 15.788571428571428, + "grad_norm": 24.48535919189453, + "learning_rate": 3.801269841269842e-05, + "loss": 0.2672, + "step": 2763 + }, + { + "epoch": 15.794285714285714, + "grad_norm": 36.06562423706055, + "learning_rate": 3.800634920634921e-05, + "loss": 0.3691, + "step": 2764 + }, + { + "epoch": 15.8, + "grad_norm": 76.08797454833984, + "learning_rate": 3.8e-05, + "loss": 0.533, + "step": 2765 + }, + { + "epoch": 15.805714285714286, + "grad_norm": 72.1341781616211, + "learning_rate": 3.7993650793650795e-05, + "loss": 0.4349, + "step": 2766 + }, + { + "epoch": 15.811428571428571, + "grad_norm": 57.52500534057617, + "learning_rate": 3.798730158730159e-05, + "loss": 0.5375, + "step": 2767 + }, + { + "epoch": 15.817142857142857, + "grad_norm": 250.50469970703125, + "learning_rate": 3.798095238095239e-05, + "loss": 0.3379, + "step": 2768 + }, + { + "epoch": 15.822857142857142, + "grad_norm": 52.38911437988281, + "learning_rate": 3.797460317460317e-05, + "loss": 0.4382, + "step": 2769 + }, + { + "epoch": 15.82857142857143, + "grad_norm": 
33.9091911315918, + "learning_rate": 3.796825396825397e-05, + "loss": 0.4578, + "step": 2770 + }, + { + "epoch": 15.834285714285715, + "grad_norm": 37.47408676147461, + "learning_rate": 3.7961904761904765e-05, + "loss": 0.5218, + "step": 2771 + }, + { + "epoch": 15.84, + "grad_norm": 93.31388854980469, + "learning_rate": 3.795555555555556e-05, + "loss": 0.5025, + "step": 2772 + }, + { + "epoch": 15.845714285714285, + "grad_norm": 594.1710205078125, + "learning_rate": 3.794920634920635e-05, + "loss": 0.3977, + "step": 2773 + }, + { + "epoch": 15.85142857142857, + "grad_norm": 420.21630859375, + "learning_rate": 3.794285714285715e-05, + "loss": 0.4002, + "step": 2774 + }, + { + "epoch": 15.857142857142858, + "grad_norm": 56.7486572265625, + "learning_rate": 3.7936507936507935e-05, + "loss": 0.3438, + "step": 2775 + }, + { + "epoch": 15.862857142857143, + "grad_norm": 72.90874481201172, + "learning_rate": 3.7930158730158735e-05, + "loss": 0.3971, + "step": 2776 + }, + { + "epoch": 15.868571428571428, + "grad_norm": 54.37617874145508, + "learning_rate": 3.792380952380953e-05, + "loss": 0.5038, + "step": 2777 + }, + { + "epoch": 15.874285714285714, + "grad_norm": 27.699661254882812, + "learning_rate": 3.791746031746032e-05, + "loss": 0.7789, + "step": 2778 + }, + { + "epoch": 15.88, + "grad_norm": 39.549964904785156, + "learning_rate": 3.791111111111111e-05, + "loss": 0.3185, + "step": 2779 + }, + { + "epoch": 15.885714285714286, + "grad_norm": 66.69050598144531, + "learning_rate": 3.7904761904761905e-05, + "loss": 0.4035, + "step": 2780 + }, + { + "epoch": 15.891428571428571, + "grad_norm": 27.588712692260742, + "learning_rate": 3.78984126984127e-05, + "loss": 0.3815, + "step": 2781 + }, + { + "epoch": 15.897142857142857, + "grad_norm": 66.0377197265625, + "learning_rate": 3.78920634920635e-05, + "loss": 0.5724, + "step": 2782 + }, + { + "epoch": 15.902857142857142, + "grad_norm": 53.48280715942383, + "learning_rate": 3.788571428571428e-05, + "loss": 0.2976, + "step": 
2783 + }, + { + "epoch": 15.90857142857143, + "grad_norm": 182.84542846679688, + "learning_rate": 3.787936507936508e-05, + "loss": 0.483, + "step": 2784 + }, + { + "epoch": 15.914285714285715, + "grad_norm": 42.55234146118164, + "learning_rate": 3.7873015873015875e-05, + "loss": 0.5256, + "step": 2785 + }, + { + "epoch": 15.92, + "grad_norm": 54.200740814208984, + "learning_rate": 3.786666666666667e-05, + "loss": 0.5245, + "step": 2786 + }, + { + "epoch": 15.925714285714285, + "grad_norm": 43.21343231201172, + "learning_rate": 3.786031746031746e-05, + "loss": 0.485, + "step": 2787 + }, + { + "epoch": 15.93142857142857, + "grad_norm": 71.44453430175781, + "learning_rate": 3.785396825396826e-05, + "loss": 0.4702, + "step": 2788 + }, + { + "epoch": 15.937142857142858, + "grad_norm": 25.477115631103516, + "learning_rate": 3.7847619047619046e-05, + "loss": 0.754, + "step": 2789 + }, + { + "epoch": 15.942857142857143, + "grad_norm": 35.87270736694336, + "learning_rate": 3.7841269841269845e-05, + "loss": 0.417, + "step": 2790 + }, + { + "epoch": 15.948571428571428, + "grad_norm": 31.33983039855957, + "learning_rate": 3.783492063492063e-05, + "loss": 0.5738, + "step": 2791 + }, + { + "epoch": 15.954285714285714, + "grad_norm": 37.12321853637695, + "learning_rate": 3.782857142857143e-05, + "loss": 0.5188, + "step": 2792 + }, + { + "epoch": 15.96, + "grad_norm": 41.17721939086914, + "learning_rate": 3.782222222222222e-05, + "loss": 0.433, + "step": 2793 + }, + { + "epoch": 15.965714285714286, + "grad_norm": 68.94007110595703, + "learning_rate": 3.7815873015873016e-05, + "loss": 0.2966, + "step": 2794 + }, + { + "epoch": 15.971428571428572, + "grad_norm": 747.8803100585938, + "learning_rate": 3.780952380952381e-05, + "loss": 0.3319, + "step": 2795 + }, + { + "epoch": 15.977142857142857, + "grad_norm": 41.70159149169922, + "learning_rate": 3.780317460317461e-05, + "loss": 0.584, + "step": 2796 + }, + { + "epoch": 15.982857142857142, + "grad_norm": 174.68283081054688, + 
"learning_rate": 3.77968253968254e-05, + "loss": 0.4363, + "step": 2797 + }, + { + "epoch": 15.98857142857143, + "grad_norm": 32.647674560546875, + "learning_rate": 3.779047619047619e-05, + "loss": 0.4207, + "step": 2798 + }, + { + "epoch": 15.994285714285715, + "grad_norm": 23.21664810180664, + "learning_rate": 3.7784126984126986e-05, + "loss": 0.3731, + "step": 2799 + }, + { + "epoch": 16.0, + "grad_norm": 34.9289436340332, + "learning_rate": 3.777777777777778e-05, + "loss": 0.5294, + "step": 2800 + }, + { + "epoch": 16.0, + "eval_classes": 0, + "eval_loss": 0.6346626877784729, + "eval_map": 0.9101, + "eval_map_50": 0.9563, + "eval_map_75": 0.9431, + "eval_map_large": 0.9104, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9101, + "eval_map_small": -1.0, + "eval_mar_1": 0.7705, + "eval_mar_10": 0.9676, + "eval_mar_100": 0.9784, + "eval_mar_100_per_class": 0.9784, + "eval_mar_large": 0.9784, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 14.2732, + "eval_samples_per_second": 20.598, + "eval_steps_per_second": 2.592, + "step": 2800 + }, + { + "epoch": 16.005714285714287, + "grad_norm": 70.0713119506836, + "learning_rate": 3.777142857142858e-05, + "loss": 0.3912, + "step": 2801 + }, + { + "epoch": 16.01142857142857, + "grad_norm": 164.6759033203125, + "learning_rate": 3.7765079365079364e-05, + "loss": 0.4858, + "step": 2802 + }, + { + "epoch": 16.017142857142858, + "grad_norm": 25.241962432861328, + "learning_rate": 3.775873015873016e-05, + "loss": 0.3923, + "step": 2803 + }, + { + "epoch": 16.02285714285714, + "grad_norm": 41.69196701049805, + "learning_rate": 3.7752380952380956e-05, + "loss": 0.4699, + "step": 2804 + }, + { + "epoch": 16.02857142857143, + "grad_norm": 58.33809280395508, + "learning_rate": 3.774603174603175e-05, + "loss": 0.3888, + "step": 2805 + }, + { + "epoch": 16.034285714285716, + "grad_norm": 31.243627548217773, + "learning_rate": 3.773968253968254e-05, + "loss": 0.524, + "step": 2806 + }, + { + "epoch": 16.04, + 
"grad_norm": 87.03840637207031, + "learning_rate": 3.773333333333334e-05, + "loss": 0.4914, + "step": 2807 + }, + { + "epoch": 16.045714285714286, + "grad_norm": 22.526199340820312, + "learning_rate": 3.7726984126984126e-05, + "loss": 0.3523, + "step": 2808 + }, + { + "epoch": 16.05142857142857, + "grad_norm": 45.92203903198242, + "learning_rate": 3.7720634920634926e-05, + "loss": 0.4451, + "step": 2809 + }, + { + "epoch": 16.057142857142857, + "grad_norm": 36.594539642333984, + "learning_rate": 3.771428571428572e-05, + "loss": 0.4803, + "step": 2810 + }, + { + "epoch": 16.062857142857144, + "grad_norm": 28.845773696899414, + "learning_rate": 3.770793650793651e-05, + "loss": 0.343, + "step": 2811 + }, + { + "epoch": 16.068571428571428, + "grad_norm": 58.28946304321289, + "learning_rate": 3.7701587301587303e-05, + "loss": 0.569, + "step": 2812 + }, + { + "epoch": 16.074285714285715, + "grad_norm": 48.23463439941406, + "learning_rate": 3.7695238095238096e-05, + "loss": 0.4001, + "step": 2813 + }, + { + "epoch": 16.08, + "grad_norm": 153.3620147705078, + "learning_rate": 3.768888888888889e-05, + "loss": 0.4701, + "step": 2814 + }, + { + "epoch": 16.085714285714285, + "grad_norm": 393.6240539550781, + "learning_rate": 3.768253968253969e-05, + "loss": 0.4897, + "step": 2815 + }, + { + "epoch": 16.091428571428573, + "grad_norm": 63.71856689453125, + "learning_rate": 3.7676190476190474e-05, + "loss": 0.6476, + "step": 2816 + }, + { + "epoch": 16.097142857142856, + "grad_norm": 61.07762145996094, + "learning_rate": 3.766984126984127e-05, + "loss": 0.392, + "step": 2817 + }, + { + "epoch": 16.102857142857143, + "grad_norm": 38.67390441894531, + "learning_rate": 3.7663492063492066e-05, + "loss": 0.4281, + "step": 2818 + }, + { + "epoch": 16.10857142857143, + "grad_norm": 43.751853942871094, + "learning_rate": 3.765714285714286e-05, + "loss": 0.3907, + "step": 2819 + }, + { + "epoch": 16.114285714285714, + "grad_norm": 43.01502990722656, + "learning_rate": 
3.765079365079365e-05, + "loss": 0.334, + "step": 2820 + }, + { + "epoch": 16.12, + "grad_norm": 56.10337829589844, + "learning_rate": 3.764444444444445e-05, + "loss": 0.3067, + "step": 2821 + }, + { + "epoch": 16.125714285714285, + "grad_norm": 19.618566513061523, + "learning_rate": 3.7638095238095237e-05, + "loss": 0.3388, + "step": 2822 + }, + { + "epoch": 16.13142857142857, + "grad_norm": 65.33924865722656, + "learning_rate": 3.7631746031746036e-05, + "loss": 0.4639, + "step": 2823 + }, + { + "epoch": 16.13714285714286, + "grad_norm": 44.41948699951172, + "learning_rate": 3.762539682539682e-05, + "loss": 0.3743, + "step": 2824 + }, + { + "epoch": 16.142857142857142, + "grad_norm": 45.491249084472656, + "learning_rate": 3.761904761904762e-05, + "loss": 0.3742, + "step": 2825 + }, + { + "epoch": 16.14857142857143, + "grad_norm": 17.892271041870117, + "learning_rate": 3.7612698412698414e-05, + "loss": 0.3034, + "step": 2826 + }, + { + "epoch": 16.154285714285713, + "grad_norm": 52.51234436035156, + "learning_rate": 3.7606349206349207e-05, + "loss": 0.5413, + "step": 2827 + }, + { + "epoch": 16.16, + "grad_norm": 36.29309844970703, + "learning_rate": 3.76e-05, + "loss": 0.5266, + "step": 2828 + }, + { + "epoch": 16.165714285714287, + "grad_norm": 369.3990783691406, + "learning_rate": 3.75936507936508e-05, + "loss": 0.3304, + "step": 2829 + }, + { + "epoch": 16.17142857142857, + "grad_norm": 43.918094635009766, + "learning_rate": 3.7587301587301584e-05, + "loss": 0.3597, + "step": 2830 + }, + { + "epoch": 16.177142857142858, + "grad_norm": 42.64336395263672, + "learning_rate": 3.7580952380952384e-05, + "loss": 0.4644, + "step": 2831 + }, + { + "epoch": 16.18285714285714, + "grad_norm": 54.486289978027344, + "learning_rate": 3.7574603174603176e-05, + "loss": 0.4097, + "step": 2832 + }, + { + "epoch": 16.18857142857143, + "grad_norm": 43.048362731933594, + "learning_rate": 3.756825396825397e-05, + "loss": 0.403, + "step": 2833 + }, + { + "epoch": 16.194285714285716, + 
"grad_norm": 85.92316436767578, + "learning_rate": 3.756190476190476e-05, + "loss": 0.5137, + "step": 2834 + }, + { + "epoch": 16.2, + "grad_norm": 33.11586380004883, + "learning_rate": 3.7555555555555554e-05, + "loss": 0.284, + "step": 2835 + }, + { + "epoch": 16.205714285714286, + "grad_norm": 48.35753631591797, + "learning_rate": 3.7549206349206354e-05, + "loss": 0.5367, + "step": 2836 + }, + { + "epoch": 16.21142857142857, + "grad_norm": 41.10136413574219, + "learning_rate": 3.7542857142857146e-05, + "loss": 0.6013, + "step": 2837 + }, + { + "epoch": 16.217142857142857, + "grad_norm": 30.050140380859375, + "learning_rate": 3.753650793650794e-05, + "loss": 0.453, + "step": 2838 + }, + { + "epoch": 16.222857142857144, + "grad_norm": 70.18745422363281, + "learning_rate": 3.753015873015873e-05, + "loss": 0.4409, + "step": 2839 + }, + { + "epoch": 16.228571428571428, + "grad_norm": 42.98176193237305, + "learning_rate": 3.752380952380953e-05, + "loss": 0.4177, + "step": 2840 + }, + { + "epoch": 16.234285714285715, + "grad_norm": 54.87812042236328, + "learning_rate": 3.751746031746032e-05, + "loss": 0.4733, + "step": 2841 + }, + { + "epoch": 16.24, + "grad_norm": 57.734134674072266, + "learning_rate": 3.7511111111111116e-05, + "loss": 0.5491, + "step": 2842 + }, + { + "epoch": 16.245714285714286, + "grad_norm": 26.532094955444336, + "learning_rate": 3.750476190476191e-05, + "loss": 0.4879, + "step": 2843 + }, + { + "epoch": 16.251428571428573, + "grad_norm": 44.592567443847656, + "learning_rate": 3.74984126984127e-05, + "loss": 0.4672, + "step": 2844 + }, + { + "epoch": 16.257142857142856, + "grad_norm": 16.300241470336914, + "learning_rate": 3.7492063492063494e-05, + "loss": 0.4049, + "step": 2845 + }, + { + "epoch": 16.262857142857143, + "grad_norm": 81.98695373535156, + "learning_rate": 3.748571428571429e-05, + "loss": 0.4012, + "step": 2846 + }, + { + "epoch": 16.268571428571427, + "grad_norm": 209.06448364257812, + "learning_rate": 3.747936507936508e-05, + 
"loss": 0.3762, + "step": 2847 + }, + { + "epoch": 16.274285714285714, + "grad_norm": 53.82666778564453, + "learning_rate": 3.747301587301588e-05, + "loss": 0.3892, + "step": 2848 + }, + { + "epoch": 16.28, + "grad_norm": 53.240901947021484, + "learning_rate": 3.7466666666666665e-05, + "loss": 0.3839, + "step": 2849 + }, + { + "epoch": 16.285714285714285, + "grad_norm": 571.2046508789062, + "learning_rate": 3.7460317460317464e-05, + "loss": 0.4161, + "step": 2850 + }, + { + "epoch": 16.291428571428572, + "grad_norm": 135.78126525878906, + "learning_rate": 3.745396825396826e-05, + "loss": 0.46, + "step": 2851 + }, + { + "epoch": 16.29714285714286, + "grad_norm": 29.33208656311035, + "learning_rate": 3.744761904761905e-05, + "loss": 0.5425, + "step": 2852 + }, + { + "epoch": 16.302857142857142, + "grad_norm": 84.61248779296875, + "learning_rate": 3.744126984126984e-05, + "loss": 0.4112, + "step": 2853 + }, + { + "epoch": 16.30857142857143, + "grad_norm": 49.917423248291016, + "learning_rate": 3.743492063492064e-05, + "loss": 0.4645, + "step": 2854 + }, + { + "epoch": 16.314285714285713, + "grad_norm": 228.82005310058594, + "learning_rate": 3.742857142857143e-05, + "loss": 0.9195, + "step": 2855 + }, + { + "epoch": 16.32, + "grad_norm": 64.351318359375, + "learning_rate": 3.742222222222223e-05, + "loss": 0.2927, + "step": 2856 + }, + { + "epoch": 16.325714285714287, + "grad_norm": 68.40682220458984, + "learning_rate": 3.741587301587301e-05, + "loss": 0.4447, + "step": 2857 + }, + { + "epoch": 16.33142857142857, + "grad_norm": 30.027559280395508, + "learning_rate": 3.740952380952381e-05, + "loss": 0.5933, + "step": 2858 + }, + { + "epoch": 16.337142857142858, + "grad_norm": 62.45819091796875, + "learning_rate": 3.7403174603174605e-05, + "loss": 0.6428, + "step": 2859 + }, + { + "epoch": 16.34285714285714, + "grad_norm": 43.999488830566406, + "learning_rate": 3.73968253968254e-05, + "loss": 0.525, + "step": 2860 + }, + { + "epoch": 16.34857142857143, + "grad_norm": 
43.05434799194336, + "learning_rate": 3.739047619047619e-05, + "loss": 0.3183, + "step": 2861 + }, + { + "epoch": 16.354285714285716, + "grad_norm": 44.31922912597656, + "learning_rate": 3.738412698412699e-05, + "loss": 0.6758, + "step": 2862 + }, + { + "epoch": 16.36, + "grad_norm": 50.412818908691406, + "learning_rate": 3.7377777777777775e-05, + "loss": 0.8291, + "step": 2863 + }, + { + "epoch": 16.365714285714287, + "grad_norm": 29.43809700012207, + "learning_rate": 3.7371428571428575e-05, + "loss": 0.5251, + "step": 2864 + }, + { + "epoch": 16.37142857142857, + "grad_norm": 30.69464874267578, + "learning_rate": 3.736507936507937e-05, + "loss": 0.3024, + "step": 2865 + }, + { + "epoch": 16.377142857142857, + "grad_norm": 77.88601684570312, + "learning_rate": 3.735873015873016e-05, + "loss": 0.4164, + "step": 2866 + }, + { + "epoch": 16.382857142857144, + "grad_norm": 47.551692962646484, + "learning_rate": 3.735238095238095e-05, + "loss": 0.4054, + "step": 2867 + }, + { + "epoch": 16.388571428571428, + "grad_norm": 44.56904602050781, + "learning_rate": 3.7346031746031745e-05, + "loss": 0.4226, + "step": 2868 + }, + { + "epoch": 16.394285714285715, + "grad_norm": 47.03104019165039, + "learning_rate": 3.733968253968254e-05, + "loss": 0.2749, + "step": 2869 + }, + { + "epoch": 16.4, + "grad_norm": 22.48953628540039, + "learning_rate": 3.733333333333334e-05, + "loss": 0.3491, + "step": 2870 + }, + { + "epoch": 16.405714285714286, + "grad_norm": 30.08992576599121, + "learning_rate": 3.732698412698413e-05, + "loss": 0.3032, + "step": 2871 + }, + { + "epoch": 16.411428571428573, + "grad_norm": 45.1522102355957, + "learning_rate": 3.732063492063492e-05, + "loss": 0.5812, + "step": 2872 + }, + { + "epoch": 16.417142857142856, + "grad_norm": 132.7905731201172, + "learning_rate": 3.7314285714285715e-05, + "loss": 0.4708, + "step": 2873 + }, + { + "epoch": 16.422857142857143, + "grad_norm": 49.749820709228516, + "learning_rate": 3.730793650793651e-05, + "loss": 0.4062, + 
"step": 2874 + }, + { + "epoch": 16.428571428571427, + "grad_norm": 28.60721778869629, + "learning_rate": 3.730158730158731e-05, + "loss": 0.3039, + "step": 2875 + }, + { + "epoch": 16.434285714285714, + "grad_norm": 88.1325912475586, + "learning_rate": 3.72952380952381e-05, + "loss": 0.4934, + "step": 2876 + }, + { + "epoch": 16.44, + "grad_norm": 30.96939468383789, + "learning_rate": 3.728888888888889e-05, + "loss": 0.4601, + "step": 2877 + }, + { + "epoch": 16.445714285714285, + "grad_norm": 28.20155906677246, + "learning_rate": 3.7282539682539685e-05, + "loss": 0.4153, + "step": 2878 + }, + { + "epoch": 16.451428571428572, + "grad_norm": 21.22536277770996, + "learning_rate": 3.727619047619048e-05, + "loss": 0.3379, + "step": 2879 + }, + { + "epoch": 16.457142857142856, + "grad_norm": 54.106266021728516, + "learning_rate": 3.726984126984127e-05, + "loss": 0.3259, + "step": 2880 + }, + { + "epoch": 16.462857142857143, + "grad_norm": 38.92586135864258, + "learning_rate": 3.726349206349207e-05, + "loss": 0.4354, + "step": 2881 + }, + { + "epoch": 16.46857142857143, + "grad_norm": 116.48697662353516, + "learning_rate": 3.7257142857142856e-05, + "loss": 0.3147, + "step": 2882 + }, + { + "epoch": 16.474285714285713, + "grad_norm": 49.13833999633789, + "learning_rate": 3.7250793650793655e-05, + "loss": 0.3508, + "step": 2883 + }, + { + "epoch": 16.48, + "grad_norm": 22.998550415039062, + "learning_rate": 3.724444444444445e-05, + "loss": 0.4516, + "step": 2884 + }, + { + "epoch": 16.485714285714284, + "grad_norm": 29.565940856933594, + "learning_rate": 3.723809523809524e-05, + "loss": 0.3818, + "step": 2885 + }, + { + "epoch": 16.49142857142857, + "grad_norm": 52.65448760986328, + "learning_rate": 3.723174603174603e-05, + "loss": 0.4555, + "step": 2886 + }, + { + "epoch": 16.497142857142858, + "grad_norm": 29.000492095947266, + "learning_rate": 3.722539682539683e-05, + "loss": 0.6371, + "step": 2887 + }, + { + "epoch": 16.502857142857142, + "grad_norm": 
35.8780403137207, + "learning_rate": 3.721904761904762e-05, + "loss": 0.4364, + "step": 2888 + }, + { + "epoch": 16.50857142857143, + "grad_norm": 48.73274612426758, + "learning_rate": 3.721269841269842e-05, + "loss": 0.3026, + "step": 2889 + }, + { + "epoch": 16.514285714285712, + "grad_norm": 25.079204559326172, + "learning_rate": 3.7206349206349204e-05, + "loss": 0.4094, + "step": 2890 + }, + { + "epoch": 16.52, + "grad_norm": 126.92987823486328, + "learning_rate": 3.72e-05, + "loss": 0.3633, + "step": 2891 + }, + { + "epoch": 16.525714285714287, + "grad_norm": 37.80255126953125, + "learning_rate": 3.7193650793650796e-05, + "loss": 0.3065, + "step": 2892 + }, + { + "epoch": 16.53142857142857, + "grad_norm": 23.101350784301758, + "learning_rate": 3.718730158730159e-05, + "loss": 0.5256, + "step": 2893 + }, + { + "epoch": 16.537142857142857, + "grad_norm": 15.055671691894531, + "learning_rate": 3.718095238095238e-05, + "loss": 0.2747, + "step": 2894 + }, + { + "epoch": 16.542857142857144, + "grad_norm": 20.80678367614746, + "learning_rate": 3.717460317460318e-05, + "loss": 0.4808, + "step": 2895 + }, + { + "epoch": 16.548571428571428, + "grad_norm": 64.26889038085938, + "learning_rate": 3.7168253968253966e-05, + "loss": 0.4706, + "step": 2896 + }, + { + "epoch": 16.554285714285715, + "grad_norm": 42.92035675048828, + "learning_rate": 3.7161904761904766e-05, + "loss": 0.4106, + "step": 2897 + }, + { + "epoch": 16.56, + "grad_norm": 103.74580383300781, + "learning_rate": 3.715555555555555e-05, + "loss": 0.4485, + "step": 2898 + }, + { + "epoch": 16.565714285714286, + "grad_norm": 32.300621032714844, + "learning_rate": 3.714920634920635e-05, + "loss": 0.3642, + "step": 2899 + }, + { + "epoch": 16.571428571428573, + "grad_norm": 71.21755981445312, + "learning_rate": 3.7142857142857143e-05, + "loss": 0.3378, + "step": 2900 + }, + { + "epoch": 16.577142857142857, + "grad_norm": 58.02399826049805, + "learning_rate": 3.7136507936507936e-05, + "loss": 0.4404, + "step": 
2901 + }, + { + "epoch": 16.582857142857144, + "grad_norm": 34.18932342529297, + "learning_rate": 3.713015873015873e-05, + "loss": 0.289, + "step": 2902 + }, + { + "epoch": 16.588571428571427, + "grad_norm": 25.37967300415039, + "learning_rate": 3.712380952380953e-05, + "loss": 0.3399, + "step": 2903 + }, + { + "epoch": 16.594285714285714, + "grad_norm": 31.51453399658203, + "learning_rate": 3.711746031746032e-05, + "loss": 0.38, + "step": 2904 + }, + { + "epoch": 16.6, + "grad_norm": 87.80889892578125, + "learning_rate": 3.7111111111111113e-05, + "loss": 0.4182, + "step": 2905 + }, + { + "epoch": 16.605714285714285, + "grad_norm": 48.856910705566406, + "learning_rate": 3.7104761904761906e-05, + "loss": 0.5091, + "step": 2906 + }, + { + "epoch": 16.611428571428572, + "grad_norm": 47.48170471191406, + "learning_rate": 3.70984126984127e-05, + "loss": 0.4715, + "step": 2907 + }, + { + "epoch": 16.617142857142856, + "grad_norm": 68.18540954589844, + "learning_rate": 3.709206349206349e-05, + "loss": 0.3001, + "step": 2908 + }, + { + "epoch": 16.622857142857143, + "grad_norm": 74.32144165039062, + "learning_rate": 3.7085714285714284e-05, + "loss": 0.3279, + "step": 2909 + }, + { + "epoch": 16.62857142857143, + "grad_norm": 46.929603576660156, + "learning_rate": 3.707936507936508e-05, + "loss": 0.4057, + "step": 2910 + }, + { + "epoch": 16.634285714285713, + "grad_norm": 138.69009399414062, + "learning_rate": 3.7073015873015876e-05, + "loss": 0.4818, + "step": 2911 + }, + { + "epoch": 16.64, + "grad_norm": 26.396936416625977, + "learning_rate": 3.706666666666667e-05, + "loss": 0.3299, + "step": 2912 + }, + { + "epoch": 16.645714285714284, + "grad_norm": 55.15882110595703, + "learning_rate": 3.706031746031746e-05, + "loss": 0.2829, + "step": 2913 + }, + { + "epoch": 16.65142857142857, + "grad_norm": 46.559879302978516, + "learning_rate": 3.705396825396826e-05, + "loss": 0.2844, + "step": 2914 + }, + { + "epoch": 16.65714285714286, + "grad_norm": 48.5512809753418, + 
"learning_rate": 3.7047619047619047e-05, + "loss": 0.3257, + "step": 2915 + }, + { + "epoch": 16.662857142857142, + "grad_norm": 30.01243019104004, + "learning_rate": 3.7041269841269846e-05, + "loss": 0.4041, + "step": 2916 + }, + { + "epoch": 16.66857142857143, + "grad_norm": 39.398494720458984, + "learning_rate": 3.703492063492064e-05, + "loss": 0.358, + "step": 2917 + }, + { + "epoch": 16.674285714285713, + "grad_norm": 26.072412490844727, + "learning_rate": 3.702857142857143e-05, + "loss": 0.5225, + "step": 2918 + }, + { + "epoch": 16.68, + "grad_norm": 46.15665817260742, + "learning_rate": 3.7022222222222224e-05, + "loss": 0.3997, + "step": 2919 + }, + { + "epoch": 16.685714285714287, + "grad_norm": 34.84221649169922, + "learning_rate": 3.7015873015873017e-05, + "loss": 0.3285, + "step": 2920 + }, + { + "epoch": 16.69142857142857, + "grad_norm": 174.21360778808594, + "learning_rate": 3.700952380952381e-05, + "loss": 0.3407, + "step": 2921 + }, + { + "epoch": 16.697142857142858, + "grad_norm": 75.02750396728516, + "learning_rate": 3.700317460317461e-05, + "loss": 0.3567, + "step": 2922 + }, + { + "epoch": 16.70285714285714, + "grad_norm": 30.636423110961914, + "learning_rate": 3.6996825396825394e-05, + "loss": 0.3554, + "step": 2923 + }, + { + "epoch": 16.708571428571428, + "grad_norm": 56.132301330566406, + "learning_rate": 3.6990476190476194e-05, + "loss": 0.3686, + "step": 2924 + }, + { + "epoch": 16.714285714285715, + "grad_norm": 37.22405242919922, + "learning_rate": 3.6984126984126986e-05, + "loss": 0.4488, + "step": 2925 + }, + { + "epoch": 16.72, + "grad_norm": 35.3524169921875, + "learning_rate": 3.697777777777778e-05, + "loss": 0.361, + "step": 2926 + }, + { + "epoch": 16.725714285714286, + "grad_norm": 39.20980453491211, + "learning_rate": 3.697142857142857e-05, + "loss": 0.3084, + "step": 2927 + }, + { + "epoch": 16.731428571428573, + "grad_norm": 38.22840881347656, + "learning_rate": 3.696507936507937e-05, + "loss": 0.2693, + "step": 2928 + }, + { 
+ "epoch": 16.737142857142857, + "grad_norm": 65.73733520507812, + "learning_rate": 3.695873015873016e-05, + "loss": 0.4157, + "step": 2929 + }, + { + "epoch": 16.742857142857144, + "grad_norm": 18.452590942382812, + "learning_rate": 3.6952380952380956e-05, + "loss": 0.2219, + "step": 2930 + }, + { + "epoch": 16.748571428571427, + "grad_norm": 298.76788330078125, + "learning_rate": 3.694603174603174e-05, + "loss": 0.3322, + "step": 2931 + }, + { + "epoch": 16.754285714285714, + "grad_norm": 30.17223358154297, + "learning_rate": 3.693968253968254e-05, + "loss": 0.3755, + "step": 2932 + }, + { + "epoch": 16.76, + "grad_norm": 49.654518127441406, + "learning_rate": 3.6933333333333334e-05, + "loss": 0.5506, + "step": 2933 + }, + { + "epoch": 16.765714285714285, + "grad_norm": 82.34229278564453, + "learning_rate": 3.692698412698413e-05, + "loss": 0.3268, + "step": 2934 + }, + { + "epoch": 16.771428571428572, + "grad_norm": 23.01658058166504, + "learning_rate": 3.692063492063492e-05, + "loss": 0.3225, + "step": 2935 + }, + { + "epoch": 16.777142857142856, + "grad_norm": 60.740543365478516, + "learning_rate": 3.691428571428572e-05, + "loss": 0.3819, + "step": 2936 + }, + { + "epoch": 16.782857142857143, + "grad_norm": 28.621213912963867, + "learning_rate": 3.6907936507936505e-05, + "loss": 0.3418, + "step": 2937 + }, + { + "epoch": 16.78857142857143, + "grad_norm": 31.923004150390625, + "learning_rate": 3.6901587301587304e-05, + "loss": 0.4115, + "step": 2938 + }, + { + "epoch": 16.794285714285714, + "grad_norm": 33.66228103637695, + "learning_rate": 3.68952380952381e-05, + "loss": 0.4161, + "step": 2939 + }, + { + "epoch": 16.8, + "grad_norm": 142.54344177246094, + "learning_rate": 3.688888888888889e-05, + "loss": 0.4069, + "step": 2940 + }, + { + "epoch": 16.805714285714284, + "grad_norm": 241.73483276367188, + "learning_rate": 3.688253968253968e-05, + "loss": 0.456, + "step": 2941 + }, + { + "epoch": 16.81142857142857, + "grad_norm": 37.379966735839844, + 
"learning_rate": 3.6876190476190475e-05, + "loss": 0.4624, + "step": 2942 + }, + { + "epoch": 16.81714285714286, + "grad_norm": 56.85732650756836, + "learning_rate": 3.6869841269841274e-05, + "loss": 0.3562, + "step": 2943 + }, + { + "epoch": 16.822857142857142, + "grad_norm": 82.3569107055664, + "learning_rate": 3.686349206349207e-05, + "loss": 0.3482, + "step": 2944 + }, + { + "epoch": 16.82857142857143, + "grad_norm": 79.0669174194336, + "learning_rate": 3.685714285714286e-05, + "loss": 0.3587, + "step": 2945 + }, + { + "epoch": 16.834285714285713, + "grad_norm": 35.16192626953125, + "learning_rate": 3.685079365079365e-05, + "loss": 0.2756, + "step": 2946 + }, + { + "epoch": 16.84, + "grad_norm": 33.91846466064453, + "learning_rate": 3.6844444444444445e-05, + "loss": 0.3415, + "step": 2947 + }, + { + "epoch": 16.845714285714287, + "grad_norm": 41.016937255859375, + "learning_rate": 3.683809523809524e-05, + "loss": 0.4056, + "step": 2948 + }, + { + "epoch": 16.85142857142857, + "grad_norm": 916.173828125, + "learning_rate": 3.683174603174604e-05, + "loss": 0.2955, + "step": 2949 + }, + { + "epoch": 16.857142857142858, + "grad_norm": 78.90193939208984, + "learning_rate": 3.682539682539683e-05, + "loss": 0.7352, + "step": 2950 + }, + { + "epoch": 16.86285714285714, + "grad_norm": 264.2978515625, + "learning_rate": 3.681904761904762e-05, + "loss": 0.4367, + "step": 2951 + }, + { + "epoch": 16.86857142857143, + "grad_norm": 37.197513580322266, + "learning_rate": 3.6812698412698415e-05, + "loss": 0.3406, + "step": 2952 + }, + { + "epoch": 16.874285714285715, + "grad_norm": 70.45872497558594, + "learning_rate": 3.680634920634921e-05, + "loss": 0.338, + "step": 2953 + }, + { + "epoch": 16.88, + "grad_norm": 21.459962844848633, + "learning_rate": 3.68e-05, + "loss": 0.5137, + "step": 2954 + }, + { + "epoch": 16.885714285714286, + "grad_norm": 96.63094329833984, + "learning_rate": 3.67936507936508e-05, + "loss": 0.4871, + "step": 2955 + }, + { + "epoch": 
16.89142857142857, + "grad_norm": 36.50151062011719, + "learning_rate": 3.6787301587301585e-05, + "loss": 0.4339, + "step": 2956 + }, + { + "epoch": 16.897142857142857, + "grad_norm": 28.966182708740234, + "learning_rate": 3.6780952380952385e-05, + "loss": 0.454, + "step": 2957 + }, + { + "epoch": 16.902857142857144, + "grad_norm": 37.07241439819336, + "learning_rate": 3.677460317460318e-05, + "loss": 0.428, + "step": 2958 + }, + { + "epoch": 16.908571428571427, + "grad_norm": 297.666015625, + "learning_rate": 3.676825396825397e-05, + "loss": 0.52, + "step": 2959 + }, + { + "epoch": 16.914285714285715, + "grad_norm": 71.79993438720703, + "learning_rate": 3.676190476190476e-05, + "loss": 0.3673, + "step": 2960 + }, + { + "epoch": 16.92, + "grad_norm": 86.70881652832031, + "learning_rate": 3.675555555555556e-05, + "loss": 0.4297, + "step": 2961 + }, + { + "epoch": 16.925714285714285, + "grad_norm": 72.41195678710938, + "learning_rate": 3.674920634920635e-05, + "loss": 0.4856, + "step": 2962 + }, + { + "epoch": 16.931428571428572, + "grad_norm": 50.68089294433594, + "learning_rate": 3.674285714285715e-05, + "loss": 0.4572, + "step": 2963 + }, + { + "epoch": 16.937142857142856, + "grad_norm": 54.560829162597656, + "learning_rate": 3.673650793650793e-05, + "loss": 0.3642, + "step": 2964 + }, + { + "epoch": 16.942857142857143, + "grad_norm": 73.60107421875, + "learning_rate": 3.673015873015873e-05, + "loss": 0.3029, + "step": 2965 + }, + { + "epoch": 16.94857142857143, + "grad_norm": 67.49030303955078, + "learning_rate": 3.6723809523809525e-05, + "loss": 0.2984, + "step": 2966 + }, + { + "epoch": 16.954285714285714, + "grad_norm": 69.36986541748047, + "learning_rate": 3.671746031746032e-05, + "loss": 0.6765, + "step": 2967 + }, + { + "epoch": 16.96, + "grad_norm": 61.46893310546875, + "learning_rate": 3.671111111111111e-05, + "loss": 0.4871, + "step": 2968 + }, + { + "epoch": 16.965714285714284, + "grad_norm": 67.94396209716797, + "learning_rate": 3.670476190476191e-05, 
+ "loss": 0.4181, + "step": 2969 + }, + { + "epoch": 16.97142857142857, + "grad_norm": 54.474754333496094, + "learning_rate": 3.6698412698412696e-05, + "loss": 0.4505, + "step": 2970 + }, + { + "epoch": 16.97714285714286, + "grad_norm": 29.335342407226562, + "learning_rate": 3.6692063492063495e-05, + "loss": 0.4793, + "step": 2971 + }, + { + "epoch": 16.982857142857142, + "grad_norm": 64.14628601074219, + "learning_rate": 3.668571428571429e-05, + "loss": 0.412, + "step": 2972 + }, + { + "epoch": 16.98857142857143, + "grad_norm": 90.38223266601562, + "learning_rate": 3.667936507936508e-05, + "loss": 0.3173, + "step": 2973 + }, + { + "epoch": 16.994285714285713, + "grad_norm": 61.129180908203125, + "learning_rate": 3.667301587301587e-05, + "loss": 0.2842, + "step": 2974 + }, + { + "epoch": 17.0, + "grad_norm": 238.58200073242188, + "learning_rate": 3.6666666666666666e-05, + "loss": 0.5036, + "step": 2975 + }, + { + "epoch": 17.0, + "eval_classes": 0, + "eval_loss": 0.624391496181488, + "eval_map": 0.917, + "eval_map_50": 0.9557, + "eval_map_75": 0.9461, + "eval_map_large": 0.9173, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.917, + "eval_map_small": -1.0, + "eval_mar_1": 0.7778, + "eval_mar_10": 0.9679, + "eval_mar_100": 0.9781, + "eval_mar_100_per_class": 0.9781, + "eval_mar_large": 0.9781, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 15.4052, + "eval_samples_per_second": 19.084, + "eval_steps_per_second": 2.402, + "step": 2975 + }, + { + "epoch": 17.005714285714287, + "grad_norm": 39.56782531738281, + "learning_rate": 3.666031746031746e-05, + "loss": 0.5377, + "step": 2976 + }, + { + "epoch": 17.01142857142857, + "grad_norm": 73.38939666748047, + "learning_rate": 3.665396825396826e-05, + "loss": 0.3228, + "step": 2977 + }, + { + "epoch": 17.017142857142858, + "grad_norm": 69.7122802734375, + "learning_rate": 3.664761904761905e-05, + "loss": 0.2551, + "step": 2978 + }, + { + "epoch": 17.02285714285714, + "grad_norm": 
40.25721740722656, + "learning_rate": 3.664126984126984e-05, + "loss": 0.452, + "step": 2979 + }, + { + "epoch": 17.02857142857143, + "grad_norm": 101.58560943603516, + "learning_rate": 3.6634920634920636e-05, + "loss": 0.5753, + "step": 2980 + }, + { + "epoch": 17.034285714285716, + "grad_norm": 51.15162658691406, + "learning_rate": 3.662857142857143e-05, + "loss": 0.4272, + "step": 2981 + }, + { + "epoch": 17.04, + "grad_norm": 44.9673957824707, + "learning_rate": 3.662222222222223e-05, + "loss": 0.3446, + "step": 2982 + }, + { + "epoch": 17.045714285714286, + "grad_norm": 63.32594299316406, + "learning_rate": 3.661587301587302e-05, + "loss": 0.2733, + "step": 2983 + }, + { + "epoch": 17.05142857142857, + "grad_norm": 79.81805419921875, + "learning_rate": 3.660952380952381e-05, + "loss": 0.3849, + "step": 2984 + }, + { + "epoch": 17.057142857142857, + "grad_norm": 109.49251556396484, + "learning_rate": 3.6603174603174606e-05, + "loss": 0.6633, + "step": 2985 + }, + { + "epoch": 17.062857142857144, + "grad_norm": 55.362274169921875, + "learning_rate": 3.65968253968254e-05, + "loss": 0.2912, + "step": 2986 + }, + { + "epoch": 17.068571428571428, + "grad_norm": 28.252758026123047, + "learning_rate": 3.659047619047619e-05, + "loss": 0.346, + "step": 2987 + }, + { + "epoch": 17.074285714285715, + "grad_norm": 34.26996994018555, + "learning_rate": 3.658412698412699e-05, + "loss": 0.476, + "step": 2988 + }, + { + "epoch": 17.08, + "grad_norm": 28.678089141845703, + "learning_rate": 3.6577777777777776e-05, + "loss": 0.381, + "step": 2989 + }, + { + "epoch": 17.085714285714285, + "grad_norm": 46.957889556884766, + "learning_rate": 3.6571428571428576e-05, + "loss": 0.3678, + "step": 2990 + }, + { + "epoch": 17.091428571428573, + "grad_norm": 25.080982208251953, + "learning_rate": 3.656507936507937e-05, + "loss": 0.4421, + "step": 2991 + }, + { + "epoch": 17.097142857142856, + "grad_norm": 60.31837844848633, + "learning_rate": 3.655873015873016e-05, + "loss": 0.3178, + 
"step": 2992 + }, + { + "epoch": 17.102857142857143, + "grad_norm": 39.1340446472168, + "learning_rate": 3.6552380952380953e-05, + "loss": 0.5619, + "step": 2993 + }, + { + "epoch": 17.10857142857143, + "grad_norm": 106.81813049316406, + "learning_rate": 3.654603174603175e-05, + "loss": 0.419, + "step": 2994 + }, + { + "epoch": 17.114285714285714, + "grad_norm": 25.078277587890625, + "learning_rate": 3.653968253968254e-05, + "loss": 0.37, + "step": 2995 + }, + { + "epoch": 17.12, + "grad_norm": 26.536161422729492, + "learning_rate": 3.653333333333334e-05, + "loss": 0.4933, + "step": 2996 + }, + { + "epoch": 17.125714285714285, + "grad_norm": 32.90211868286133, + "learning_rate": 3.6526984126984124e-05, + "loss": 0.4095, + "step": 2997 + }, + { + "epoch": 17.13142857142857, + "grad_norm": 39.533451080322266, + "learning_rate": 3.6520634920634923e-05, + "loss": 0.4326, + "step": 2998 + }, + { + "epoch": 17.13714285714286, + "grad_norm": 23.58121109008789, + "learning_rate": 3.6514285714285716e-05, + "loss": 0.3752, + "step": 2999 + }, + { + "epoch": 17.142857142857142, + "grad_norm": 50.7238883972168, + "learning_rate": 3.650793650793651e-05, + "loss": 0.3307, + "step": 3000 + }, + { + "epoch": 17.14857142857143, + "grad_norm": 34.07426452636719, + "learning_rate": 3.65015873015873e-05, + "loss": 0.3281, + "step": 3001 + }, + { + "epoch": 17.154285714285713, + "grad_norm": 35.5304069519043, + "learning_rate": 3.64952380952381e-05, + "loss": 0.3887, + "step": 3002 + }, + { + "epoch": 17.16, + "grad_norm": 88.01673126220703, + "learning_rate": 3.648888888888889e-05, + "loss": 0.4569, + "step": 3003 + }, + { + "epoch": 17.165714285714287, + "grad_norm": 63.651283264160156, + "learning_rate": 3.6482539682539686e-05, + "loss": 0.4374, + "step": 3004 + }, + { + "epoch": 17.17142857142857, + "grad_norm": 25.01502799987793, + "learning_rate": 3.647619047619048e-05, + "loss": 0.6202, + "step": 3005 + }, + { + "epoch": 17.177142857142858, + "grad_norm": 75.9010009765625, + 
"learning_rate": 3.646984126984127e-05, + "loss": 0.5473, + "step": 3006 + }, + { + "epoch": 17.18285714285714, + "grad_norm": 276.6267395019531, + "learning_rate": 3.6463492063492064e-05, + "loss": 0.3612, + "step": 3007 + }, + { + "epoch": 17.18857142857143, + "grad_norm": 23.316967010498047, + "learning_rate": 3.6457142857142857e-05, + "loss": 0.3967, + "step": 3008 + }, + { + "epoch": 17.194285714285716, + "grad_norm": 81.98340606689453, + "learning_rate": 3.645079365079365e-05, + "loss": 0.4192, + "step": 3009 + }, + { + "epoch": 17.2, + "grad_norm": 33.65291976928711, + "learning_rate": 3.644444444444445e-05, + "loss": 0.3537, + "step": 3010 + }, + { + "epoch": 17.205714285714286, + "grad_norm": 35.97007751464844, + "learning_rate": 3.6438095238095234e-05, + "loss": 0.5064, + "step": 3011 + }, + { + "epoch": 17.21142857142857, + "grad_norm": 56.14579391479492, + "learning_rate": 3.6431746031746034e-05, + "loss": 0.3929, + "step": 3012 + }, + { + "epoch": 17.217142857142857, + "grad_norm": 278.3437194824219, + "learning_rate": 3.6425396825396827e-05, + "loss": 0.6538, + "step": 3013 + }, + { + "epoch": 17.222857142857144, + "grad_norm": 61.36874771118164, + "learning_rate": 3.641904761904762e-05, + "loss": 0.4674, + "step": 3014 + }, + { + "epoch": 17.228571428571428, + "grad_norm": 97.59931182861328, + "learning_rate": 3.641269841269841e-05, + "loss": 0.4182, + "step": 3015 + }, + { + "epoch": 17.234285714285715, + "grad_norm": 35.03517532348633, + "learning_rate": 3.640634920634921e-05, + "loss": 0.3226, + "step": 3016 + }, + { + "epoch": 17.24, + "grad_norm": 21.447011947631836, + "learning_rate": 3.6400000000000004e-05, + "loss": 0.411, + "step": 3017 + }, + { + "epoch": 17.245714285714286, + "grad_norm": 66.64205169677734, + "learning_rate": 3.6393650793650796e-05, + "loss": 0.3091, + "step": 3018 + }, + { + "epoch": 17.251428571428573, + "grad_norm": 53.64439392089844, + "learning_rate": 3.638730158730159e-05, + "loss": 0.591, + "step": 3019 + }, + { + 
"epoch": 17.257142857142856, + "grad_norm": 48.212242126464844, + "learning_rate": 3.638095238095238e-05, + "loss": 0.3899, + "step": 3020 + }, + { + "epoch": 17.262857142857143, + "grad_norm": 30.058061599731445, + "learning_rate": 3.637460317460318e-05, + "loss": 0.242, + "step": 3021 + }, + { + "epoch": 17.268571428571427, + "grad_norm": 31.290916442871094, + "learning_rate": 3.636825396825397e-05, + "loss": 0.3487, + "step": 3022 + }, + { + "epoch": 17.274285714285714, + "grad_norm": 67.60273742675781, + "learning_rate": 3.6361904761904766e-05, + "loss": 0.3153, + "step": 3023 + }, + { + "epoch": 17.28, + "grad_norm": 43.177005767822266, + "learning_rate": 3.635555555555556e-05, + "loss": 0.2997, + "step": 3024 + }, + { + "epoch": 17.285714285714285, + "grad_norm": 66.38561248779297, + "learning_rate": 3.634920634920635e-05, + "loss": 0.2976, + "step": 3025 + }, + { + "epoch": 17.291428571428572, + "grad_norm": 34.7242317199707, + "learning_rate": 3.6342857142857144e-05, + "loss": 0.2728, + "step": 3026 + }, + { + "epoch": 17.29714285714286, + "grad_norm": 22.105117797851562, + "learning_rate": 3.6336507936507944e-05, + "loss": 0.4381, + "step": 3027 + }, + { + "epoch": 17.302857142857142, + "grad_norm": 49.966773986816406, + "learning_rate": 3.633015873015873e-05, + "loss": 0.3749, + "step": 3028 + }, + { + "epoch": 17.30857142857143, + "grad_norm": 64.73253631591797, + "learning_rate": 3.632380952380953e-05, + "loss": 0.5557, + "step": 3029 + }, + { + "epoch": 17.314285714285713, + "grad_norm": 35.297607421875, + "learning_rate": 3.6317460317460315e-05, + "loss": 0.4947, + "step": 3030 + }, + { + "epoch": 17.32, + "grad_norm": 30.194656372070312, + "learning_rate": 3.6311111111111114e-05, + "loss": 0.3203, + "step": 3031 + }, + { + "epoch": 17.325714285714287, + "grad_norm": 125.94969177246094, + "learning_rate": 3.630476190476191e-05, + "loss": 0.6281, + "step": 3032 + }, + { + "epoch": 17.33142857142857, + "grad_norm": 671.4624633789062, + "learning_rate": 
3.62984126984127e-05, + "loss": 0.2862, + "step": 3033 + }, + { + "epoch": 17.337142857142858, + "grad_norm": 33.021636962890625, + "learning_rate": 3.629206349206349e-05, + "loss": 0.3952, + "step": 3034 + }, + { + "epoch": 17.34285714285714, + "grad_norm": 56.70850372314453, + "learning_rate": 3.628571428571429e-05, + "loss": 0.4269, + "step": 3035 + }, + { + "epoch": 17.34857142857143, + "grad_norm": 35.1391487121582, + "learning_rate": 3.627936507936508e-05, + "loss": 0.3951, + "step": 3036 + }, + { + "epoch": 17.354285714285716, + "grad_norm": 46.28184509277344, + "learning_rate": 3.627301587301588e-05, + "loss": 0.3088, + "step": 3037 + }, + { + "epoch": 17.36, + "grad_norm": 26.50900650024414, + "learning_rate": 3.626666666666667e-05, + "loss": 0.3955, + "step": 3038 + }, + { + "epoch": 17.365714285714287, + "grad_norm": 29.32590103149414, + "learning_rate": 3.626031746031746e-05, + "loss": 0.3719, + "step": 3039 + }, + { + "epoch": 17.37142857142857, + "grad_norm": 50.76521301269531, + "learning_rate": 3.6253968253968255e-05, + "loss": 0.4154, + "step": 3040 + }, + { + "epoch": 17.377142857142857, + "grad_norm": 64.29402160644531, + "learning_rate": 3.624761904761905e-05, + "loss": 0.3531, + "step": 3041 + }, + { + "epoch": 17.382857142857144, + "grad_norm": 60.52572250366211, + "learning_rate": 3.624126984126984e-05, + "loss": 0.4497, + "step": 3042 + }, + { + "epoch": 17.388571428571428, + "grad_norm": 24.278873443603516, + "learning_rate": 3.623492063492064e-05, + "loss": 0.3828, + "step": 3043 + }, + { + "epoch": 17.394285714285715, + "grad_norm": 70.13561248779297, + "learning_rate": 3.6228571428571425e-05, + "loss": 0.3123, + "step": 3044 + }, + { + "epoch": 17.4, + "grad_norm": 63.94584274291992, + "learning_rate": 3.6222222222222225e-05, + "loss": 0.4073, + "step": 3045 + }, + { + "epoch": 17.405714285714286, + "grad_norm": 15.398472785949707, + "learning_rate": 3.621587301587302e-05, + "loss": 0.2719, + "step": 3046 + }, + { + "epoch": 
17.411428571428573, + "grad_norm": 62.43785858154297, + "learning_rate": 3.620952380952381e-05, + "loss": 0.309, + "step": 3047 + }, + { + "epoch": 17.417142857142856, + "grad_norm": 69.08655548095703, + "learning_rate": 3.62031746031746e-05, + "loss": 0.2742, + "step": 3048 + }, + { + "epoch": 17.422857142857143, + "grad_norm": 45.39681625366211, + "learning_rate": 3.61968253968254e-05, + "loss": 0.3325, + "step": 3049 + }, + { + "epoch": 17.428571428571427, + "grad_norm": 39.39765930175781, + "learning_rate": 3.619047619047619e-05, + "loss": 0.5171, + "step": 3050 + }, + { + "epoch": 17.434285714285714, + "grad_norm": 92.37077331542969, + "learning_rate": 3.618412698412699e-05, + "loss": 0.5371, + "step": 3051 + }, + { + "epoch": 17.44, + "grad_norm": 32.013282775878906, + "learning_rate": 3.617777777777778e-05, + "loss": 0.3515, + "step": 3052 + }, + { + "epoch": 17.445714285714285, + "grad_norm": 71.93909454345703, + "learning_rate": 3.617142857142857e-05, + "loss": 0.3569, + "step": 3053 + }, + { + "epoch": 17.451428571428572, + "grad_norm": 30.76221466064453, + "learning_rate": 3.6165079365079365e-05, + "loss": 0.4061, + "step": 3054 + }, + { + "epoch": 17.457142857142856, + "grad_norm": 48.4053955078125, + "learning_rate": 3.615873015873016e-05, + "loss": 0.3483, + "step": 3055 + }, + { + "epoch": 17.462857142857143, + "grad_norm": 77.21479797363281, + "learning_rate": 3.615238095238096e-05, + "loss": 0.2637, + "step": 3056 + }, + { + "epoch": 17.46857142857143, + "grad_norm": 82.1701889038086, + "learning_rate": 3.614603174603175e-05, + "loss": 0.2644, + "step": 3057 + }, + { + "epoch": 17.474285714285713, + "grad_norm": 29.19261360168457, + "learning_rate": 3.613968253968254e-05, + "loss": 0.3375, + "step": 3058 + }, + { + "epoch": 17.48, + "grad_norm": 44.2556037902832, + "learning_rate": 3.6133333333333335e-05, + "loss": 0.3602, + "step": 3059 + }, + { + "epoch": 17.485714285714284, + "grad_norm": 47.6033935546875, + "learning_rate": 
3.6126984126984135e-05, + "loss": 0.2414, + "step": 3060 + }, + { + "epoch": 17.49142857142857, + "grad_norm": 87.08003997802734, + "learning_rate": 3.612063492063492e-05, + "loss": 0.5281, + "step": 3061 + }, + { + "epoch": 17.497142857142858, + "grad_norm": 33.60626983642578, + "learning_rate": 3.611428571428572e-05, + "loss": 0.452, + "step": 3062 + }, + { + "epoch": 17.502857142857142, + "grad_norm": 40.42241287231445, + "learning_rate": 3.6107936507936506e-05, + "loss": 0.3873, + "step": 3063 + }, + { + "epoch": 17.50857142857143, + "grad_norm": 61.18794250488281, + "learning_rate": 3.6101587301587305e-05, + "loss": 0.3682, + "step": 3064 + }, + { + "epoch": 17.514285714285712, + "grad_norm": 39.720245361328125, + "learning_rate": 3.60952380952381e-05, + "loss": 0.3718, + "step": 3065 + }, + { + "epoch": 17.52, + "grad_norm": 39.689727783203125, + "learning_rate": 3.608888888888889e-05, + "loss": 0.5894, + "step": 3066 + }, + { + "epoch": 17.525714285714287, + "grad_norm": 39.23014831542969, + "learning_rate": 3.608253968253968e-05, + "loss": 0.4398, + "step": 3067 + }, + { + "epoch": 17.53142857142857, + "grad_norm": 40.88357162475586, + "learning_rate": 3.607619047619048e-05, + "loss": 0.3682, + "step": 3068 + }, + { + "epoch": 17.537142857142857, + "grad_norm": 73.21632385253906, + "learning_rate": 3.606984126984127e-05, + "loss": 0.2962, + "step": 3069 + }, + { + "epoch": 17.542857142857144, + "grad_norm": 42.4008674621582, + "learning_rate": 3.606349206349207e-05, + "loss": 0.4491, + "step": 3070 + }, + { + "epoch": 17.548571428571428, + "grad_norm": 92.43096923828125, + "learning_rate": 3.605714285714286e-05, + "loss": 0.34, + "step": 3071 + }, + { + "epoch": 17.554285714285715, + "grad_norm": 30.038530349731445, + "learning_rate": 3.605079365079365e-05, + "loss": 0.4729, + "step": 3072 + }, + { + "epoch": 17.56, + "grad_norm": 28.09479522705078, + "learning_rate": 3.6044444444444446e-05, + "loss": 0.3499, + "step": 3073 + }, + { + "epoch": 
17.565714285714286, + "grad_norm": 115.12698364257812, + "learning_rate": 3.603809523809524e-05, + "loss": 0.3443, + "step": 3074 + }, + { + "epoch": 17.571428571428573, + "grad_norm": 40.825111389160156, + "learning_rate": 3.603174603174603e-05, + "loss": 0.6257, + "step": 3075 + }, + { + "epoch": 17.577142857142857, + "grad_norm": 28.286985397338867, + "learning_rate": 3.602539682539683e-05, + "loss": 0.484, + "step": 3076 + }, + { + "epoch": 17.582857142857144, + "grad_norm": 38.43437957763672, + "learning_rate": 3.6019047619047616e-05, + "loss": 0.2579, + "step": 3077 + }, + { + "epoch": 17.588571428571427, + "grad_norm": 123.68315887451172, + "learning_rate": 3.6012698412698416e-05, + "loss": 0.3633, + "step": 3078 + }, + { + "epoch": 17.594285714285714, + "grad_norm": 47.84682083129883, + "learning_rate": 3.600634920634921e-05, + "loss": 0.5472, + "step": 3079 + }, + { + "epoch": 17.6, + "grad_norm": 36.36436462402344, + "learning_rate": 3.6e-05, + "loss": 0.3362, + "step": 3080 + }, + { + "epoch": 17.605714285714285, + "grad_norm": 56.942665100097656, + "learning_rate": 3.5993650793650794e-05, + "loss": 0.5156, + "step": 3081 + }, + { + "epoch": 17.611428571428572, + "grad_norm": 396.4652099609375, + "learning_rate": 3.598730158730159e-05, + "loss": 0.4402, + "step": 3082 + }, + { + "epoch": 17.617142857142856, + "grad_norm": 25.519779205322266, + "learning_rate": 3.598095238095238e-05, + "loss": 0.3491, + "step": 3083 + }, + { + "epoch": 17.622857142857143, + "grad_norm": 29.500160217285156, + "learning_rate": 3.597460317460318e-05, + "loss": 0.4003, + "step": 3084 + }, + { + "epoch": 17.62857142857143, + "grad_norm": 65.59475708007812, + "learning_rate": 3.596825396825397e-05, + "loss": 0.4087, + "step": 3085 + }, + { + "epoch": 17.634285714285713, + "grad_norm": 90.88926696777344, + "learning_rate": 3.5961904761904764e-05, + "loss": 0.3308, + "step": 3086 + }, + { + "epoch": 17.64, + "grad_norm": 31.910295486450195, + "learning_rate": 
3.5955555555555556e-05, + "loss": 0.3877, + "step": 3087 + }, + { + "epoch": 17.645714285714284, + "grad_norm": 40.765869140625, + "learning_rate": 3.594920634920635e-05, + "loss": 0.396, + "step": 3088 + }, + { + "epoch": 17.65142857142857, + "grad_norm": 22.41646385192871, + "learning_rate": 3.594285714285714e-05, + "loss": 0.3837, + "step": 3089 + }, + { + "epoch": 17.65714285714286, + "grad_norm": 77.16261291503906, + "learning_rate": 3.593650793650794e-05, + "loss": 0.3443, + "step": 3090 + }, + { + "epoch": 17.662857142857142, + "grad_norm": 33.615787506103516, + "learning_rate": 3.5930158730158733e-05, + "loss": 0.2343, + "step": 3091 + }, + { + "epoch": 17.66857142857143, + "grad_norm": 26.944686889648438, + "learning_rate": 3.5923809523809526e-05, + "loss": 0.429, + "step": 3092 + }, + { + "epoch": 17.674285714285713, + "grad_norm": 90.06888580322266, + "learning_rate": 3.591746031746032e-05, + "loss": 0.4934, + "step": 3093 + }, + { + "epoch": 17.68, + "grad_norm": 46.003074645996094, + "learning_rate": 3.591111111111111e-05, + "loss": 0.525, + "step": 3094 + }, + { + "epoch": 17.685714285714287, + "grad_norm": 67.316162109375, + "learning_rate": 3.590476190476191e-05, + "loss": 0.3631, + "step": 3095 + }, + { + "epoch": 17.69142857142857, + "grad_norm": 50.84532165527344, + "learning_rate": 3.58984126984127e-05, + "loss": 0.3075, + "step": 3096 + }, + { + "epoch": 17.697142857142858, + "grad_norm": 50.38624572753906, + "learning_rate": 3.5892063492063496e-05, + "loss": 0.4122, + "step": 3097 + }, + { + "epoch": 17.70285714285714, + "grad_norm": 49.8840217590332, + "learning_rate": 3.588571428571429e-05, + "loss": 0.3559, + "step": 3098 + }, + { + "epoch": 17.708571428571428, + "grad_norm": 48.15559005737305, + "learning_rate": 3.587936507936508e-05, + "loss": 0.5752, + "step": 3099 + }, + { + "epoch": 17.714285714285715, + "grad_norm": 62.33614730834961, + "learning_rate": 3.5873015873015874e-05, + "loss": 0.3423, + "step": 3100 + }, + { + "epoch": 
17.72, + "grad_norm": 35.41064453125, + "learning_rate": 3.586666666666667e-05, + "loss": 0.3472, + "step": 3101 + }, + { + "epoch": 17.725714285714286, + "grad_norm": 53.6688346862793, + "learning_rate": 3.586031746031746e-05, + "loss": 0.4619, + "step": 3102 + }, + { + "epoch": 17.731428571428573, + "grad_norm": 36.99428939819336, + "learning_rate": 3.585396825396826e-05, + "loss": 0.3232, + "step": 3103 + }, + { + "epoch": 17.737142857142857, + "grad_norm": 377.96697998046875, + "learning_rate": 3.584761904761905e-05, + "loss": 0.4811, + "step": 3104 + }, + { + "epoch": 17.742857142857144, + "grad_norm": 34.98124694824219, + "learning_rate": 3.5841269841269844e-05, + "loss": 0.5067, + "step": 3105 + }, + { + "epoch": 17.748571428571427, + "grad_norm": 53.51387405395508, + "learning_rate": 3.5834920634920637e-05, + "loss": 0.4038, + "step": 3106 + }, + { + "epoch": 17.754285714285714, + "grad_norm": 63.63230514526367, + "learning_rate": 3.582857142857143e-05, + "loss": 0.3999, + "step": 3107 + }, + { + "epoch": 17.76, + "grad_norm": 85.73343658447266, + "learning_rate": 3.582222222222222e-05, + "loss": 0.3618, + "step": 3108 + }, + { + "epoch": 17.765714285714285, + "grad_norm": 45.70502853393555, + "learning_rate": 3.581587301587302e-05, + "loss": 0.2773, + "step": 3109 + }, + { + "epoch": 17.771428571428572, + "grad_norm": 46.77682113647461, + "learning_rate": 3.580952380952381e-05, + "loss": 0.4154, + "step": 3110 + }, + { + "epoch": 17.777142857142856, + "grad_norm": 87.49949645996094, + "learning_rate": 3.5803174603174607e-05, + "loss": 0.4066, + "step": 3111 + }, + { + "epoch": 17.782857142857143, + "grad_norm": 44.131649017333984, + "learning_rate": 3.57968253968254e-05, + "loss": 0.3703, + "step": 3112 + }, + { + "epoch": 17.78857142857143, + "grad_norm": 430.80548095703125, + "learning_rate": 3.579047619047619e-05, + "loss": 0.4413, + "step": 3113 + }, + { + "epoch": 17.794285714285714, + "grad_norm": 68.55734252929688, + "learning_rate": 
3.5784126984126984e-05, + "loss": 0.3507, + "step": 3114 + }, + { + "epoch": 17.8, + "grad_norm": 55.162235260009766, + "learning_rate": 3.577777777777778e-05, + "loss": 0.3646, + "step": 3115 + }, + { + "epoch": 17.805714285714284, + "grad_norm": 76.46490478515625, + "learning_rate": 3.577142857142857e-05, + "loss": 0.4046, + "step": 3116 + }, + { + "epoch": 17.81142857142857, + "grad_norm": 30.54099464416504, + "learning_rate": 3.576507936507937e-05, + "loss": 0.4883, + "step": 3117 + }, + { + "epoch": 17.81714285714286, + "grad_norm": 94.63453674316406, + "learning_rate": 3.5758730158730155e-05, + "loss": 0.4853, + "step": 3118 + }, + { + "epoch": 17.822857142857142, + "grad_norm": 52.40849304199219, + "learning_rate": 3.5752380952380954e-05, + "loss": 0.3569, + "step": 3119 + }, + { + "epoch": 17.82857142857143, + "grad_norm": 77.03597259521484, + "learning_rate": 3.574603174603175e-05, + "loss": 0.3342, + "step": 3120 + }, + { + "epoch": 17.834285714285713, + "grad_norm": 31.358612060546875, + "learning_rate": 3.573968253968254e-05, + "loss": 0.3779, + "step": 3121 + }, + { + "epoch": 17.84, + "grad_norm": 82.40419006347656, + "learning_rate": 3.573333333333333e-05, + "loss": 0.364, + "step": 3122 + }, + { + "epoch": 17.845714285714287, + "grad_norm": 142.53746032714844, + "learning_rate": 3.572698412698413e-05, + "loss": 0.4036, + "step": 3123 + }, + { + "epoch": 17.85142857142857, + "grad_norm": 70.76148986816406, + "learning_rate": 3.5720634920634924e-05, + "loss": 0.3868, + "step": 3124 + }, + { + "epoch": 17.857142857142858, + "grad_norm": 38.553592681884766, + "learning_rate": 3.571428571428572e-05, + "loss": 0.4233, + "step": 3125 + }, + { + "epoch": 17.86285714285714, + "grad_norm": 95.4551773071289, + "learning_rate": 3.570793650793651e-05, + "loss": 0.336, + "step": 3126 + }, + { + "epoch": 17.86857142857143, + "grad_norm": 46.99102783203125, + "learning_rate": 3.57015873015873e-05, + "loss": 0.3251, + "step": 3127 + }, + { + "epoch": 
17.874285714285715, + "grad_norm": 39.050758361816406, + "learning_rate": 3.5695238095238095e-05, + "loss": 0.405, + "step": 3128 + }, + { + "epoch": 17.88, + "grad_norm": 113.9549331665039, + "learning_rate": 3.568888888888889e-05, + "loss": 0.3941, + "step": 3129 + }, + { + "epoch": 17.885714285714286, + "grad_norm": 65.36485290527344, + "learning_rate": 3.568253968253969e-05, + "loss": 0.3726, + "step": 3130 + }, + { + "epoch": 17.89142857142857, + "grad_norm": 39.975582122802734, + "learning_rate": 3.567619047619048e-05, + "loss": 0.2399, + "step": 3131 + }, + { + "epoch": 17.897142857142857, + "grad_norm": 42.00217819213867, + "learning_rate": 3.566984126984127e-05, + "loss": 0.4181, + "step": 3132 + }, + { + "epoch": 17.902857142857144, + "grad_norm": 54.97571563720703, + "learning_rate": 3.5663492063492065e-05, + "loss": 0.5467, + "step": 3133 + }, + { + "epoch": 17.908571428571427, + "grad_norm": 67.74969482421875, + "learning_rate": 3.5657142857142864e-05, + "loss": 0.7542, + "step": 3134 + }, + { + "epoch": 17.914285714285715, + "grad_norm": 98.58827209472656, + "learning_rate": 3.565079365079365e-05, + "loss": 0.5048, + "step": 3135 + }, + { + "epoch": 17.92, + "grad_norm": 47.780155181884766, + "learning_rate": 3.564444444444445e-05, + "loss": 0.3109, + "step": 3136 + }, + { + "epoch": 17.925714285714285, + "grad_norm": 43.75344467163086, + "learning_rate": 3.5638095238095235e-05, + "loss": 0.3467, + "step": 3137 + }, + { + "epoch": 17.931428571428572, + "grad_norm": 66.09368896484375, + "learning_rate": 3.5631746031746035e-05, + "loss": 0.3271, + "step": 3138 + }, + { + "epoch": 17.937142857142856, + "grad_norm": 25.249197006225586, + "learning_rate": 3.562539682539683e-05, + "loss": 0.5889, + "step": 3139 + }, + { + "epoch": 17.942857142857143, + "grad_norm": 43.17115783691406, + "learning_rate": 3.561904761904762e-05, + "loss": 0.3516, + "step": 3140 + }, + { + "epoch": 17.94857142857143, + "grad_norm": 54.95415496826172, + "learning_rate": 
3.561269841269841e-05, + "loss": 0.4138, + "step": 3141 + }, + { + "epoch": 17.954285714285714, + "grad_norm": 52.29851531982422, + "learning_rate": 3.560634920634921e-05, + "loss": 0.3052, + "step": 3142 + }, + { + "epoch": 17.96, + "grad_norm": 51.46090316772461, + "learning_rate": 3.56e-05, + "loss": 0.5461, + "step": 3143 + }, + { + "epoch": 17.965714285714284, + "grad_norm": 76.5407943725586, + "learning_rate": 3.55936507936508e-05, + "loss": 0.3495, + "step": 3144 + }, + { + "epoch": 17.97142857142857, + "grad_norm": 69.67864990234375, + "learning_rate": 3.558730158730159e-05, + "loss": 0.5133, + "step": 3145 + }, + { + "epoch": 17.97714285714286, + "grad_norm": 61.10658645629883, + "learning_rate": 3.558095238095238e-05, + "loss": 0.44, + "step": 3146 + }, + { + "epoch": 17.982857142857142, + "grad_norm": 25.05204963684082, + "learning_rate": 3.5574603174603175e-05, + "loss": 0.6404, + "step": 3147 + }, + { + "epoch": 17.98857142857143, + "grad_norm": 786.7589111328125, + "learning_rate": 3.556825396825397e-05, + "loss": 0.5873, + "step": 3148 + }, + { + "epoch": 17.994285714285713, + "grad_norm": 46.797298431396484, + "learning_rate": 3.556190476190476e-05, + "loss": 0.3733, + "step": 3149 + }, + { + "epoch": 18.0, + "grad_norm": 103.56895446777344, + "learning_rate": 3.555555555555556e-05, + "loss": 0.4865, + "step": 3150 + }, + { + "epoch": 18.0, + "eval_classes": 0, + "eval_loss": 0.6253594160079956, + "eval_map": 0.9198, + "eval_map_50": 0.9628, + "eval_map_75": 0.9522, + "eval_map_large": 0.9199, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9198, + "eval_map_small": -1.0, + "eval_mar_1": 0.7806, + "eval_mar_10": 0.966, + "eval_mar_100": 0.9756, + "eval_mar_100_per_class": 0.9756, + "eval_mar_large": 0.9756, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 14.5229, + "eval_samples_per_second": 20.244, + "eval_steps_per_second": 2.548, + "step": 3150 + }, + { + "epoch": 18.005714285714287, + "grad_norm": 43.199241638183594, 
+ "learning_rate": 3.5549206349206346e-05, + "loss": 0.3592, + "step": 3151 + }, + { + "epoch": 18.01142857142857, + "grad_norm": 55.78414535522461, + "learning_rate": 3.5542857142857145e-05, + "loss": 0.3606, + "step": 3152 + }, + { + "epoch": 18.017142857142858, + "grad_norm": 30.153759002685547, + "learning_rate": 3.553650793650794e-05, + "loss": 0.4209, + "step": 3153 + }, + { + "epoch": 18.02285714285714, + "grad_norm": 36.56952667236328, + "learning_rate": 3.553015873015873e-05, + "loss": 0.3696, + "step": 3154 + }, + { + "epoch": 18.02857142857143, + "grad_norm": 40.697784423828125, + "learning_rate": 3.552380952380952e-05, + "loss": 0.2977, + "step": 3155 + }, + { + "epoch": 18.034285714285716, + "grad_norm": 23.288288116455078, + "learning_rate": 3.551746031746032e-05, + "loss": 0.3205, + "step": 3156 + }, + { + "epoch": 18.04, + "grad_norm": 40.85267639160156, + "learning_rate": 3.551111111111111e-05, + "loss": 0.4282, + "step": 3157 + }, + { + "epoch": 18.045714285714286, + "grad_norm": 94.27670288085938, + "learning_rate": 3.550476190476191e-05, + "loss": 0.3684, + "step": 3158 + }, + { + "epoch": 18.05142857142857, + "grad_norm": 76.08488464355469, + "learning_rate": 3.54984126984127e-05, + "loss": 0.3418, + "step": 3159 + }, + { + "epoch": 18.057142857142857, + "grad_norm": 83.23991394042969, + "learning_rate": 3.549206349206349e-05, + "loss": 0.3182, + "step": 3160 + }, + { + "epoch": 18.062857142857144, + "grad_norm": 44.40380859375, + "learning_rate": 3.5485714285714286e-05, + "loss": 0.3782, + "step": 3161 + }, + { + "epoch": 18.068571428571428, + "grad_norm": 41.11808395385742, + "learning_rate": 3.547936507936508e-05, + "loss": 0.2367, + "step": 3162 + }, + { + "epoch": 18.074285714285715, + "grad_norm": 57.094913482666016, + "learning_rate": 3.547301587301588e-05, + "loss": 0.2702, + "step": 3163 + }, + { + "epoch": 18.08, + "grad_norm": 63.90068054199219, + "learning_rate": 3.546666666666667e-05, + "loss": 0.3866, + "step": 3164 + }, + { + 
"epoch": 18.085714285714285, + "grad_norm": 52.84870529174805, + "learning_rate": 3.546031746031746e-05, + "loss": 0.2268, + "step": 3165 + }, + { + "epoch": 18.091428571428573, + "grad_norm": 161.87777709960938, + "learning_rate": 3.5453968253968256e-05, + "loss": 0.3524, + "step": 3166 + }, + { + "epoch": 18.097142857142856, + "grad_norm": 65.514404296875, + "learning_rate": 3.5447619047619055e-05, + "loss": 0.4595, + "step": 3167 + }, + { + "epoch": 18.102857142857143, + "grad_norm": 33.12297821044922, + "learning_rate": 3.544126984126984e-05, + "loss": 0.377, + "step": 3168 + }, + { + "epoch": 18.10857142857143, + "grad_norm": 65.4037857055664, + "learning_rate": 3.543492063492064e-05, + "loss": 0.3687, + "step": 3169 + }, + { + "epoch": 18.114285714285714, + "grad_norm": 62.527587890625, + "learning_rate": 3.5428571428571426e-05, + "loss": 0.2815, + "step": 3170 + }, + { + "epoch": 18.12, + "grad_norm": 57.75078582763672, + "learning_rate": 3.5422222222222226e-05, + "loss": 0.346, + "step": 3171 + }, + { + "epoch": 18.125714285714285, + "grad_norm": 39.5456657409668, + "learning_rate": 3.541587301587302e-05, + "loss": 0.4617, + "step": 3172 + }, + { + "epoch": 18.13142857142857, + "grad_norm": 51.52202606201172, + "learning_rate": 3.540952380952381e-05, + "loss": 0.2961, + "step": 3173 + }, + { + "epoch": 18.13714285714286, + "grad_norm": 43.48557662963867, + "learning_rate": 3.5403174603174604e-05, + "loss": 0.4519, + "step": 3174 + }, + { + "epoch": 18.142857142857142, + "grad_norm": 61.564796447753906, + "learning_rate": 3.53968253968254e-05, + "loss": 0.2732, + "step": 3175 + }, + { + "epoch": 18.14857142857143, + "grad_norm": 454.7781677246094, + "learning_rate": 3.539047619047619e-05, + "loss": 0.4123, + "step": 3176 + }, + { + "epoch": 18.154285714285713, + "grad_norm": 29.593908309936523, + "learning_rate": 3.538412698412699e-05, + "loss": 0.5679, + "step": 3177 + }, + { + "epoch": 18.16, + "grad_norm": 85.99415588378906, + "learning_rate": 
3.537777777777778e-05, + "loss": 0.4035, + "step": 3178 + }, + { + "epoch": 18.165714285714287, + "grad_norm": 95.85099029541016, + "learning_rate": 3.5371428571428574e-05, + "loss": 0.3839, + "step": 3179 + }, + { + "epoch": 18.17142857142857, + "grad_norm": 49.89384460449219, + "learning_rate": 3.5365079365079366e-05, + "loss": 0.4968, + "step": 3180 + }, + { + "epoch": 18.177142857142858, + "grad_norm": 118.2928466796875, + "learning_rate": 3.535873015873016e-05, + "loss": 0.3729, + "step": 3181 + }, + { + "epoch": 18.18285714285714, + "grad_norm": 87.427734375, + "learning_rate": 3.535238095238095e-05, + "loss": 0.4553, + "step": 3182 + }, + { + "epoch": 18.18857142857143, + "grad_norm": 71.05023956298828, + "learning_rate": 3.534603174603175e-05, + "loss": 0.3504, + "step": 3183 + }, + { + "epoch": 18.194285714285716, + "grad_norm": 76.40508270263672, + "learning_rate": 3.533968253968254e-05, + "loss": 0.245, + "step": 3184 + }, + { + "epoch": 18.2, + "grad_norm": 61.27660369873047, + "learning_rate": 3.5333333333333336e-05, + "loss": 0.2832, + "step": 3185 + }, + { + "epoch": 18.205714285714286, + "grad_norm": 38.750450134277344, + "learning_rate": 3.532698412698413e-05, + "loss": 0.4819, + "step": 3186 + }, + { + "epoch": 18.21142857142857, + "grad_norm": 38.24382400512695, + "learning_rate": 3.532063492063492e-05, + "loss": 0.2887, + "step": 3187 + }, + { + "epoch": 18.217142857142857, + "grad_norm": 72.64383697509766, + "learning_rate": 3.5314285714285714e-05, + "loss": 0.3745, + "step": 3188 + }, + { + "epoch": 18.222857142857144, + "grad_norm": 42.5111083984375, + "learning_rate": 3.5307936507936513e-05, + "loss": 0.3943, + "step": 3189 + }, + { + "epoch": 18.228571428571428, + "grad_norm": 54.62807083129883, + "learning_rate": 3.53015873015873e-05, + "loss": 0.4434, + "step": 3190 + }, + { + "epoch": 18.234285714285715, + "grad_norm": 63.11427688598633, + "learning_rate": 3.52952380952381e-05, + "loss": 0.4309, + "step": 3191 + }, + { + "epoch": 18.24, 
+ "grad_norm": 42.79474639892578, + "learning_rate": 3.528888888888889e-05, + "loss": 0.3559, + "step": 3192 + }, + { + "epoch": 18.245714285714286, + "grad_norm": 48.369224548339844, + "learning_rate": 3.5282539682539684e-05, + "loss": 0.3314, + "step": 3193 + }, + { + "epoch": 18.251428571428573, + "grad_norm": 74.17704010009766, + "learning_rate": 3.5276190476190477e-05, + "loss": 0.4334, + "step": 3194 + }, + { + "epoch": 18.257142857142856, + "grad_norm": 67.38375091552734, + "learning_rate": 3.526984126984127e-05, + "loss": 0.5354, + "step": 3195 + }, + { + "epoch": 18.262857142857143, + "grad_norm": 30.76678466796875, + "learning_rate": 3.526349206349206e-05, + "loss": 0.3759, + "step": 3196 + }, + { + "epoch": 18.268571428571427, + "grad_norm": 36.84651184082031, + "learning_rate": 3.525714285714286e-05, + "loss": 0.3854, + "step": 3197 + }, + { + "epoch": 18.274285714285714, + "grad_norm": 37.59387969970703, + "learning_rate": 3.5250793650793654e-05, + "loss": 0.3735, + "step": 3198 + }, + { + "epoch": 18.28, + "grad_norm": 64.6196517944336, + "learning_rate": 3.5244444444444447e-05, + "loss": 0.3306, + "step": 3199 + }, + { + "epoch": 18.285714285714285, + "grad_norm": 59.37558364868164, + "learning_rate": 3.523809523809524e-05, + "loss": 0.3057, + "step": 3200 + }, + { + "epoch": 18.291428571428572, + "grad_norm": 113.92902374267578, + "learning_rate": 3.523174603174603e-05, + "loss": 0.4922, + "step": 3201 + }, + { + "epoch": 18.29714285714286, + "grad_norm": 85.95907592773438, + "learning_rate": 3.522539682539683e-05, + "loss": 0.4439, + "step": 3202 + }, + { + "epoch": 18.302857142857142, + "grad_norm": 74.69219207763672, + "learning_rate": 3.521904761904762e-05, + "loss": 0.3237, + "step": 3203 + }, + { + "epoch": 18.30857142857143, + "grad_norm": 101.25186920166016, + "learning_rate": 3.5212698412698417e-05, + "loss": 0.3955, + "step": 3204 + }, + { + "epoch": 18.314285714285713, + "grad_norm": 84.63744354248047, + "learning_rate": 
3.520634920634921e-05, + "loss": 0.3627, + "step": 3205 + }, + { + "epoch": 18.32, + "grad_norm": 84.56731414794922, + "learning_rate": 3.52e-05, + "loss": 0.3336, + "step": 3206 + }, + { + "epoch": 18.325714285714287, + "grad_norm": 44.616302490234375, + "learning_rate": 3.5193650793650794e-05, + "loss": 0.3344, + "step": 3207 + }, + { + "epoch": 18.33142857142857, + "grad_norm": 69.16703033447266, + "learning_rate": 3.5187301587301594e-05, + "loss": 0.3907, + "step": 3208 + }, + { + "epoch": 18.337142857142858, + "grad_norm": 79.36182403564453, + "learning_rate": 3.518095238095238e-05, + "loss": 0.3139, + "step": 3209 + }, + { + "epoch": 18.34285714285714, + "grad_norm": 85.03105926513672, + "learning_rate": 3.517460317460318e-05, + "loss": 0.2982, + "step": 3210 + }, + { + "epoch": 18.34857142857143, + "grad_norm": 39.43722915649414, + "learning_rate": 3.516825396825397e-05, + "loss": 0.3983, + "step": 3211 + }, + { + "epoch": 18.354285714285716, + "grad_norm": 41.102115631103516, + "learning_rate": 3.5161904761904764e-05, + "loss": 0.5258, + "step": 3212 + }, + { + "epoch": 18.36, + "grad_norm": 36.52622985839844, + "learning_rate": 3.515555555555556e-05, + "loss": 0.3115, + "step": 3213 + }, + { + "epoch": 18.365714285714287, + "grad_norm": 76.81031036376953, + "learning_rate": 3.514920634920635e-05, + "loss": 0.4972, + "step": 3214 + }, + { + "epoch": 18.37142857142857, + "grad_norm": 54.84734344482422, + "learning_rate": 3.514285714285714e-05, + "loss": 0.3642, + "step": 3215 + }, + { + "epoch": 18.377142857142857, + "grad_norm": 100.14500427246094, + "learning_rate": 3.513650793650794e-05, + "loss": 0.6879, + "step": 3216 + }, + { + "epoch": 18.382857142857144, + "grad_norm": 36.46712112426758, + "learning_rate": 3.513015873015873e-05, + "loss": 0.2939, + "step": 3217 + }, + { + "epoch": 18.388571428571428, + "grad_norm": 62.769412994384766, + "learning_rate": 3.512380952380953e-05, + "loss": 0.9316, + "step": 3218 + }, + { + "epoch": 18.394285714285715, + 
"grad_norm": 39.455169677734375, + "learning_rate": 3.511746031746032e-05, + "loss": 0.3678, + "step": 3219 + }, + { + "epoch": 18.4, + "grad_norm": 325.67950439453125, + "learning_rate": 3.511111111111111e-05, + "loss": 0.3816, + "step": 3220 + }, + { + "epoch": 18.405714285714286, + "grad_norm": 79.74323272705078, + "learning_rate": 3.5104761904761905e-05, + "loss": 0.5354, + "step": 3221 + }, + { + "epoch": 18.411428571428573, + "grad_norm": 197.36605834960938, + "learning_rate": 3.5098412698412704e-05, + "loss": 0.3999, + "step": 3222 + }, + { + "epoch": 18.417142857142856, + "grad_norm": 85.20283508300781, + "learning_rate": 3.509206349206349e-05, + "loss": 0.4736, + "step": 3223 + }, + { + "epoch": 18.422857142857143, + "grad_norm": 212.142578125, + "learning_rate": 3.508571428571429e-05, + "loss": 0.5339, + "step": 3224 + }, + { + "epoch": 18.428571428571427, + "grad_norm": 83.710205078125, + "learning_rate": 3.5079365079365075e-05, + "loss": 0.3566, + "step": 3225 + }, + { + "epoch": 18.434285714285714, + "grad_norm": 27.423030853271484, + "learning_rate": 3.5073015873015875e-05, + "loss": 0.3837, + "step": 3226 + }, + { + "epoch": 18.44, + "grad_norm": 50.2109260559082, + "learning_rate": 3.506666666666667e-05, + "loss": 0.3335, + "step": 3227 + }, + { + "epoch": 18.445714285714285, + "grad_norm": 28.182552337646484, + "learning_rate": 3.506031746031746e-05, + "loss": 0.4677, + "step": 3228 + }, + { + "epoch": 18.451428571428572, + "grad_norm": 36.24056625366211, + "learning_rate": 3.505396825396825e-05, + "loss": 0.2841, + "step": 3229 + }, + { + "epoch": 18.457142857142856, + "grad_norm": 28.210988998413086, + "learning_rate": 3.504761904761905e-05, + "loss": 0.3922, + "step": 3230 + }, + { + "epoch": 18.462857142857143, + "grad_norm": 57.17612075805664, + "learning_rate": 3.5041269841269845e-05, + "loss": 0.3097, + "step": 3231 + }, + { + "epoch": 18.46857142857143, + "grad_norm": 27.290563583374023, + "learning_rate": 3.503492063492064e-05, + "loss": 
0.3782, + "step": 3232 + }, + { + "epoch": 18.474285714285713, + "grad_norm": 63.29109191894531, + "learning_rate": 3.502857142857143e-05, + "loss": 0.3363, + "step": 3233 + }, + { + "epoch": 18.48, + "grad_norm": 43.08891296386719, + "learning_rate": 3.502222222222222e-05, + "loss": 0.509, + "step": 3234 + }, + { + "epoch": 18.485714285714284, + "grad_norm": 57.84274673461914, + "learning_rate": 3.5015873015873015e-05, + "loss": 0.2903, + "step": 3235 + }, + { + "epoch": 18.49142857142857, + "grad_norm": 29.947025299072266, + "learning_rate": 3.500952380952381e-05, + "loss": 0.5381, + "step": 3236 + }, + { + "epoch": 18.497142857142858, + "grad_norm": 27.890987396240234, + "learning_rate": 3.500317460317461e-05, + "loss": 0.2969, + "step": 3237 + }, + { + "epoch": 18.502857142857142, + "grad_norm": 52.41931915283203, + "learning_rate": 3.49968253968254e-05, + "loss": 0.3769, + "step": 3238 + }, + { + "epoch": 18.50857142857143, + "grad_norm": 40.1724967956543, + "learning_rate": 3.499047619047619e-05, + "loss": 0.4841, + "step": 3239 + }, + { + "epoch": 18.514285714285712, + "grad_norm": 89.99263763427734, + "learning_rate": 3.4984126984126985e-05, + "loss": 0.5822, + "step": 3240 + }, + { + "epoch": 18.52, + "grad_norm": 36.47848129272461, + "learning_rate": 3.4977777777777785e-05, + "loss": 0.3401, + "step": 3241 + }, + { + "epoch": 18.525714285714287, + "grad_norm": 53.10023498535156, + "learning_rate": 3.497142857142857e-05, + "loss": 0.3334, + "step": 3242 + }, + { + "epoch": 18.53142857142857, + "grad_norm": 25.754384994506836, + "learning_rate": 3.496507936507937e-05, + "loss": 0.3938, + "step": 3243 + }, + { + "epoch": 18.537142857142857, + "grad_norm": 220.01368713378906, + "learning_rate": 3.495873015873016e-05, + "loss": 0.6147, + "step": 3244 + }, + { + "epoch": 18.542857142857144, + "grad_norm": 32.224937438964844, + "learning_rate": 3.4952380952380955e-05, + "loss": 0.3356, + "step": 3245 + }, + { + "epoch": 18.548571428571428, + "grad_norm": 
66.4201431274414, + "learning_rate": 3.494603174603175e-05, + "loss": 0.4077, + "step": 3246 + }, + { + "epoch": 18.554285714285715, + "grad_norm": 112.10049438476562, + "learning_rate": 3.493968253968254e-05, + "loss": 0.3764, + "step": 3247 + }, + { + "epoch": 18.56, + "grad_norm": 44.7986946105957, + "learning_rate": 3.493333333333333e-05, + "loss": 0.329, + "step": 3248 + }, + { + "epoch": 18.565714285714286, + "grad_norm": 93.60568237304688, + "learning_rate": 3.492698412698413e-05, + "loss": 0.3607, + "step": 3249 + }, + { + "epoch": 18.571428571428573, + "grad_norm": 65.51329040527344, + "learning_rate": 3.492063492063492e-05, + "loss": 0.3553, + "step": 3250 + }, + { + "epoch": 18.577142857142857, + "grad_norm": 76.55573272705078, + "learning_rate": 3.491428571428572e-05, + "loss": 0.4236, + "step": 3251 + }, + { + "epoch": 18.582857142857144, + "grad_norm": 28.30959701538086, + "learning_rate": 3.490793650793651e-05, + "loss": 0.5552, + "step": 3252 + }, + { + "epoch": 18.588571428571427, + "grad_norm": 537.5946044921875, + "learning_rate": 3.49015873015873e-05, + "loss": 0.3013, + "step": 3253 + }, + { + "epoch": 18.594285714285714, + "grad_norm": 37.939945220947266, + "learning_rate": 3.4895238095238096e-05, + "loss": 0.3303, + "step": 3254 + }, + { + "epoch": 18.6, + "grad_norm": 21.929025650024414, + "learning_rate": 3.4888888888888895e-05, + "loss": 0.378, + "step": 3255 + }, + { + "epoch": 18.605714285714285, + "grad_norm": 62.76546859741211, + "learning_rate": 3.488253968253968e-05, + "loss": 0.3737, + "step": 3256 + }, + { + "epoch": 18.611428571428572, + "grad_norm": 43.64086151123047, + "learning_rate": 3.487619047619048e-05, + "loss": 0.375, + "step": 3257 + }, + { + "epoch": 18.617142857142856, + "grad_norm": 49.89305114746094, + "learning_rate": 3.4869841269841266e-05, + "loss": 0.409, + "step": 3258 + }, + { + "epoch": 18.622857142857143, + "grad_norm": 41.23617935180664, + "learning_rate": 3.4863492063492066e-05, + "loss": 0.3682, + "step": 
3259 + }, + { + "epoch": 18.62857142857143, + "grad_norm": 61.988040924072266, + "learning_rate": 3.485714285714286e-05, + "loss": 0.432, + "step": 3260 + }, + { + "epoch": 18.634285714285713, + "grad_norm": 148.89205932617188, + "learning_rate": 3.485079365079365e-05, + "loss": 0.4126, + "step": 3261 + }, + { + "epoch": 18.64, + "grad_norm": 56.83814239501953, + "learning_rate": 3.4844444444444444e-05, + "loss": 0.3521, + "step": 3262 + }, + { + "epoch": 18.645714285714284, + "grad_norm": 31.013830184936523, + "learning_rate": 3.483809523809524e-05, + "loss": 0.4353, + "step": 3263 + }, + { + "epoch": 18.65142857142857, + "grad_norm": 61.98045349121094, + "learning_rate": 3.483174603174603e-05, + "loss": 0.35, + "step": 3264 + }, + { + "epoch": 18.65714285714286, + "grad_norm": 18.87017250061035, + "learning_rate": 3.482539682539683e-05, + "loss": 0.2831, + "step": 3265 + }, + { + "epoch": 18.662857142857142, + "grad_norm": 22.309398651123047, + "learning_rate": 3.481904761904762e-05, + "loss": 0.3345, + "step": 3266 + }, + { + "epoch": 18.66857142857143, + "grad_norm": 74.41686248779297, + "learning_rate": 3.4812698412698414e-05, + "loss": 0.4046, + "step": 3267 + }, + { + "epoch": 18.674285714285713, + "grad_norm": 55.20325469970703, + "learning_rate": 3.4806349206349206e-05, + "loss": 0.3444, + "step": 3268 + }, + { + "epoch": 18.68, + "grad_norm": 41.73289489746094, + "learning_rate": 3.48e-05, + "loss": 0.4791, + "step": 3269 + }, + { + "epoch": 18.685714285714287, + "grad_norm": 52.391178131103516, + "learning_rate": 3.47936507936508e-05, + "loss": 0.488, + "step": 3270 + }, + { + "epoch": 18.69142857142857, + "grad_norm": 41.188087463378906, + "learning_rate": 3.478730158730159e-05, + "loss": 0.3235, + "step": 3271 + }, + { + "epoch": 18.697142857142858, + "grad_norm": 88.90873718261719, + "learning_rate": 3.4780952380952384e-05, + "loss": 0.3831, + "step": 3272 + }, + { + "epoch": 18.70285714285714, + "grad_norm": 46.88115692138672, + "learning_rate": 
3.4774603174603176e-05, + "loss": 0.3078, + "step": 3273 + }, + { + "epoch": 18.708571428571428, + "grad_norm": 18.235933303833008, + "learning_rate": 3.476825396825397e-05, + "loss": 0.464, + "step": 3274 + }, + { + "epoch": 18.714285714285715, + "grad_norm": 53.559288024902344, + "learning_rate": 3.476190476190476e-05, + "loss": 0.3916, + "step": 3275 + }, + { + "epoch": 18.72, + "grad_norm": 23.915830612182617, + "learning_rate": 3.475555555555556e-05, + "loss": 0.3189, + "step": 3276 + }, + { + "epoch": 18.725714285714286, + "grad_norm": 38.997554779052734, + "learning_rate": 3.4749206349206353e-05, + "loss": 0.3874, + "step": 3277 + }, + { + "epoch": 18.731428571428573, + "grad_norm": 32.495235443115234, + "learning_rate": 3.4742857142857146e-05, + "loss": 0.2955, + "step": 3278 + }, + { + "epoch": 18.737142857142857, + "grad_norm": 26.708459854125977, + "learning_rate": 3.473650793650794e-05, + "loss": 0.4058, + "step": 3279 + }, + { + "epoch": 18.742857142857144, + "grad_norm": 31.728487014770508, + "learning_rate": 3.473015873015873e-05, + "loss": 0.4113, + "step": 3280 + }, + { + "epoch": 18.748571428571427, + "grad_norm": 63.8240852355957, + "learning_rate": 3.4723809523809524e-05, + "loss": 0.2827, + "step": 3281 + }, + { + "epoch": 18.754285714285714, + "grad_norm": 54.470394134521484, + "learning_rate": 3.4717460317460323e-05, + "loss": 0.3469, + "step": 3282 + }, + { + "epoch": 18.76, + "grad_norm": 65.894775390625, + "learning_rate": 3.471111111111111e-05, + "loss": 0.358, + "step": 3283 + }, + { + "epoch": 18.765714285714285, + "grad_norm": 56.5354118347168, + "learning_rate": 3.470476190476191e-05, + "loss": 0.2944, + "step": 3284 + }, + { + "epoch": 18.771428571428572, + "grad_norm": 42.3811149597168, + "learning_rate": 3.46984126984127e-05, + "loss": 0.4474, + "step": 3285 + }, + { + "epoch": 18.777142857142856, + "grad_norm": 67.40564727783203, + "learning_rate": 3.4692063492063494e-05, + "loss": 0.3332, + "step": 3286 + }, + { + "epoch": 
18.782857142857143, + "grad_norm": 30.77213478088379, + "learning_rate": 3.468571428571429e-05, + "loss": 0.4628, + "step": 3287 + }, + { + "epoch": 18.78857142857143, + "grad_norm": 15.608197212219238, + "learning_rate": 3.4679365079365086e-05, + "loss": 0.3094, + "step": 3288 + }, + { + "epoch": 18.794285714285714, + "grad_norm": 15.401975631713867, + "learning_rate": 3.467301587301587e-05, + "loss": 0.3165, + "step": 3289 + }, + { + "epoch": 18.8, + "grad_norm": 32.28103256225586, + "learning_rate": 3.466666666666667e-05, + "loss": 0.2876, + "step": 3290 + }, + { + "epoch": 18.805714285714284, + "grad_norm": 84.35736846923828, + "learning_rate": 3.466031746031746e-05, + "loss": 0.2955, + "step": 3291 + }, + { + "epoch": 18.81142857142857, + "grad_norm": 44.69303512573242, + "learning_rate": 3.4653968253968257e-05, + "loss": 0.3536, + "step": 3292 + }, + { + "epoch": 18.81714285714286, + "grad_norm": 48.58481979370117, + "learning_rate": 3.464761904761905e-05, + "loss": 0.6354, + "step": 3293 + }, + { + "epoch": 18.822857142857142, + "grad_norm": 35.53000259399414, + "learning_rate": 3.464126984126984e-05, + "loss": 0.4207, + "step": 3294 + }, + { + "epoch": 18.82857142857143, + "grad_norm": 34.199031829833984, + "learning_rate": 3.4634920634920634e-05, + "loss": 0.5652, + "step": 3295 + }, + { + "epoch": 18.834285714285713, + "grad_norm": 98.38899230957031, + "learning_rate": 3.4628571428571434e-05, + "loss": 0.4791, + "step": 3296 + }, + { + "epoch": 18.84, + "grad_norm": 27.646554946899414, + "learning_rate": 3.462222222222222e-05, + "loss": 0.3193, + "step": 3297 + }, + { + "epoch": 18.845714285714287, + "grad_norm": 21.69123649597168, + "learning_rate": 3.461587301587302e-05, + "loss": 0.3577, + "step": 3298 + }, + { + "epoch": 18.85142857142857, + "grad_norm": 32.46466827392578, + "learning_rate": 3.460952380952381e-05, + "loss": 0.3855, + "step": 3299 + }, + { + "epoch": 18.857142857142858, + "grad_norm": 26.237293243408203, + "learning_rate": 
3.4603174603174604e-05, + "loss": 0.2561, + "step": 3300 + }, + { + "epoch": 18.86285714285714, + "grad_norm": 21.17353057861328, + "learning_rate": 3.45968253968254e-05, + "loss": 0.2684, + "step": 3301 + }, + { + "epoch": 18.86857142857143, + "grad_norm": 30.09574317932129, + "learning_rate": 3.459047619047619e-05, + "loss": 0.2668, + "step": 3302 + }, + { + "epoch": 18.874285714285715, + "grad_norm": 32.279048919677734, + "learning_rate": 3.458412698412698e-05, + "loss": 0.4253, + "step": 3303 + }, + { + "epoch": 18.88, + "grad_norm": 30.678268432617188, + "learning_rate": 3.457777777777778e-05, + "loss": 0.2842, + "step": 3304 + }, + { + "epoch": 18.885714285714286, + "grad_norm": 48.52307891845703, + "learning_rate": 3.4571428571428574e-05, + "loss": 0.4292, + "step": 3305 + }, + { + "epoch": 18.89142857142857, + "grad_norm": 25.623342514038086, + "learning_rate": 3.456507936507937e-05, + "loss": 0.533, + "step": 3306 + }, + { + "epoch": 18.897142857142857, + "grad_norm": 54.10114288330078, + "learning_rate": 3.455873015873016e-05, + "loss": 0.4586, + "step": 3307 + }, + { + "epoch": 18.902857142857144, + "grad_norm": 869.677001953125, + "learning_rate": 3.455238095238095e-05, + "loss": 0.2632, + "step": 3308 + }, + { + "epoch": 18.908571428571427, + "grad_norm": 61.15915298461914, + "learning_rate": 3.454603174603175e-05, + "loss": 0.3718, + "step": 3309 + }, + { + "epoch": 18.914285714285715, + "grad_norm": 67.51522064208984, + "learning_rate": 3.4539682539682544e-05, + "loss": 0.3563, + "step": 3310 + }, + { + "epoch": 18.92, + "grad_norm": 91.92138671875, + "learning_rate": 3.453333333333334e-05, + "loss": 0.2882, + "step": 3311 + }, + { + "epoch": 18.925714285714285, + "grad_norm": 154.09152221679688, + "learning_rate": 3.452698412698413e-05, + "loss": 0.392, + "step": 3312 + }, + { + "epoch": 18.931428571428572, + "grad_norm": 58.27983856201172, + "learning_rate": 3.452063492063492e-05, + "loss": 0.3814, + "step": 3313 + }, + { + "epoch": 
18.937142857142856, + "grad_norm": 27.329254150390625, + "learning_rate": 3.4514285714285715e-05, + "loss": 0.4226, + "step": 3314 + }, + { + "epoch": 18.942857142857143, + "grad_norm": 36.9119873046875, + "learning_rate": 3.4507936507936514e-05, + "loss": 0.3041, + "step": 3315 + }, + { + "epoch": 18.94857142857143, + "grad_norm": 98.28682708740234, + "learning_rate": 3.45015873015873e-05, + "loss": 0.4285, + "step": 3316 + }, + { + "epoch": 18.954285714285714, + "grad_norm": 51.9772834777832, + "learning_rate": 3.44952380952381e-05, + "loss": 0.3947, + "step": 3317 + }, + { + "epoch": 18.96, + "grad_norm": 50.20566940307617, + "learning_rate": 3.448888888888889e-05, + "loss": 0.4644, + "step": 3318 + }, + { + "epoch": 18.965714285714284, + "grad_norm": 58.896080017089844, + "learning_rate": 3.4482539682539685e-05, + "loss": 0.3752, + "step": 3319 + }, + { + "epoch": 18.97142857142857, + "grad_norm": 102.45153045654297, + "learning_rate": 3.447619047619048e-05, + "loss": 0.5225, + "step": 3320 + }, + { + "epoch": 18.97714285714286, + "grad_norm": 19.400266647338867, + "learning_rate": 3.446984126984128e-05, + "loss": 0.3767, + "step": 3321 + }, + { + "epoch": 18.982857142857142, + "grad_norm": 28.068359375, + "learning_rate": 3.446349206349206e-05, + "loss": 0.3446, + "step": 3322 + }, + { + "epoch": 18.98857142857143, + "grad_norm": 33.36658477783203, + "learning_rate": 3.445714285714286e-05, + "loss": 0.3872, + "step": 3323 + }, + { + "epoch": 18.994285714285713, + "grad_norm": 273.208740234375, + "learning_rate": 3.445079365079365e-05, + "loss": 0.3829, + "step": 3324 + }, + { + "epoch": 19.0, + "grad_norm": 49.097652435302734, + "learning_rate": 3.444444444444445e-05, + "loss": 0.3178, + "step": 3325 + }, + { + "epoch": 19.0, + "eval_classes": 0, + "eval_loss": 0.6071962714195251, + "eval_map": 0.918, + "eval_map_50": 0.9633, + "eval_map_75": 0.9507, + "eval_map_large": 0.9182, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.918, + "eval_map_small": -1.0, 
+ "eval_mar_1": 0.7803, + "eval_mar_10": 0.9686, + "eval_mar_100": 0.9743, + "eval_mar_100_per_class": 0.9743, + "eval_mar_large": 0.9743, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.8546, + "eval_samples_per_second": 21.22, + "eval_steps_per_second": 2.671, + "step": 3325 + }, + { + "epoch": 19.005714285714287, + "grad_norm": 108.3809585571289, + "learning_rate": 3.443809523809524e-05, + "loss": 0.4658, + "step": 3326 + }, + { + "epoch": 19.01142857142857, + "grad_norm": 81.60541534423828, + "learning_rate": 3.443174603174603e-05, + "loss": 0.3664, + "step": 3327 + }, + { + "epoch": 19.017142857142858, + "grad_norm": 70.19352722167969, + "learning_rate": 3.4425396825396825e-05, + "loss": 0.4201, + "step": 3328 + }, + { + "epoch": 19.02285714285714, + "grad_norm": 83.45301818847656, + "learning_rate": 3.4419047619047625e-05, + "loss": 0.4855, + "step": 3329 + }, + { + "epoch": 19.02857142857143, + "grad_norm": 75.37036895751953, + "learning_rate": 3.441269841269841e-05, + "loss": 0.4015, + "step": 3330 + }, + { + "epoch": 19.034285714285716, + "grad_norm": 61.17378616333008, + "learning_rate": 3.440634920634921e-05, + "loss": 0.402, + "step": 3331 + }, + { + "epoch": 19.04, + "grad_norm": 107.7389907836914, + "learning_rate": 3.4399999999999996e-05, + "loss": 0.5198, + "step": 3332 + }, + { + "epoch": 19.045714285714286, + "grad_norm": 27.87543296813965, + "learning_rate": 3.4393650793650795e-05, + "loss": 0.2983, + "step": 3333 + }, + { + "epoch": 19.05142857142857, + "grad_norm": 63.95698928833008, + "learning_rate": 3.438730158730159e-05, + "loss": 0.4856, + "step": 3334 + }, + { + "epoch": 19.057142857142857, + "grad_norm": 59.24372100830078, + "learning_rate": 3.438095238095238e-05, + "loss": 0.4489, + "step": 3335 + }, + { + "epoch": 19.062857142857144, + "grad_norm": 49.524620056152344, + "learning_rate": 3.437460317460317e-05, + "loss": 0.346, + "step": 3336 + }, + { + "epoch": 19.068571428571428, + "grad_norm": 
22.447019577026367, + "learning_rate": 3.436825396825397e-05, + "loss": 0.3913, + "step": 3337 + }, + { + "epoch": 19.074285714285715, + "grad_norm": 53.7479362487793, + "learning_rate": 3.436190476190476e-05, + "loss": 0.3361, + "step": 3338 + }, + { + "epoch": 19.08, + "grad_norm": 43.6589241027832, + "learning_rate": 3.435555555555556e-05, + "loss": 0.4637, + "step": 3339 + }, + { + "epoch": 19.085714285714285, + "grad_norm": 28.344255447387695, + "learning_rate": 3.434920634920635e-05, + "loss": 0.3446, + "step": 3340 + }, + { + "epoch": 19.091428571428573, + "grad_norm": 26.627819061279297, + "learning_rate": 3.434285714285714e-05, + "loss": 0.3049, + "step": 3341 + }, + { + "epoch": 19.097142857142856, + "grad_norm": 41.315155029296875, + "learning_rate": 3.4336507936507936e-05, + "loss": 0.3491, + "step": 3342 + }, + { + "epoch": 19.102857142857143, + "grad_norm": 25.992656707763672, + "learning_rate": 3.433015873015873e-05, + "loss": 0.4037, + "step": 3343 + }, + { + "epoch": 19.10857142857143, + "grad_norm": 24.71420669555664, + "learning_rate": 3.432380952380953e-05, + "loss": 0.3653, + "step": 3344 + }, + { + "epoch": 19.114285714285714, + "grad_norm": 57.442039489746094, + "learning_rate": 3.431746031746032e-05, + "loss": 0.4772, + "step": 3345 + }, + { + "epoch": 19.12, + "grad_norm": 36.09226608276367, + "learning_rate": 3.431111111111111e-05, + "loss": 0.3446, + "step": 3346 + }, + { + "epoch": 19.125714285714285, + "grad_norm": 81.4795150756836, + "learning_rate": 3.4304761904761906e-05, + "loss": 0.3171, + "step": 3347 + }, + { + "epoch": 19.13142857142857, + "grad_norm": 18.19621467590332, + "learning_rate": 3.4298412698412705e-05, + "loss": 0.3355, + "step": 3348 + }, + { + "epoch": 19.13714285714286, + "grad_norm": 49.13719940185547, + "learning_rate": 3.429206349206349e-05, + "loss": 0.3004, + "step": 3349 + }, + { + "epoch": 19.142857142857142, + "grad_norm": 39.006492614746094, + "learning_rate": 3.428571428571429e-05, + "loss": 0.3202, + 
"step": 3350 + }, + { + "epoch": 19.14857142857143, + "grad_norm": 49.013946533203125, + "learning_rate": 3.427936507936508e-05, + "loss": 0.297, + "step": 3351 + }, + { + "epoch": 19.154285714285713, + "grad_norm": 63.2196044921875, + "learning_rate": 3.4273015873015876e-05, + "loss": 0.4038, + "step": 3352 + }, + { + "epoch": 19.16, + "grad_norm": 46.97873306274414, + "learning_rate": 3.426666666666667e-05, + "loss": 0.331, + "step": 3353 + }, + { + "epoch": 19.165714285714287, + "grad_norm": 37.16547393798828, + "learning_rate": 3.426031746031746e-05, + "loss": 0.3239, + "step": 3354 + }, + { + "epoch": 19.17142857142857, + "grad_norm": 400.9812316894531, + "learning_rate": 3.4253968253968254e-05, + "loss": 0.5407, + "step": 3355 + }, + { + "epoch": 19.177142857142858, + "grad_norm": 82.57059478759766, + "learning_rate": 3.424761904761905e-05, + "loss": 0.3921, + "step": 3356 + }, + { + "epoch": 19.18285714285714, + "grad_norm": 25.264549255371094, + "learning_rate": 3.424126984126984e-05, + "loss": 0.3145, + "step": 3357 + }, + { + "epoch": 19.18857142857143, + "grad_norm": 221.32171630859375, + "learning_rate": 3.423492063492064e-05, + "loss": 0.3765, + "step": 3358 + }, + { + "epoch": 19.194285714285716, + "grad_norm": 20.174264907836914, + "learning_rate": 3.422857142857143e-05, + "loss": 0.3129, + "step": 3359 + }, + { + "epoch": 19.2, + "grad_norm": 30.072860717773438, + "learning_rate": 3.4222222222222224e-05, + "loss": 0.2448, + "step": 3360 + }, + { + "epoch": 19.205714285714286, + "grad_norm": 48.83212661743164, + "learning_rate": 3.4215873015873016e-05, + "loss": 0.3296, + "step": 3361 + }, + { + "epoch": 19.21142857142857, + "grad_norm": 55.796146392822266, + "learning_rate": 3.4209523809523816e-05, + "loss": 0.3444, + "step": 3362 + }, + { + "epoch": 19.217142857142857, + "grad_norm": 22.57940101623535, + "learning_rate": 3.42031746031746e-05, + "loss": 0.322, + "step": 3363 + }, + { + "epoch": 19.222857142857144, + "grad_norm": 147.75096130371094, 
+ "learning_rate": 3.41968253968254e-05, + "loss": 0.4688, + "step": 3364 + }, + { + "epoch": 19.228571428571428, + "grad_norm": 31.902299880981445, + "learning_rate": 3.419047619047619e-05, + "loss": 0.3138, + "step": 3365 + }, + { + "epoch": 19.234285714285715, + "grad_norm": 91.63981628417969, + "learning_rate": 3.4184126984126986e-05, + "loss": 0.398, + "step": 3366 + }, + { + "epoch": 19.24, + "grad_norm": 60.106632232666016, + "learning_rate": 3.417777777777778e-05, + "loss": 0.3671, + "step": 3367 + }, + { + "epoch": 19.245714285714286, + "grad_norm": 145.38572692871094, + "learning_rate": 3.417142857142857e-05, + "loss": 0.4003, + "step": 3368 + }, + { + "epoch": 19.251428571428573, + "grad_norm": 39.69725036621094, + "learning_rate": 3.4165079365079364e-05, + "loss": 0.4837, + "step": 3369 + }, + { + "epoch": 19.257142857142856, + "grad_norm": 29.655527114868164, + "learning_rate": 3.4158730158730164e-05, + "loss": 0.5045, + "step": 3370 + }, + { + "epoch": 19.262857142857143, + "grad_norm": 28.100563049316406, + "learning_rate": 3.415238095238095e-05, + "loss": 0.37, + "step": 3371 + }, + { + "epoch": 19.268571428571427, + "grad_norm": 94.77354431152344, + "learning_rate": 3.414603174603175e-05, + "loss": 0.3595, + "step": 3372 + }, + { + "epoch": 19.274285714285714, + "grad_norm": 39.54533386230469, + "learning_rate": 3.413968253968254e-05, + "loss": 0.3759, + "step": 3373 + }, + { + "epoch": 19.28, + "grad_norm": 36.4666862487793, + "learning_rate": 3.4133333333333334e-05, + "loss": 0.2876, + "step": 3374 + }, + { + "epoch": 19.285714285714285, + "grad_norm": 59.548675537109375, + "learning_rate": 3.412698412698413e-05, + "loss": 0.3374, + "step": 3375 + }, + { + "epoch": 19.291428571428572, + "grad_norm": 69.64942932128906, + "learning_rate": 3.412063492063492e-05, + "loss": 0.3064, + "step": 3376 + }, + { + "epoch": 19.29714285714286, + "grad_norm": 58.498817443847656, + "learning_rate": 3.411428571428571e-05, + "loss": 0.4424, + "step": 3377 + }, + { 
+ "epoch": 19.302857142857142, + "grad_norm": 27.89930534362793, + "learning_rate": 3.410793650793651e-05, + "loss": 0.451, + "step": 3378 + }, + { + "epoch": 19.30857142857143, + "grad_norm": 63.122528076171875, + "learning_rate": 3.4101587301587304e-05, + "loss": 0.4518, + "step": 3379 + }, + { + "epoch": 19.314285714285713, + "grad_norm": 29.976390838623047, + "learning_rate": 3.40952380952381e-05, + "loss": 0.3307, + "step": 3380 + }, + { + "epoch": 19.32, + "grad_norm": 30.81755828857422, + "learning_rate": 3.408888888888889e-05, + "loss": 0.3633, + "step": 3381 + }, + { + "epoch": 19.325714285714287, + "grad_norm": 50.29838562011719, + "learning_rate": 3.408253968253968e-05, + "loss": 0.4438, + "step": 3382 + }, + { + "epoch": 19.33142857142857, + "grad_norm": 82.86028289794922, + "learning_rate": 3.407619047619048e-05, + "loss": 0.2431, + "step": 3383 + }, + { + "epoch": 19.337142857142858, + "grad_norm": 70.57789611816406, + "learning_rate": 3.4069841269841274e-05, + "loss": 0.6577, + "step": 3384 + }, + { + "epoch": 19.34285714285714, + "grad_norm": 49.527103424072266, + "learning_rate": 3.4063492063492067e-05, + "loss": 0.3687, + "step": 3385 + }, + { + "epoch": 19.34857142857143, + "grad_norm": 35.476287841796875, + "learning_rate": 3.405714285714286e-05, + "loss": 0.3299, + "step": 3386 + }, + { + "epoch": 19.354285714285716, + "grad_norm": 43.367225646972656, + "learning_rate": 3.405079365079365e-05, + "loss": 0.2333, + "step": 3387 + }, + { + "epoch": 19.36, + "grad_norm": 30.95724868774414, + "learning_rate": 3.4044444444444445e-05, + "loss": 0.2513, + "step": 3388 + }, + { + "epoch": 19.365714285714287, + "grad_norm": 127.33224487304688, + "learning_rate": 3.4038095238095244e-05, + "loss": 0.2649, + "step": 3389 + }, + { + "epoch": 19.37142857142857, + "grad_norm": 41.26674270629883, + "learning_rate": 3.403174603174603e-05, + "loss": 0.4281, + "step": 3390 + }, + { + "epoch": 19.377142857142857, + "grad_norm": 386.9308166503906, + "learning_rate": 
3.402539682539683e-05, + "loss": 0.2776, + "step": 3391 + }, + { + "epoch": 19.382857142857144, + "grad_norm": 24.09364891052246, + "learning_rate": 3.401904761904762e-05, + "loss": 0.3776, + "step": 3392 + }, + { + "epoch": 19.388571428571428, + "grad_norm": 61.508121490478516, + "learning_rate": 3.4012698412698414e-05, + "loss": 0.3687, + "step": 3393 + }, + { + "epoch": 19.394285714285715, + "grad_norm": 96.15426635742188, + "learning_rate": 3.400634920634921e-05, + "loss": 0.3404, + "step": 3394 + }, + { + "epoch": 19.4, + "grad_norm": 83.6374740600586, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.3019, + "step": 3395 + }, + { + "epoch": 19.405714285714286, + "grad_norm": 55.773799896240234, + "learning_rate": 3.399365079365079e-05, + "loss": 0.3834, + "step": 3396 + }, + { + "epoch": 19.411428571428573, + "grad_norm": 68.75739288330078, + "learning_rate": 3.398730158730159e-05, + "loss": 0.3088, + "step": 3397 + }, + { + "epoch": 19.417142857142856, + "grad_norm": 148.99221801757812, + "learning_rate": 3.398095238095238e-05, + "loss": 0.3929, + "step": 3398 + }, + { + "epoch": 19.422857142857143, + "grad_norm": 106.68118286132812, + "learning_rate": 3.397460317460318e-05, + "loss": 0.3633, + "step": 3399 + }, + { + "epoch": 19.428571428571427, + "grad_norm": 122.96265411376953, + "learning_rate": 3.396825396825397e-05, + "loss": 0.2715, + "step": 3400 + }, + { + "epoch": 19.434285714285714, + "grad_norm": 187.85357666015625, + "learning_rate": 3.396190476190476e-05, + "loss": 0.4533, + "step": 3401 + }, + { + "epoch": 19.44, + "grad_norm": 48.39687728881836, + "learning_rate": 3.3955555555555555e-05, + "loss": 0.4207, + "step": 3402 + }, + { + "epoch": 19.445714285714285, + "grad_norm": 25.29762840270996, + "learning_rate": 3.3949206349206354e-05, + "loss": 0.344, + "step": 3403 + }, + { + "epoch": 19.451428571428572, + "grad_norm": 45.91965866088867, + "learning_rate": 3.394285714285714e-05, + "loss": 0.4517, + "step": 3404 + }, + { + "epoch": 
19.457142857142856, + "grad_norm": 549.9448852539062, + "learning_rate": 3.393650793650794e-05, + "loss": 0.5016, + "step": 3405 + }, + { + "epoch": 19.462857142857143, + "grad_norm": 304.6712646484375, + "learning_rate": 3.393015873015873e-05, + "loss": 0.3779, + "step": 3406 + }, + { + "epoch": 19.46857142857143, + "grad_norm": 56.09303665161133, + "learning_rate": 3.3923809523809525e-05, + "loss": 0.4077, + "step": 3407 + }, + { + "epoch": 19.474285714285713, + "grad_norm": 45.298744201660156, + "learning_rate": 3.391746031746032e-05, + "loss": 0.406, + "step": 3408 + }, + { + "epoch": 19.48, + "grad_norm": 60.030433654785156, + "learning_rate": 3.391111111111111e-05, + "loss": 0.321, + "step": 3409 + }, + { + "epoch": 19.485714285714284, + "grad_norm": 71.22777557373047, + "learning_rate": 3.39047619047619e-05, + "loss": 0.5016, + "step": 3410 + }, + { + "epoch": 19.49142857142857, + "grad_norm": 81.2491226196289, + "learning_rate": 3.38984126984127e-05, + "loss": 0.4921, + "step": 3411 + }, + { + "epoch": 19.497142857142858, + "grad_norm": 27.062021255493164, + "learning_rate": 3.3892063492063495e-05, + "loss": 0.3775, + "step": 3412 + }, + { + "epoch": 19.502857142857142, + "grad_norm": 17.73770523071289, + "learning_rate": 3.388571428571429e-05, + "loss": 0.4258, + "step": 3413 + }, + { + "epoch": 19.50857142857143, + "grad_norm": 26.88576316833496, + "learning_rate": 3.387936507936508e-05, + "loss": 0.452, + "step": 3414 + }, + { + "epoch": 19.514285714285712, + "grad_norm": 104.78714752197266, + "learning_rate": 3.387301587301587e-05, + "loss": 0.4192, + "step": 3415 + }, + { + "epoch": 19.52, + "grad_norm": 40.716007232666016, + "learning_rate": 3.3866666666666665e-05, + "loss": 0.3494, + "step": 3416 + }, + { + "epoch": 19.525714285714287, + "grad_norm": 38.237709045410156, + "learning_rate": 3.3860317460317465e-05, + "loss": 0.3199, + "step": 3417 + }, + { + "epoch": 19.53142857142857, + "grad_norm": 29.721233367919922, + "learning_rate": 
3.385396825396826e-05, + "loss": 0.3223, + "step": 3418 + }, + { + "epoch": 19.537142857142857, + "grad_norm": 73.37924194335938, + "learning_rate": 3.384761904761905e-05, + "loss": 0.3554, + "step": 3419 + }, + { + "epoch": 19.542857142857144, + "grad_norm": 68.08438110351562, + "learning_rate": 3.384126984126984e-05, + "loss": 0.3283, + "step": 3420 + }, + { + "epoch": 19.548571428571428, + "grad_norm": 197.18992614746094, + "learning_rate": 3.3834920634920635e-05, + "loss": 0.3387, + "step": 3421 + }, + { + "epoch": 19.554285714285715, + "grad_norm": 85.63236999511719, + "learning_rate": 3.3828571428571435e-05, + "loss": 0.3336, + "step": 3422 + }, + { + "epoch": 19.56, + "grad_norm": 62.67841720581055, + "learning_rate": 3.382222222222222e-05, + "loss": 0.4093, + "step": 3423 + }, + { + "epoch": 19.565714285714286, + "grad_norm": 73.47346496582031, + "learning_rate": 3.381587301587302e-05, + "loss": 0.3136, + "step": 3424 + }, + { + "epoch": 19.571428571428573, + "grad_norm": 73.36341857910156, + "learning_rate": 3.380952380952381e-05, + "loss": 0.393, + "step": 3425 + }, + { + "epoch": 19.577142857142857, + "grad_norm": 29.94477653503418, + "learning_rate": 3.3803174603174605e-05, + "loss": 0.3345, + "step": 3426 + }, + { + "epoch": 19.582857142857144, + "grad_norm": 35.631168365478516, + "learning_rate": 3.37968253968254e-05, + "loss": 0.3381, + "step": 3427 + }, + { + "epoch": 19.588571428571427, + "grad_norm": 38.06371307373047, + "learning_rate": 3.37904761904762e-05, + "loss": 0.3678, + "step": 3428 + }, + { + "epoch": 19.594285714285714, + "grad_norm": 62.71897506713867, + "learning_rate": 3.378412698412698e-05, + "loss": 0.5052, + "step": 3429 + }, + { + "epoch": 19.6, + "grad_norm": 340.963623046875, + "learning_rate": 3.377777777777778e-05, + "loss": 0.3526, + "step": 3430 + }, + { + "epoch": 19.605714285714285, + "grad_norm": 93.26659393310547, + "learning_rate": 3.377142857142857e-05, + "loss": 0.5047, + "step": 3431 + }, + { + "epoch": 
19.611428571428572, + "grad_norm": 30.196279525756836, + "learning_rate": 3.376507936507937e-05, + "loss": 0.4641, + "step": 3432 + }, + { + "epoch": 19.617142857142856, + "grad_norm": 97.52349853515625, + "learning_rate": 3.375873015873016e-05, + "loss": 0.4104, + "step": 3433 + }, + { + "epoch": 19.622857142857143, + "grad_norm": 67.79190063476562, + "learning_rate": 3.375238095238095e-05, + "loss": 0.5344, + "step": 3434 + }, + { + "epoch": 19.62857142857143, + "grad_norm": 71.02275848388672, + "learning_rate": 3.3746031746031746e-05, + "loss": 0.3563, + "step": 3435 + }, + { + "epoch": 19.634285714285713, + "grad_norm": 60.292869567871094, + "learning_rate": 3.3739682539682545e-05, + "loss": 0.3805, + "step": 3436 + }, + { + "epoch": 19.64, + "grad_norm": 24.881738662719727, + "learning_rate": 3.373333333333333e-05, + "loss": 0.5486, + "step": 3437 + }, + { + "epoch": 19.645714285714284, + "grad_norm": 72.77564239501953, + "learning_rate": 3.372698412698413e-05, + "loss": 0.3257, + "step": 3438 + }, + { + "epoch": 19.65142857142857, + "grad_norm": 31.99180030822754, + "learning_rate": 3.372063492063492e-05, + "loss": 0.2574, + "step": 3439 + }, + { + "epoch": 19.65714285714286, + "grad_norm": 23.13508415222168, + "learning_rate": 3.3714285714285716e-05, + "loss": 0.2581, + "step": 3440 + }, + { + "epoch": 19.662857142857142, + "grad_norm": 41.29374313354492, + "learning_rate": 3.370793650793651e-05, + "loss": 0.3201, + "step": 3441 + }, + { + "epoch": 19.66857142857143, + "grad_norm": 204.74082946777344, + "learning_rate": 3.37015873015873e-05, + "loss": 0.2919, + "step": 3442 + }, + { + "epoch": 19.674285714285713, + "grad_norm": 77.87431335449219, + "learning_rate": 3.3695238095238094e-05, + "loss": 0.3872, + "step": 3443 + }, + { + "epoch": 19.68, + "grad_norm": 34.35457229614258, + "learning_rate": 3.368888888888889e-05, + "loss": 0.2946, + "step": 3444 + }, + { + "epoch": 19.685714285714287, + "grad_norm": 53.063560485839844, + "learning_rate": 
3.368253968253968e-05, + "loss": 0.3896, + "step": 3445 + }, + { + "epoch": 19.69142857142857, + "grad_norm": 17.917118072509766, + "learning_rate": 3.367619047619048e-05, + "loss": 0.339, + "step": 3446 + }, + { + "epoch": 19.697142857142858, + "grad_norm": 62.1878776550293, + "learning_rate": 3.366984126984127e-05, + "loss": 0.2799, + "step": 3447 + }, + { + "epoch": 19.70285714285714, + "grad_norm": 33.710147857666016, + "learning_rate": 3.3663492063492064e-05, + "loss": 0.2277, + "step": 3448 + }, + { + "epoch": 19.708571428571428, + "grad_norm": 76.45654296875, + "learning_rate": 3.3657142857142856e-05, + "loss": 0.3065, + "step": 3449 + }, + { + "epoch": 19.714285714285715, + "grad_norm": 22.282197952270508, + "learning_rate": 3.3650793650793656e-05, + "loss": 0.3891, + "step": 3450 + }, + { + "epoch": 19.72, + "grad_norm": 844.6537475585938, + "learning_rate": 3.364444444444445e-05, + "loss": 0.4347, + "step": 3451 + }, + { + "epoch": 19.725714285714286, + "grad_norm": 37.986270904541016, + "learning_rate": 3.363809523809524e-05, + "loss": 0.3042, + "step": 3452 + }, + { + "epoch": 19.731428571428573, + "grad_norm": 47.54478073120117, + "learning_rate": 3.3631746031746034e-05, + "loss": 0.2708, + "step": 3453 + }, + { + "epoch": 19.737142857142857, + "grad_norm": 21.841947555541992, + "learning_rate": 3.3625396825396826e-05, + "loss": 0.2471, + "step": 3454 + }, + { + "epoch": 19.742857142857144, + "grad_norm": 120.77538299560547, + "learning_rate": 3.361904761904762e-05, + "loss": 0.3965, + "step": 3455 + }, + { + "epoch": 19.748571428571427, + "grad_norm": 102.79607391357422, + "learning_rate": 3.361269841269841e-05, + "loss": 0.3502, + "step": 3456 + }, + { + "epoch": 19.754285714285714, + "grad_norm": 91.01953125, + "learning_rate": 3.360634920634921e-05, + "loss": 0.2933, + "step": 3457 + }, + { + "epoch": 19.76, + "grad_norm": 192.01560974121094, + "learning_rate": 3.3600000000000004e-05, + "loss": 0.4807, + "step": 3458 + }, + { + "epoch": 
19.765714285714285, + "grad_norm": 32.285369873046875, + "learning_rate": 3.3593650793650796e-05, + "loss": 0.3571, + "step": 3459 + }, + { + "epoch": 19.771428571428572, + "grad_norm": 89.04906463623047, + "learning_rate": 3.358730158730159e-05, + "loss": 0.3545, + "step": 3460 + }, + { + "epoch": 19.777142857142856, + "grad_norm": 22.91415786743164, + "learning_rate": 3.358095238095239e-05, + "loss": 0.3823, + "step": 3461 + }, + { + "epoch": 19.782857142857143, + "grad_norm": 77.87922668457031, + "learning_rate": 3.3574603174603174e-05, + "loss": 0.3593, + "step": 3462 + }, + { + "epoch": 19.78857142857143, + "grad_norm": 21.24201774597168, + "learning_rate": 3.3568253968253974e-05, + "loss": 0.4239, + "step": 3463 + }, + { + "epoch": 19.794285714285714, + "grad_norm": 48.2424430847168, + "learning_rate": 3.356190476190476e-05, + "loss": 0.3687, + "step": 3464 + }, + { + "epoch": 19.8, + "grad_norm": 47.62773895263672, + "learning_rate": 3.355555555555556e-05, + "loss": 0.348, + "step": 3465 + }, + { + "epoch": 19.805714285714284, + "grad_norm": 71.29972076416016, + "learning_rate": 3.354920634920635e-05, + "loss": 0.4168, + "step": 3466 + }, + { + "epoch": 19.81142857142857, + "grad_norm": 56.72834396362305, + "learning_rate": 3.3542857142857144e-05, + "loss": 0.2963, + "step": 3467 + }, + { + "epoch": 19.81714285714286, + "grad_norm": 41.31034469604492, + "learning_rate": 3.353650793650794e-05, + "loss": 0.3007, + "step": 3468 + }, + { + "epoch": 19.822857142857142, + "grad_norm": 90.26205444335938, + "learning_rate": 3.3530158730158736e-05, + "loss": 0.2514, + "step": 3469 + }, + { + "epoch": 19.82857142857143, + "grad_norm": 47.04402542114258, + "learning_rate": 3.352380952380952e-05, + "loss": 0.5244, + "step": 3470 + }, + { + "epoch": 19.834285714285713, + "grad_norm": 37.07173156738281, + "learning_rate": 3.351746031746032e-05, + "loss": 0.3592, + "step": 3471 + }, + { + "epoch": 19.84, + "grad_norm": 28.39764404296875, + "learning_rate": 
3.3511111111111114e-05, + "loss": 0.361, + "step": 3472 + }, + { + "epoch": 19.845714285714287, + "grad_norm": 73.49800109863281, + "learning_rate": 3.350476190476191e-05, + "loss": 0.4602, + "step": 3473 + }, + { + "epoch": 19.85142857142857, + "grad_norm": 40.269344329833984, + "learning_rate": 3.34984126984127e-05, + "loss": 0.3875, + "step": 3474 + }, + { + "epoch": 19.857142857142858, + "grad_norm": 42.47099685668945, + "learning_rate": 3.349206349206349e-05, + "loss": 0.4689, + "step": 3475 + }, + { + "epoch": 19.86285714285714, + "grad_norm": 35.60416030883789, + "learning_rate": 3.3485714285714285e-05, + "loss": 0.3837, + "step": 3476 + }, + { + "epoch": 19.86857142857143, + "grad_norm": 49.231346130371094, + "learning_rate": 3.3479365079365084e-05, + "loss": 0.4882, + "step": 3477 + }, + { + "epoch": 19.874285714285715, + "grad_norm": 25.547861099243164, + "learning_rate": 3.347301587301587e-05, + "loss": 0.5016, + "step": 3478 + }, + { + "epoch": 19.88, + "grad_norm": 40.764549255371094, + "learning_rate": 3.346666666666667e-05, + "loss": 0.4769, + "step": 3479 + }, + { + "epoch": 19.885714285714286, + "grad_norm": 562.2260131835938, + "learning_rate": 3.346031746031746e-05, + "loss": 0.3894, + "step": 3480 + }, + { + "epoch": 19.89142857142857, + "grad_norm": 37.60805130004883, + "learning_rate": 3.3453968253968255e-05, + "loss": 0.3554, + "step": 3481 + }, + { + "epoch": 19.897142857142857, + "grad_norm": 42.82327651977539, + "learning_rate": 3.344761904761905e-05, + "loss": 0.3977, + "step": 3482 + }, + { + "epoch": 19.902857142857144, + "grad_norm": 35.4509162902832, + "learning_rate": 3.3441269841269847e-05, + "loss": 0.4073, + "step": 3483 + }, + { + "epoch": 19.908571428571427, + "grad_norm": 48.23896408081055, + "learning_rate": 3.343492063492063e-05, + "loss": 0.2832, + "step": 3484 + }, + { + "epoch": 19.914285714285715, + "grad_norm": 573.278564453125, + "learning_rate": 3.342857142857143e-05, + "loss": 0.5797, + "step": 3485 + }, + { + 
"epoch": 19.92, + "grad_norm": 224.73423767089844, + "learning_rate": 3.3422222222222224e-05, + "loss": 0.4679, + "step": 3486 + }, + { + "epoch": 19.925714285714285, + "grad_norm": 257.82159423828125, + "learning_rate": 3.341587301587302e-05, + "loss": 0.4137, + "step": 3487 + }, + { + "epoch": 19.931428571428572, + "grad_norm": 37.30079650878906, + "learning_rate": 3.340952380952381e-05, + "loss": 0.4874, + "step": 3488 + }, + { + "epoch": 19.937142857142856, + "grad_norm": 41.37504959106445, + "learning_rate": 3.34031746031746e-05, + "loss": 0.5663, + "step": 3489 + }, + { + "epoch": 19.942857142857143, + "grad_norm": 85.4715805053711, + "learning_rate": 3.33968253968254e-05, + "loss": 0.574, + "step": 3490 + }, + { + "epoch": 19.94857142857143, + "grad_norm": 270.5368347167969, + "learning_rate": 3.3390476190476194e-05, + "loss": 0.5881, + "step": 3491 + }, + { + "epoch": 19.954285714285714, + "grad_norm": 35.138389587402344, + "learning_rate": 3.338412698412699e-05, + "loss": 0.4335, + "step": 3492 + }, + { + "epoch": 19.96, + "grad_norm": 44.54415512084961, + "learning_rate": 3.337777777777778e-05, + "loss": 0.3731, + "step": 3493 + }, + { + "epoch": 19.965714285714284, + "grad_norm": 46.49395751953125, + "learning_rate": 3.337142857142857e-05, + "loss": 0.3291, + "step": 3494 + }, + { + "epoch": 19.97142857142857, + "grad_norm": 24.26507568359375, + "learning_rate": 3.3365079365079365e-05, + "loss": 0.3144, + "step": 3495 + }, + { + "epoch": 19.97714285714286, + "grad_norm": 38.6696891784668, + "learning_rate": 3.3358730158730164e-05, + "loss": 0.4773, + "step": 3496 + }, + { + "epoch": 19.982857142857142, + "grad_norm": 501.0118408203125, + "learning_rate": 3.335238095238095e-05, + "loss": 0.5959, + "step": 3497 + }, + { + "epoch": 19.98857142857143, + "grad_norm": 70.6850357055664, + "learning_rate": 3.334603174603175e-05, + "loss": 0.4196, + "step": 3498 + }, + { + "epoch": 19.994285714285713, + "grad_norm": 34.70767593383789, + "learning_rate": 
3.333968253968254e-05, + "loss": 0.4273, + "step": 3499 + }, + { + "epoch": 20.0, + "grad_norm": 126.21673583984375, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.5125, + "step": 3500 + }, + { + "epoch": 20.0, + "eval_classes": 0, + "eval_loss": 0.8406780958175659, + "eval_map": 0.8424, + "eval_map_50": 0.9321, + "eval_map_75": 0.8951, + "eval_map_large": 0.8426, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.8424, + "eval_map_small": -1.0, + "eval_mar_1": 0.7362, + "eval_mar_10": 0.9371, + "eval_mar_100": 0.947, + "eval_mar_100_per_class": 0.947, + "eval_mar_large": 0.947, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 14.1918, + "eval_samples_per_second": 20.716, + "eval_steps_per_second": 2.607, + "step": 3500 + }, + { + "epoch": 20.005714285714287, + "grad_norm": 20.92926597595215, + "learning_rate": 3.332698412698413e-05, + "loss": 0.3793, + "step": 3501 + }, + { + "epoch": 20.01142857142857, + "grad_norm": 36.212100982666016, + "learning_rate": 3.332063492063493e-05, + "loss": 0.4771, + "step": 3502 + }, + { + "epoch": 20.017142857142858, + "grad_norm": 28.421642303466797, + "learning_rate": 3.331428571428571e-05, + "loss": 0.414, + "step": 3503 + }, + { + "epoch": 20.02285714285714, + "grad_norm": 76.52762603759766, + "learning_rate": 3.330793650793651e-05, + "loss": 0.3281, + "step": 3504 + }, + { + "epoch": 20.02857142857143, + "grad_norm": 103.1466064453125, + "learning_rate": 3.3301587301587305e-05, + "loss": 0.3919, + "step": 3505 + }, + { + "epoch": 20.034285714285716, + "grad_norm": 61.027793884277344, + "learning_rate": 3.32952380952381e-05, + "loss": 0.3695, + "step": 3506 + }, + { + "epoch": 20.04, + "grad_norm": 64.57794952392578, + "learning_rate": 3.328888888888889e-05, + "loss": 0.4059, + "step": 3507 + }, + { + "epoch": 20.045714285714286, + "grad_norm": 40.50300216674805, + "learning_rate": 3.328253968253968e-05, + "loss": 0.5027, + "step": 3508 + }, + { + "epoch": 20.05142857142857, + "grad_norm": 
76.50260925292969, + "learning_rate": 3.3276190476190475e-05, + "loss": 0.5772, + "step": 3509 + }, + { + "epoch": 20.057142857142857, + "grad_norm": 55.68210983276367, + "learning_rate": 3.3269841269841275e-05, + "loss": 0.4448, + "step": 3510 + }, + { + "epoch": 20.062857142857144, + "grad_norm": 65.80165100097656, + "learning_rate": 3.326349206349206e-05, + "loss": 0.574, + "step": 3511 + }, + { + "epoch": 20.068571428571428, + "grad_norm": 195.4805908203125, + "learning_rate": 3.325714285714286e-05, + "loss": 0.3547, + "step": 3512 + }, + { + "epoch": 20.074285714285715, + "grad_norm": 350.74420166015625, + "learning_rate": 3.325079365079365e-05, + "loss": 0.4308, + "step": 3513 + }, + { + "epoch": 20.08, + "grad_norm": 92.72909545898438, + "learning_rate": 3.3244444444444445e-05, + "loss": 0.3202, + "step": 3514 + }, + { + "epoch": 20.085714285714285, + "grad_norm": 121.07453155517578, + "learning_rate": 3.323809523809524e-05, + "loss": 0.4489, + "step": 3515 + }, + { + "epoch": 20.091428571428573, + "grad_norm": 29.92794418334961, + "learning_rate": 3.323174603174604e-05, + "loss": 0.3939, + "step": 3516 + }, + { + "epoch": 20.097142857142856, + "grad_norm": 49.57035446166992, + "learning_rate": 3.322539682539682e-05, + "loss": 0.2621, + "step": 3517 + }, + { + "epoch": 20.102857142857143, + "grad_norm": 69.45816802978516, + "learning_rate": 3.321904761904762e-05, + "loss": 0.238, + "step": 3518 + }, + { + "epoch": 20.10857142857143, + "grad_norm": 59.2504997253418, + "learning_rate": 3.3212698412698415e-05, + "loss": 0.5043, + "step": 3519 + }, + { + "epoch": 20.114285714285714, + "grad_norm": 49.68259811401367, + "learning_rate": 3.320634920634921e-05, + "loss": 0.2716, + "step": 3520 + }, + { + "epoch": 20.12, + "grad_norm": 28.603973388671875, + "learning_rate": 3.32e-05, + "loss": 0.4292, + "step": 3521 + }, + { + "epoch": 20.125714285714285, + "grad_norm": 34.90120315551758, + "learning_rate": 3.319365079365079e-05, + "loss": 0.5205, + "step": 3522 + }, 
+ { + "epoch": 20.13142857142857, + "grad_norm": 67.77092742919922, + "learning_rate": 3.3187301587301586e-05, + "loss": 0.4614, + "step": 3523 + }, + { + "epoch": 20.13714285714286, + "grad_norm": 86.2787094116211, + "learning_rate": 3.3180952380952385e-05, + "loss": 0.4002, + "step": 3524 + }, + { + "epoch": 20.142857142857142, + "grad_norm": 59.58238983154297, + "learning_rate": 3.317460317460318e-05, + "loss": 0.2968, + "step": 3525 + }, + { + "epoch": 20.14857142857143, + "grad_norm": 53.9903678894043, + "learning_rate": 3.316825396825397e-05, + "loss": 0.4827, + "step": 3526 + }, + { + "epoch": 20.154285714285713, + "grad_norm": 47.19764709472656, + "learning_rate": 3.316190476190476e-05, + "loss": 0.3388, + "step": 3527 + }, + { + "epoch": 20.16, + "grad_norm": 87.91258239746094, + "learning_rate": 3.3155555555555556e-05, + "loss": 0.2736, + "step": 3528 + }, + { + "epoch": 20.165714285714287, + "grad_norm": 58.533050537109375, + "learning_rate": 3.3149206349206355e-05, + "loss": 0.3742, + "step": 3529 + }, + { + "epoch": 20.17142857142857, + "grad_norm": 42.35979461669922, + "learning_rate": 3.314285714285714e-05, + "loss": 0.3093, + "step": 3530 + }, + { + "epoch": 20.177142857142858, + "grad_norm": 49.007110595703125, + "learning_rate": 3.313650793650794e-05, + "loss": 0.2862, + "step": 3531 + }, + { + "epoch": 20.18285714285714, + "grad_norm": 78.78868103027344, + "learning_rate": 3.313015873015873e-05, + "loss": 0.2489, + "step": 3532 + }, + { + "epoch": 20.18857142857143, + "grad_norm": 116.97369384765625, + "learning_rate": 3.3123809523809526e-05, + "loss": 0.3767, + "step": 3533 + }, + { + "epoch": 20.194285714285716, + "grad_norm": 37.7362174987793, + "learning_rate": 3.311746031746032e-05, + "loss": 0.2782, + "step": 3534 + }, + { + "epoch": 20.2, + "grad_norm": 183.8468017578125, + "learning_rate": 3.311111111111112e-05, + "loss": 0.2742, + "step": 3535 + }, + { + "epoch": 20.205714285714286, + "grad_norm": 27.15769386291504, + "learning_rate": 
3.3104761904761904e-05, + "loss": 0.2069, + "step": 3536 + }, + { + "epoch": 20.21142857142857, + "grad_norm": 43.55573272705078, + "learning_rate": 3.30984126984127e-05, + "loss": 0.3321, + "step": 3537 + }, + { + "epoch": 20.217142857142857, + "grad_norm": 36.51277542114258, + "learning_rate": 3.309206349206349e-05, + "loss": 0.4491, + "step": 3538 + }, + { + "epoch": 20.222857142857144, + "grad_norm": 30.36905860900879, + "learning_rate": 3.308571428571429e-05, + "loss": 0.5704, + "step": 3539 + }, + { + "epoch": 20.228571428571428, + "grad_norm": 296.42828369140625, + "learning_rate": 3.307936507936508e-05, + "loss": 0.377, + "step": 3540 + }, + { + "epoch": 20.234285714285715, + "grad_norm": 53.208351135253906, + "learning_rate": 3.3073015873015874e-05, + "loss": 0.4801, + "step": 3541 + }, + { + "epoch": 20.24, + "grad_norm": 112.80752563476562, + "learning_rate": 3.3066666666666666e-05, + "loss": 0.4057, + "step": 3542 + }, + { + "epoch": 20.245714285714286, + "grad_norm": 108.6169662475586, + "learning_rate": 3.3060317460317466e-05, + "loss": 0.5041, + "step": 3543 + }, + { + "epoch": 20.251428571428573, + "grad_norm": 59.439430236816406, + "learning_rate": 3.305396825396825e-05, + "loss": 0.3092, + "step": 3544 + }, + { + "epoch": 20.257142857142856, + "grad_norm": 191.1466064453125, + "learning_rate": 3.304761904761905e-05, + "loss": 0.4724, + "step": 3545 + }, + { + "epoch": 20.262857142857143, + "grad_norm": 22.837505340576172, + "learning_rate": 3.3041269841269844e-05, + "loss": 0.449, + "step": 3546 + }, + { + "epoch": 20.268571428571427, + "grad_norm": 44.4491081237793, + "learning_rate": 3.3034920634920636e-05, + "loss": 0.3667, + "step": 3547 + }, + { + "epoch": 20.274285714285714, + "grad_norm": 69.18882751464844, + "learning_rate": 3.302857142857143e-05, + "loss": 0.3382, + "step": 3548 + }, + { + "epoch": 20.28, + "grad_norm": 59.61344909667969, + "learning_rate": 3.302222222222222e-05, + "loss": 0.3483, + "step": 3549 + }, + { + "epoch": 
20.285714285714285, + "grad_norm": 33.22392272949219, + "learning_rate": 3.3015873015873014e-05, + "loss": 0.353, + "step": 3550 + }, + { + "epoch": 20.291428571428572, + "grad_norm": 39.61313247680664, + "learning_rate": 3.3009523809523814e-05, + "loss": 0.4028, + "step": 3551 + }, + { + "epoch": 20.29714285714286, + "grad_norm": 124.15302276611328, + "learning_rate": 3.30031746031746e-05, + "loss": 0.5019, + "step": 3552 + }, + { + "epoch": 20.302857142857142, + "grad_norm": 39.78657150268555, + "learning_rate": 3.29968253968254e-05, + "loss": 0.3265, + "step": 3553 + }, + { + "epoch": 20.30857142857143, + "grad_norm": 60.90039825439453, + "learning_rate": 3.299047619047619e-05, + "loss": 0.2673, + "step": 3554 + }, + { + "epoch": 20.314285714285713, + "grad_norm": 41.07734680175781, + "learning_rate": 3.2984126984126984e-05, + "loss": 0.3912, + "step": 3555 + }, + { + "epoch": 20.32, + "grad_norm": 70.89137268066406, + "learning_rate": 3.297777777777778e-05, + "loss": 0.4581, + "step": 3556 + }, + { + "epoch": 20.325714285714287, + "grad_norm": 90.77945709228516, + "learning_rate": 3.2971428571428576e-05, + "loss": 0.616, + "step": 3557 + }, + { + "epoch": 20.33142857142857, + "grad_norm": 66.2533950805664, + "learning_rate": 3.296507936507937e-05, + "loss": 0.3016, + "step": 3558 + }, + { + "epoch": 20.337142857142858, + "grad_norm": 31.12895965576172, + "learning_rate": 3.295873015873016e-05, + "loss": 0.253, + "step": 3559 + }, + { + "epoch": 20.34285714285714, + "grad_norm": 53.287845611572266, + "learning_rate": 3.2952380952380954e-05, + "loss": 0.3133, + "step": 3560 + }, + { + "epoch": 20.34857142857143, + "grad_norm": 27.55655860900879, + "learning_rate": 3.294603174603175e-05, + "loss": 0.3104, + "step": 3561 + }, + { + "epoch": 20.354285714285716, + "grad_norm": 21.481998443603516, + "learning_rate": 3.293968253968254e-05, + "loss": 0.3095, + "step": 3562 + }, + { + "epoch": 20.36, + "grad_norm": 43.74786376953125, + "learning_rate": 
3.293333333333333e-05, + "loss": 0.3787, + "step": 3563 + }, + { + "epoch": 20.365714285714287, + "grad_norm": 21.40505027770996, + "learning_rate": 3.292698412698413e-05, + "loss": 0.3613, + "step": 3564 + }, + { + "epoch": 20.37142857142857, + "grad_norm": 86.09355163574219, + "learning_rate": 3.2920634920634924e-05, + "loss": 0.4475, + "step": 3565 + }, + { + "epoch": 20.377142857142857, + "grad_norm": 38.143455505371094, + "learning_rate": 3.291428571428572e-05, + "loss": 0.427, + "step": 3566 + }, + { + "epoch": 20.382857142857144, + "grad_norm": 30.078758239746094, + "learning_rate": 3.290793650793651e-05, + "loss": 0.4033, + "step": 3567 + }, + { + "epoch": 20.388571428571428, + "grad_norm": 61.135982513427734, + "learning_rate": 3.290158730158731e-05, + "loss": 0.5424, + "step": 3568 + }, + { + "epoch": 20.394285714285715, + "grad_norm": 80.14017486572266, + "learning_rate": 3.2895238095238095e-05, + "loss": 0.3496, + "step": 3569 + }, + { + "epoch": 20.4, + "grad_norm": 52.26523208618164, + "learning_rate": 3.2888888888888894e-05, + "loss": 0.43, + "step": 3570 + }, + { + "epoch": 20.405714285714286, + "grad_norm": 41.19422149658203, + "learning_rate": 3.288253968253968e-05, + "loss": 0.4903, + "step": 3571 + }, + { + "epoch": 20.411428571428573, + "grad_norm": 33.27156448364258, + "learning_rate": 3.287619047619048e-05, + "loss": 0.4012, + "step": 3572 + }, + { + "epoch": 20.417142857142856, + "grad_norm": 35.2028694152832, + "learning_rate": 3.286984126984127e-05, + "loss": 0.327, + "step": 3573 + }, + { + "epoch": 20.422857142857143, + "grad_norm": 60.655948638916016, + "learning_rate": 3.2863492063492065e-05, + "loss": 0.3294, + "step": 3574 + }, + { + "epoch": 20.428571428571427, + "grad_norm": 74.69729614257812, + "learning_rate": 3.285714285714286e-05, + "loss": 0.2993, + "step": 3575 + }, + { + "epoch": 20.434285714285714, + "grad_norm": 61.959712982177734, + "learning_rate": 3.2850793650793657e-05, + "loss": 0.2402, + "step": 3576 + }, + { + 
"epoch": 20.44, + "grad_norm": 49.8610954284668, + "learning_rate": 3.284444444444444e-05, + "loss": 0.4154, + "step": 3577 + }, + { + "epoch": 20.445714285714285, + "grad_norm": 75.74891662597656, + "learning_rate": 3.283809523809524e-05, + "loss": 0.4226, + "step": 3578 + }, + { + "epoch": 20.451428571428572, + "grad_norm": 27.832401275634766, + "learning_rate": 3.2831746031746034e-05, + "loss": 0.3972, + "step": 3579 + }, + { + "epoch": 20.457142857142856, + "grad_norm": 21.77943229675293, + "learning_rate": 3.282539682539683e-05, + "loss": 0.4014, + "step": 3580 + }, + { + "epoch": 20.462857142857143, + "grad_norm": 37.12984085083008, + "learning_rate": 3.281904761904762e-05, + "loss": 0.2845, + "step": 3581 + }, + { + "epoch": 20.46857142857143, + "grad_norm": 65.29959869384766, + "learning_rate": 3.281269841269841e-05, + "loss": 0.3069, + "step": 3582 + }, + { + "epoch": 20.474285714285713, + "grad_norm": 54.41910171508789, + "learning_rate": 3.2806349206349205e-05, + "loss": 0.3176, + "step": 3583 + }, + { + "epoch": 20.48, + "grad_norm": 59.21097946166992, + "learning_rate": 3.2800000000000004e-05, + "loss": 0.3664, + "step": 3584 + }, + { + "epoch": 20.485714285714284, + "grad_norm": 42.1156005859375, + "learning_rate": 3.279365079365079e-05, + "loss": 0.4677, + "step": 3585 + }, + { + "epoch": 20.49142857142857, + "grad_norm": 37.332820892333984, + "learning_rate": 3.278730158730159e-05, + "loss": 0.4599, + "step": 3586 + }, + { + "epoch": 20.497142857142858, + "grad_norm": 47.108253479003906, + "learning_rate": 3.278095238095238e-05, + "loss": 0.4497, + "step": 3587 + }, + { + "epoch": 20.502857142857142, + "grad_norm": 30.58877944946289, + "learning_rate": 3.2774603174603175e-05, + "loss": 0.3067, + "step": 3588 + }, + { + "epoch": 20.50857142857143, + "grad_norm": 38.203102111816406, + "learning_rate": 3.276825396825397e-05, + "loss": 0.3709, + "step": 3589 + }, + { + "epoch": 20.514285714285712, + "grad_norm": 30.350473403930664, + "learning_rate": 
3.276190476190477e-05, + "loss": 0.336, + "step": 3590 + }, + { + "epoch": 20.52, + "grad_norm": 54.0617561340332, + "learning_rate": 3.275555555555555e-05, + "loss": 0.2258, + "step": 3591 + }, + { + "epoch": 20.525714285714287, + "grad_norm": 451.57080078125, + "learning_rate": 3.274920634920635e-05, + "loss": 0.3309, + "step": 3592 + }, + { + "epoch": 20.53142857142857, + "grad_norm": 54.708030700683594, + "learning_rate": 3.2742857142857145e-05, + "loss": 0.3262, + "step": 3593 + }, + { + "epoch": 20.537142857142857, + "grad_norm": 38.319000244140625, + "learning_rate": 3.273650793650794e-05, + "loss": 0.2958, + "step": 3594 + }, + { + "epoch": 20.542857142857144, + "grad_norm": 52.20222473144531, + "learning_rate": 3.273015873015873e-05, + "loss": 0.3768, + "step": 3595 + }, + { + "epoch": 20.548571428571428, + "grad_norm": 24.321361541748047, + "learning_rate": 3.272380952380952e-05, + "loss": 0.4274, + "step": 3596 + }, + { + "epoch": 20.554285714285715, + "grad_norm": 38.93540573120117, + "learning_rate": 3.271746031746032e-05, + "loss": 0.3738, + "step": 3597 + }, + { + "epoch": 20.56, + "grad_norm": 31.37497329711914, + "learning_rate": 3.2711111111111115e-05, + "loss": 0.308, + "step": 3598 + }, + { + "epoch": 20.565714285714286, + "grad_norm": 62.128143310546875, + "learning_rate": 3.270476190476191e-05, + "loss": 0.243, + "step": 3599 + }, + { + "epoch": 20.571428571428573, + "grad_norm": 88.3516845703125, + "learning_rate": 3.26984126984127e-05, + "loss": 0.2807, + "step": 3600 + }, + { + "epoch": 20.577142857142857, + "grad_norm": 34.790374755859375, + "learning_rate": 3.269206349206349e-05, + "loss": 0.2898, + "step": 3601 + }, + { + "epoch": 20.582857142857144, + "grad_norm": 114.55731964111328, + "learning_rate": 3.2685714285714285e-05, + "loss": 0.4922, + "step": 3602 + }, + { + "epoch": 20.588571428571427, + "grad_norm": 92.77104949951172, + "learning_rate": 3.2679365079365085e-05, + "loss": 0.425, + "step": 3603 + }, + { + "epoch": 
20.594285714285714, + "grad_norm": 26.583147048950195, + "learning_rate": 3.267301587301587e-05, + "loss": 0.4188, + "step": 3604 + }, + { + "epoch": 20.6, + "grad_norm": 28.349626541137695, + "learning_rate": 3.266666666666667e-05, + "loss": 0.3628, + "step": 3605 + }, + { + "epoch": 20.605714285714285, + "grad_norm": 62.43906021118164, + "learning_rate": 3.266031746031746e-05, + "loss": 0.3274, + "step": 3606 + }, + { + "epoch": 20.611428571428572, + "grad_norm": 32.20339584350586, + "learning_rate": 3.2653968253968255e-05, + "loss": 0.4397, + "step": 3607 + }, + { + "epoch": 20.617142857142856, + "grad_norm": 64.8906021118164, + "learning_rate": 3.264761904761905e-05, + "loss": 0.5333, + "step": 3608 + }, + { + "epoch": 20.622857142857143, + "grad_norm": 41.74363708496094, + "learning_rate": 3.264126984126985e-05, + "loss": 0.2679, + "step": 3609 + }, + { + "epoch": 20.62857142857143, + "grad_norm": 36.93730163574219, + "learning_rate": 3.263492063492063e-05, + "loss": 0.286, + "step": 3610 + }, + { + "epoch": 20.634285714285713, + "grad_norm": 114.60037994384766, + "learning_rate": 3.262857142857143e-05, + "loss": 0.5103, + "step": 3611 + }, + { + "epoch": 20.64, + "grad_norm": 45.13642120361328, + "learning_rate": 3.2622222222222225e-05, + "loss": 0.2691, + "step": 3612 + }, + { + "epoch": 20.645714285714284, + "grad_norm": 50.80180358886719, + "learning_rate": 3.261587301587302e-05, + "loss": 0.4702, + "step": 3613 + }, + { + "epoch": 20.65142857142857, + "grad_norm": 17.604503631591797, + "learning_rate": 3.260952380952381e-05, + "loss": 0.274, + "step": 3614 + }, + { + "epoch": 20.65714285714286, + "grad_norm": 56.79952621459961, + "learning_rate": 3.26031746031746e-05, + "loss": 0.3094, + "step": 3615 + }, + { + "epoch": 20.662857142857142, + "grad_norm": 40.64170455932617, + "learning_rate": 3.2596825396825396e-05, + "loss": 0.2468, + "step": 3616 + }, + { + "epoch": 20.66857142857143, + "grad_norm": 45.026268005371094, + "learning_rate": 
3.2590476190476195e-05, + "loss": 0.3393, + "step": 3617 + }, + { + "epoch": 20.674285714285713, + "grad_norm": 30.018081665039062, + "learning_rate": 3.258412698412698e-05, + "loss": 0.3994, + "step": 3618 + }, + { + "epoch": 20.68, + "grad_norm": 56.77120590209961, + "learning_rate": 3.257777777777778e-05, + "loss": 0.3715, + "step": 3619 + }, + { + "epoch": 20.685714285714287, + "grad_norm": 1320.7337646484375, + "learning_rate": 3.257142857142857e-05, + "loss": 0.4171, + "step": 3620 + }, + { + "epoch": 20.69142857142857, + "grad_norm": 38.26957321166992, + "learning_rate": 3.2565079365079366e-05, + "loss": 0.4449, + "step": 3621 + }, + { + "epoch": 20.697142857142858, + "grad_norm": 43.36183166503906, + "learning_rate": 3.255873015873016e-05, + "loss": 0.373, + "step": 3622 + }, + { + "epoch": 20.70285714285714, + "grad_norm": 37.03742980957031, + "learning_rate": 3.255238095238096e-05, + "loss": 0.3653, + "step": 3623 + }, + { + "epoch": 20.708571428571428, + "grad_norm": 36.908016204833984, + "learning_rate": 3.2546031746031744e-05, + "loss": 0.3292, + "step": 3624 + }, + { + "epoch": 20.714285714285715, + "grad_norm": 45.65389633178711, + "learning_rate": 3.253968253968254e-05, + "loss": 0.2525, + "step": 3625 + }, + { + "epoch": 20.72, + "grad_norm": 80.70460510253906, + "learning_rate": 3.253333333333333e-05, + "loss": 0.3117, + "step": 3626 + }, + { + "epoch": 20.725714285714286, + "grad_norm": 31.633970260620117, + "learning_rate": 3.252698412698413e-05, + "loss": 0.333, + "step": 3627 + }, + { + "epoch": 20.731428571428573, + "grad_norm": 32.370887756347656, + "learning_rate": 3.252063492063492e-05, + "loss": 0.5887, + "step": 3628 + }, + { + "epoch": 20.737142857142857, + "grad_norm": 73.466796875, + "learning_rate": 3.2514285714285714e-05, + "loss": 0.3408, + "step": 3629 + }, + { + "epoch": 20.742857142857144, + "grad_norm": 43.25515365600586, + "learning_rate": 3.2507936507936506e-05, + "loss": 0.3952, + "step": 3630 + }, + { + "epoch": 
20.748571428571427, + "grad_norm": 45.522369384765625, + "learning_rate": 3.2501587301587306e-05, + "loss": 0.3448, + "step": 3631 + }, + { + "epoch": 20.754285714285714, + "grad_norm": 78.45230102539062, + "learning_rate": 3.24952380952381e-05, + "loss": 0.3797, + "step": 3632 + }, + { + "epoch": 20.76, + "grad_norm": 54.089656829833984, + "learning_rate": 3.248888888888889e-05, + "loss": 0.2894, + "step": 3633 + }, + { + "epoch": 20.765714285714285, + "grad_norm": 55.51853942871094, + "learning_rate": 3.2482539682539684e-05, + "loss": 0.3811, + "step": 3634 + }, + { + "epoch": 20.771428571428572, + "grad_norm": 75.3653335571289, + "learning_rate": 3.2476190476190476e-05, + "loss": 0.3138, + "step": 3635 + }, + { + "epoch": 20.777142857142856, + "grad_norm": 594.2489013671875, + "learning_rate": 3.2469841269841276e-05, + "loss": 0.3179, + "step": 3636 + }, + { + "epoch": 20.782857142857143, + "grad_norm": 162.05276489257812, + "learning_rate": 3.246349206349206e-05, + "loss": 0.3792, + "step": 3637 + }, + { + "epoch": 20.78857142857143, + "grad_norm": 1130.9315185546875, + "learning_rate": 3.245714285714286e-05, + "loss": 0.3471, + "step": 3638 + }, + { + "epoch": 20.794285714285714, + "grad_norm": 92.6990737915039, + "learning_rate": 3.2450793650793654e-05, + "loss": 0.3092, + "step": 3639 + }, + { + "epoch": 20.8, + "grad_norm": 66.91148376464844, + "learning_rate": 3.2444444444444446e-05, + "loss": 0.4639, + "step": 3640 + }, + { + "epoch": 20.805714285714284, + "grad_norm": 79.60892486572266, + "learning_rate": 3.243809523809524e-05, + "loss": 0.4086, + "step": 3641 + }, + { + "epoch": 20.81142857142857, + "grad_norm": 38.52240753173828, + "learning_rate": 3.243174603174604e-05, + "loss": 0.5145, + "step": 3642 + }, + { + "epoch": 20.81714285714286, + "grad_norm": 54.60650634765625, + "learning_rate": 3.2425396825396824e-05, + "loss": 0.4872, + "step": 3643 + }, + { + "epoch": 20.822857142857142, + "grad_norm": 99.12556457519531, + "learning_rate": 
3.2419047619047624e-05, + "loss": 0.3176, + "step": 3644 + }, + { + "epoch": 20.82857142857143, + "grad_norm": 100.69367218017578, + "learning_rate": 3.2412698412698416e-05, + "loss": 0.4089, + "step": 3645 + }, + { + "epoch": 20.834285714285713, + "grad_norm": 40.28134536743164, + "learning_rate": 3.240634920634921e-05, + "loss": 0.6446, + "step": 3646 + }, + { + "epoch": 20.84, + "grad_norm": 69.2869873046875, + "learning_rate": 3.24e-05, + "loss": 0.3736, + "step": 3647 + }, + { + "epoch": 20.845714285714287, + "grad_norm": 41.41655731201172, + "learning_rate": 3.2393650793650794e-05, + "loss": 0.3731, + "step": 3648 + }, + { + "epoch": 20.85142857142857, + "grad_norm": 30.325632095336914, + "learning_rate": 3.238730158730159e-05, + "loss": 0.3695, + "step": 3649 + }, + { + "epoch": 20.857142857142858, + "grad_norm": 43.05796432495117, + "learning_rate": 3.2380952380952386e-05, + "loss": 0.6107, + "step": 3650 + }, + { + "epoch": 20.86285714285714, + "grad_norm": 31.62732696533203, + "learning_rate": 3.237460317460317e-05, + "loss": 0.2847, + "step": 3651 + }, + { + "epoch": 20.86857142857143, + "grad_norm": 34.25601577758789, + "learning_rate": 3.236825396825397e-05, + "loss": 0.3036, + "step": 3652 + }, + { + "epoch": 20.874285714285715, + "grad_norm": 40.31098175048828, + "learning_rate": 3.2361904761904764e-05, + "loss": 0.5153, + "step": 3653 + }, + { + "epoch": 20.88, + "grad_norm": 56.50481033325195, + "learning_rate": 3.235555555555556e-05, + "loss": 0.2486, + "step": 3654 + }, + { + "epoch": 20.885714285714286, + "grad_norm": 441.31976318359375, + "learning_rate": 3.234920634920635e-05, + "loss": 0.3427, + "step": 3655 + }, + { + "epoch": 20.89142857142857, + "grad_norm": 60.5505256652832, + "learning_rate": 3.234285714285715e-05, + "loss": 0.2867, + "step": 3656 + }, + { + "epoch": 20.897142857142857, + "grad_norm": 63.12777328491211, + "learning_rate": 3.2336507936507935e-05, + "loss": 0.3733, + "step": 3657 + }, + { + "epoch": 20.902857142857144, + 
"grad_norm": 63.3406867980957, + "learning_rate": 3.2330158730158734e-05, + "loss": 0.3769, + "step": 3658 + }, + { + "epoch": 20.908571428571427, + "grad_norm": 79.66120910644531, + "learning_rate": 3.232380952380952e-05, + "loss": 0.3189, + "step": 3659 + }, + { + "epoch": 20.914285714285715, + "grad_norm": 32.31673049926758, + "learning_rate": 3.231746031746032e-05, + "loss": 0.4188, + "step": 3660 + }, + { + "epoch": 20.92, + "grad_norm": 98.06800842285156, + "learning_rate": 3.231111111111111e-05, + "loss": 0.4109, + "step": 3661 + }, + { + "epoch": 20.925714285714285, + "grad_norm": 63.263858795166016, + "learning_rate": 3.2304761904761905e-05, + "loss": 0.3264, + "step": 3662 + }, + { + "epoch": 20.931428571428572, + "grad_norm": 282.83843994140625, + "learning_rate": 3.22984126984127e-05, + "loss": 0.3349, + "step": 3663 + }, + { + "epoch": 20.937142857142856, + "grad_norm": 80.6650161743164, + "learning_rate": 3.22920634920635e-05, + "loss": 0.291, + "step": 3664 + }, + { + "epoch": 20.942857142857143, + "grad_norm": 764.670166015625, + "learning_rate": 3.228571428571428e-05, + "loss": 0.2556, + "step": 3665 + }, + { + "epoch": 20.94857142857143, + "grad_norm": 53.90130615234375, + "learning_rate": 3.227936507936508e-05, + "loss": 0.3263, + "step": 3666 + }, + { + "epoch": 20.954285714285714, + "grad_norm": 84.06610107421875, + "learning_rate": 3.2273015873015875e-05, + "loss": 0.4107, + "step": 3667 + }, + { + "epoch": 20.96, + "grad_norm": 159.95120239257812, + "learning_rate": 3.226666666666667e-05, + "loss": 0.369, + "step": 3668 + }, + { + "epoch": 20.965714285714284, + "grad_norm": 68.79747009277344, + "learning_rate": 3.226031746031746e-05, + "loss": 0.2484, + "step": 3669 + }, + { + "epoch": 20.97142857142857, + "grad_norm": 84.50321960449219, + "learning_rate": 3.225396825396825e-05, + "loss": 0.3455, + "step": 3670 + }, + { + "epoch": 20.97714285714286, + "grad_norm": 114.40209197998047, + "learning_rate": 3.224761904761905e-05, + "loss": 0.4595, 
+ "step": 3671 + }, + { + "epoch": 20.982857142857142, + "grad_norm": 86.94263458251953, + "learning_rate": 3.2241269841269845e-05, + "loss": 0.4622, + "step": 3672 + }, + { + "epoch": 20.98857142857143, + "grad_norm": 71.7770004272461, + "learning_rate": 3.223492063492064e-05, + "loss": 0.4919, + "step": 3673 + }, + { + "epoch": 20.994285714285713, + "grad_norm": 65.27389526367188, + "learning_rate": 3.222857142857143e-05, + "loss": 0.2586, + "step": 3674 + }, + { + "epoch": 21.0, + "grad_norm": 21.672555923461914, + "learning_rate": 3.222222222222223e-05, + "loss": 0.3246, + "step": 3675 + }, + { + "epoch": 21.0, + "eval_classes": 0, + "eval_loss": 0.6261504888534546, + "eval_map": 0.9119, + "eval_map_50": 0.9592, + "eval_map_75": 0.9454, + "eval_map_large": 0.9131, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9119, + "eval_map_small": -1.0, + "eval_mar_1": 0.7749, + "eval_mar_10": 0.9657, + "eval_mar_100": 0.9737, + "eval_mar_100_per_class": 0.9737, + "eval_mar_large": 0.9737, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 14.2206, + "eval_samples_per_second": 20.674, + "eval_steps_per_second": 2.602, + "step": 3675 + }, + { + "epoch": 21.005714285714287, + "grad_norm": 49.29462814331055, + "learning_rate": 3.2215873015873015e-05, + "loss": 0.2999, + "step": 3676 + }, + { + "epoch": 21.01142857142857, + "grad_norm": 31.34654998779297, + "learning_rate": 3.2209523809523814e-05, + "loss": 0.3049, + "step": 3677 + }, + { + "epoch": 21.017142857142858, + "grad_norm": 477.85162353515625, + "learning_rate": 3.220317460317461e-05, + "loss": 0.3532, + "step": 3678 + }, + { + "epoch": 21.02285714285714, + "grad_norm": 40.38692092895508, + "learning_rate": 3.21968253968254e-05, + "loss": 0.2048, + "step": 3679 + }, + { + "epoch": 21.02857142857143, + "grad_norm": 68.68042755126953, + "learning_rate": 3.219047619047619e-05, + "loss": 0.3601, + "step": 3680 + }, + { + "epoch": 21.034285714285716, + "grad_norm": 64.91019439697266, + 
"learning_rate": 3.2184126984126985e-05, + "loss": 0.2925, + "step": 3681 + }, + { + "epoch": 21.04, + "grad_norm": 25.79218292236328, + "learning_rate": 3.217777777777778e-05, + "loss": 0.4298, + "step": 3682 + }, + { + "epoch": 21.045714285714286, + "grad_norm": 41.89329528808594, + "learning_rate": 3.217142857142858e-05, + "loss": 0.3936, + "step": 3683 + }, + { + "epoch": 21.05142857142857, + "grad_norm": 57.092620849609375, + "learning_rate": 3.216507936507936e-05, + "loss": 0.4503, + "step": 3684 + }, + { + "epoch": 21.057142857142857, + "grad_norm": 25.358844757080078, + "learning_rate": 3.215873015873016e-05, + "loss": 0.3085, + "step": 3685 + }, + { + "epoch": 21.062857142857144, + "grad_norm": 75.70011138916016, + "learning_rate": 3.2152380952380955e-05, + "loss": 0.2991, + "step": 3686 + }, + { + "epoch": 21.068571428571428, + "grad_norm": 87.57452392578125, + "learning_rate": 3.214603174603175e-05, + "loss": 0.3838, + "step": 3687 + }, + { + "epoch": 21.074285714285715, + "grad_norm": 37.64426040649414, + "learning_rate": 3.213968253968254e-05, + "loss": 0.3772, + "step": 3688 + }, + { + "epoch": 21.08, + "grad_norm": 58.89046859741211, + "learning_rate": 3.213333333333334e-05, + "loss": 0.2258, + "step": 3689 + }, + { + "epoch": 21.085714285714285, + "grad_norm": 30.542205810546875, + "learning_rate": 3.2126984126984126e-05, + "loss": 0.3013, + "step": 3690 + }, + { + "epoch": 21.091428571428573, + "grad_norm": 35.99055480957031, + "learning_rate": 3.2120634920634925e-05, + "loss": 0.3167, + "step": 3691 + }, + { + "epoch": 21.097142857142856, + "grad_norm": 57.693511962890625, + "learning_rate": 3.211428571428571e-05, + "loss": 0.4125, + "step": 3692 + }, + { + "epoch": 21.102857142857143, + "grad_norm": 86.25928497314453, + "learning_rate": 3.210793650793651e-05, + "loss": 0.3789, + "step": 3693 + }, + { + "epoch": 21.10857142857143, + "grad_norm": 59.57147216796875, + "learning_rate": 3.21015873015873e-05, + "loss": 0.3895, + "step": 3694 + }, + { + 
"epoch": 21.114285714285714, + "grad_norm": 61.320220947265625, + "learning_rate": 3.2095238095238095e-05, + "loss": 0.4248, + "step": 3695 + }, + { + "epoch": 21.12, + "grad_norm": 124.69442749023438, + "learning_rate": 3.208888888888889e-05, + "loss": 0.6228, + "step": 3696 + }, + { + "epoch": 21.125714285714285, + "grad_norm": 58.30948257446289, + "learning_rate": 3.208253968253969e-05, + "loss": 0.2777, + "step": 3697 + }, + { + "epoch": 21.13142857142857, + "grad_norm": 57.231990814208984, + "learning_rate": 3.207619047619047e-05, + "loss": 0.3169, + "step": 3698 + }, + { + "epoch": 21.13714285714286, + "grad_norm": 42.46031951904297, + "learning_rate": 3.206984126984127e-05, + "loss": 0.3544, + "step": 3699 + }, + { + "epoch": 21.142857142857142, + "grad_norm": 26.119014739990234, + "learning_rate": 3.2063492063492065e-05, + "loss": 0.2699, + "step": 3700 + }, + { + "epoch": 21.14857142857143, + "grad_norm": 64.62548828125, + "learning_rate": 3.205714285714286e-05, + "loss": 0.5126, + "step": 3701 + }, + { + "epoch": 21.154285714285713, + "grad_norm": 103.31525421142578, + "learning_rate": 3.205079365079365e-05, + "loss": 0.3685, + "step": 3702 + }, + { + "epoch": 21.16, + "grad_norm": 44.26395034790039, + "learning_rate": 3.204444444444444e-05, + "loss": 0.2793, + "step": 3703 + }, + { + "epoch": 21.165714285714287, + "grad_norm": 217.64198303222656, + "learning_rate": 3.2038095238095236e-05, + "loss": 0.3558, + "step": 3704 + }, + { + "epoch": 21.17142857142857, + "grad_norm": 34.432010650634766, + "learning_rate": 3.2031746031746035e-05, + "loss": 0.3642, + "step": 3705 + }, + { + "epoch": 21.177142857142858, + "grad_norm": 50.93349838256836, + "learning_rate": 3.202539682539683e-05, + "loss": 0.3908, + "step": 3706 + }, + { + "epoch": 21.18285714285714, + "grad_norm": 226.744873046875, + "learning_rate": 3.201904761904762e-05, + "loss": 0.375, + "step": 3707 + }, + { + "epoch": 21.18857142857143, + "grad_norm": 1020.2630004882812, + "learning_rate": 
3.201269841269841e-05, + "loss": 0.3685, + "step": 3708 + }, + { + "epoch": 21.194285714285716, + "grad_norm": 40.030338287353516, + "learning_rate": 3.2006349206349206e-05, + "loss": 0.4141, + "step": 3709 + }, + { + "epoch": 21.2, + "grad_norm": 33.5488395690918, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.2744, + "step": 3710 + }, + { + "epoch": 21.205714285714286, + "grad_norm": 26.809579849243164, + "learning_rate": 3.19936507936508e-05, + "loss": 0.2272, + "step": 3711 + }, + { + "epoch": 21.21142857142857, + "grad_norm": 85.49323272705078, + "learning_rate": 3.198730158730159e-05, + "loss": 0.3234, + "step": 3712 + }, + { + "epoch": 21.217142857142857, + "grad_norm": 68.34223937988281, + "learning_rate": 3.198095238095238e-05, + "loss": 0.2902, + "step": 3713 + }, + { + "epoch": 21.222857142857144, + "grad_norm": 59.31341552734375, + "learning_rate": 3.1974603174603176e-05, + "loss": 0.3462, + "step": 3714 + }, + { + "epoch": 21.228571428571428, + "grad_norm": 220.84310913085938, + "learning_rate": 3.196825396825397e-05, + "loss": 0.4344, + "step": 3715 + }, + { + "epoch": 21.234285714285715, + "grad_norm": 34.62261962890625, + "learning_rate": 3.196190476190477e-05, + "loss": 0.3257, + "step": 3716 + }, + { + "epoch": 21.24, + "grad_norm": 51.02989196777344, + "learning_rate": 3.1955555555555554e-05, + "loss": 0.5492, + "step": 3717 + }, + { + "epoch": 21.245714285714286, + "grad_norm": 352.5619812011719, + "learning_rate": 3.194920634920635e-05, + "loss": 0.2573, + "step": 3718 + }, + { + "epoch": 21.251428571428573, + "grad_norm": 29.160369873046875, + "learning_rate": 3.1942857142857146e-05, + "loss": 0.4223, + "step": 3719 + }, + { + "epoch": 21.257142857142856, + "grad_norm": 64.06929016113281, + "learning_rate": 3.193650793650794e-05, + "loss": 0.3256, + "step": 3720 + }, + { + "epoch": 21.262857142857143, + "grad_norm": 50.68466567993164, + "learning_rate": 3.193015873015873e-05, + "loss": 0.2572, + "step": 3721 + }, + { + "epoch": 
21.268571428571427, + "grad_norm": 74.49227142333984, + "learning_rate": 3.192380952380953e-05, + "loss": 0.4386, + "step": 3722 + }, + { + "epoch": 21.274285714285714, + "grad_norm": 49.489383697509766, + "learning_rate": 3.1917460317460316e-05, + "loss": 0.2706, + "step": 3723 + }, + { + "epoch": 21.28, + "grad_norm": 61.9919548034668, + "learning_rate": 3.1911111111111116e-05, + "loss": 0.3378, + "step": 3724 + }, + { + "epoch": 21.285714285714285, + "grad_norm": 38.12159729003906, + "learning_rate": 3.19047619047619e-05, + "loss": 0.2815, + "step": 3725 + }, + { + "epoch": 21.291428571428572, + "grad_norm": 30.66257667541504, + "learning_rate": 3.18984126984127e-05, + "loss": 0.3195, + "step": 3726 + }, + { + "epoch": 21.29714285714286, + "grad_norm": 21.73702621459961, + "learning_rate": 3.1892063492063494e-05, + "loss": 0.366, + "step": 3727 + }, + { + "epoch": 21.302857142857142, + "grad_norm": 27.58858871459961, + "learning_rate": 3.1885714285714286e-05, + "loss": 0.356, + "step": 3728 + }, + { + "epoch": 21.30857142857143, + "grad_norm": 50.17976379394531, + "learning_rate": 3.187936507936508e-05, + "loss": 0.2447, + "step": 3729 + }, + { + "epoch": 21.314285714285713, + "grad_norm": 64.07877349853516, + "learning_rate": 3.187301587301588e-05, + "loss": 0.3639, + "step": 3730 + }, + { + "epoch": 21.32, + "grad_norm": 46.20744323730469, + "learning_rate": 3.1866666666666664e-05, + "loss": 0.322, + "step": 3731 + }, + { + "epoch": 21.325714285714287, + "grad_norm": 42.561363220214844, + "learning_rate": 3.1860317460317464e-05, + "loss": 0.2724, + "step": 3732 + }, + { + "epoch": 21.33142857142857, + "grad_norm": 39.54890441894531, + "learning_rate": 3.1853968253968256e-05, + "loss": 0.2953, + "step": 3733 + }, + { + "epoch": 21.337142857142858, + "grad_norm": 123.82516479492188, + "learning_rate": 3.184761904761905e-05, + "loss": 0.2723, + "step": 3734 + }, + { + "epoch": 21.34285714285714, + "grad_norm": 18.572673797607422, + "learning_rate": 
3.184126984126984e-05, + "loss": 0.5455, + "step": 3735 + }, + { + "epoch": 21.34857142857143, + "grad_norm": 29.958419799804688, + "learning_rate": 3.1834920634920634e-05, + "loss": 0.3059, + "step": 3736 + }, + { + "epoch": 21.354285714285716, + "grad_norm": 48.21112823486328, + "learning_rate": 3.182857142857143e-05, + "loss": 0.2854, + "step": 3737 + }, + { + "epoch": 21.36, + "grad_norm": 88.85002899169922, + "learning_rate": 3.1822222222222226e-05, + "loss": 0.3987, + "step": 3738 + }, + { + "epoch": 21.365714285714287, + "grad_norm": 468.011962890625, + "learning_rate": 3.181587301587302e-05, + "loss": 0.2847, + "step": 3739 + }, + { + "epoch": 21.37142857142857, + "grad_norm": 31.966691970825195, + "learning_rate": 3.180952380952381e-05, + "loss": 0.3266, + "step": 3740 + }, + { + "epoch": 21.377142857142857, + "grad_norm": 51.56507873535156, + "learning_rate": 3.1803174603174604e-05, + "loss": 0.4651, + "step": 3741 + }, + { + "epoch": 21.382857142857144, + "grad_norm": 21.6018123626709, + "learning_rate": 3.17968253968254e-05, + "loss": 0.3151, + "step": 3742 + }, + { + "epoch": 21.388571428571428, + "grad_norm": 22.11574935913086, + "learning_rate": 3.179047619047619e-05, + "loss": 0.3947, + "step": 3743 + }, + { + "epoch": 21.394285714285715, + "grad_norm": 35.184356689453125, + "learning_rate": 3.178412698412699e-05, + "loss": 0.6054, + "step": 3744 + }, + { + "epoch": 21.4, + "grad_norm": 51.54069137573242, + "learning_rate": 3.177777777777778e-05, + "loss": 0.3085, + "step": 3745 + }, + { + "epoch": 21.405714285714286, + "grad_norm": 59.191768646240234, + "learning_rate": 3.1771428571428574e-05, + "loss": 0.4951, + "step": 3746 + }, + { + "epoch": 21.411428571428573, + "grad_norm": 91.00263214111328, + "learning_rate": 3.176507936507937e-05, + "loss": 0.4317, + "step": 3747 + }, + { + "epoch": 21.417142857142856, + "grad_norm": 40.64833450317383, + "learning_rate": 3.175873015873016e-05, + "loss": 0.3584, + "step": 3748 + }, + { + "epoch": 
21.422857142857143, + "grad_norm": 54.025421142578125, + "learning_rate": 3.175238095238096e-05, + "loss": 0.3442, + "step": 3749 + }, + { + "epoch": 21.428571428571427, + "grad_norm": 42.4815559387207, + "learning_rate": 3.1746031746031745e-05, + "loss": 0.234, + "step": 3750 + }, + { + "epoch": 21.434285714285714, + "grad_norm": 37.965755462646484, + "learning_rate": 3.1739682539682544e-05, + "loss": 0.3482, + "step": 3751 + }, + { + "epoch": 21.44, + "grad_norm": 433.9432678222656, + "learning_rate": 3.173333333333334e-05, + "loss": 0.3578, + "step": 3752 + }, + { + "epoch": 21.445714285714285, + "grad_norm": 56.78814697265625, + "learning_rate": 3.172698412698413e-05, + "loss": 0.3879, + "step": 3753 + }, + { + "epoch": 21.451428571428572, + "grad_norm": 50.708091735839844, + "learning_rate": 3.172063492063492e-05, + "loss": 0.3293, + "step": 3754 + }, + { + "epoch": 21.457142857142856, + "grad_norm": 40.560787200927734, + "learning_rate": 3.1714285714285715e-05, + "loss": 0.4146, + "step": 3755 + }, + { + "epoch": 21.462857142857143, + "grad_norm": 45.53681182861328, + "learning_rate": 3.170793650793651e-05, + "loss": 0.3898, + "step": 3756 + }, + { + "epoch": 21.46857142857143, + "grad_norm": 57.23739242553711, + "learning_rate": 3.170158730158731e-05, + "loss": 0.4443, + "step": 3757 + }, + { + "epoch": 21.474285714285713, + "grad_norm": 87.60155487060547, + "learning_rate": 3.169523809523809e-05, + "loss": 0.5592, + "step": 3758 + }, + { + "epoch": 21.48, + "grad_norm": 44.68388366699219, + "learning_rate": 3.168888888888889e-05, + "loss": 0.3137, + "step": 3759 + }, + { + "epoch": 21.485714285714284, + "grad_norm": 289.986328125, + "learning_rate": 3.1682539682539685e-05, + "loss": 0.3203, + "step": 3760 + }, + { + "epoch": 21.49142857142857, + "grad_norm": 37.63692092895508, + "learning_rate": 3.167619047619048e-05, + "loss": 0.3281, + "step": 3761 + }, + { + "epoch": 21.497142857142858, + "grad_norm": 114.45169067382812, + "learning_rate": 
3.166984126984127e-05, + "loss": 0.414, + "step": 3762 + }, + { + "epoch": 21.502857142857142, + "grad_norm": 90.52623748779297, + "learning_rate": 3.166349206349207e-05, + "loss": 0.3976, + "step": 3763 + }, + { + "epoch": 21.50857142857143, + "grad_norm": 62.72298812866211, + "learning_rate": 3.1657142857142855e-05, + "loss": 0.4485, + "step": 3764 + }, + { + "epoch": 21.514285714285712, + "grad_norm": 22.031055450439453, + "learning_rate": 3.1650793650793655e-05, + "loss": 0.3229, + "step": 3765 + }, + { + "epoch": 21.52, + "grad_norm": 54.60057067871094, + "learning_rate": 3.164444444444444e-05, + "loss": 0.4387, + "step": 3766 + }, + { + "epoch": 21.525714285714287, + "grad_norm": 261.3981628417969, + "learning_rate": 3.163809523809524e-05, + "loss": 0.2715, + "step": 3767 + }, + { + "epoch": 21.53142857142857, + "grad_norm": 38.28535079956055, + "learning_rate": 3.163174603174603e-05, + "loss": 0.3281, + "step": 3768 + }, + { + "epoch": 21.537142857142857, + "grad_norm": 515.3873901367188, + "learning_rate": 3.1625396825396825e-05, + "loss": 0.359, + "step": 3769 + }, + { + "epoch": 21.542857142857144, + "grad_norm": 101.58968353271484, + "learning_rate": 3.161904761904762e-05, + "loss": 0.5319, + "step": 3770 + }, + { + "epoch": 21.548571428571428, + "grad_norm": 24.83993911743164, + "learning_rate": 3.161269841269842e-05, + "loss": 0.3499, + "step": 3771 + }, + { + "epoch": 21.554285714285715, + "grad_norm": 78.21588897705078, + "learning_rate": 3.16063492063492e-05, + "loss": 0.604, + "step": 3772 + }, + { + "epoch": 21.56, + "grad_norm": 50.746639251708984, + "learning_rate": 3.16e-05, + "loss": 0.3406, + "step": 3773 + }, + { + "epoch": 21.565714285714286, + "grad_norm": 30.50995635986328, + "learning_rate": 3.1593650793650795e-05, + "loss": 0.3707, + "step": 3774 + }, + { + "epoch": 21.571428571428573, + "grad_norm": 94.28235626220703, + "learning_rate": 3.158730158730159e-05, + "loss": 0.4935, + "step": 3775 + }, + { + "epoch": 21.577142857142857, + 
"grad_norm": 85.49809265136719, + "learning_rate": 3.158095238095238e-05, + "loss": 0.2952, + "step": 3776 + }, + { + "epoch": 21.582857142857144, + "grad_norm": 192.1874542236328, + "learning_rate": 3.157460317460317e-05, + "loss": 0.4825, + "step": 3777 + }, + { + "epoch": 21.588571428571427, + "grad_norm": 23.69519805908203, + "learning_rate": 3.156825396825397e-05, + "loss": 0.3808, + "step": 3778 + }, + { + "epoch": 21.594285714285714, + "grad_norm": 54.67176055908203, + "learning_rate": 3.1561904761904765e-05, + "loss": 0.3762, + "step": 3779 + }, + { + "epoch": 21.6, + "grad_norm": 36.722557067871094, + "learning_rate": 3.155555555555556e-05, + "loss": 0.3383, + "step": 3780 + }, + { + "epoch": 21.605714285714285, + "grad_norm": 84.50801086425781, + "learning_rate": 3.154920634920635e-05, + "loss": 0.4195, + "step": 3781 + }, + { + "epoch": 21.611428571428572, + "grad_norm": 269.7778015136719, + "learning_rate": 3.154285714285714e-05, + "loss": 0.3313, + "step": 3782 + }, + { + "epoch": 21.617142857142856, + "grad_norm": 35.74866485595703, + "learning_rate": 3.1536507936507936e-05, + "loss": 0.399, + "step": 3783 + }, + { + "epoch": 21.622857142857143, + "grad_norm": 14.791391372680664, + "learning_rate": 3.1530158730158735e-05, + "loss": 0.2369, + "step": 3784 + }, + { + "epoch": 21.62857142857143, + "grad_norm": 55.04678726196289, + "learning_rate": 3.152380952380953e-05, + "loss": 0.3338, + "step": 3785 + }, + { + "epoch": 21.634285714285713, + "grad_norm": 31.66046905517578, + "learning_rate": 3.151746031746032e-05, + "loss": 0.2855, + "step": 3786 + }, + { + "epoch": 21.64, + "grad_norm": 43.476749420166016, + "learning_rate": 3.151111111111111e-05, + "loss": 0.2053, + "step": 3787 + }, + { + "epoch": 21.645714285714284, + "grad_norm": 108.86002349853516, + "learning_rate": 3.1504761904761905e-05, + "loss": 0.2844, + "step": 3788 + }, + { + "epoch": 21.65142857142857, + "grad_norm": 73.7070541381836, + "learning_rate": 3.14984126984127e-05, + "loss": 
0.5292, + "step": 3789 + }, + { + "epoch": 21.65714285714286, + "grad_norm": 78.62653350830078, + "learning_rate": 3.14920634920635e-05, + "loss": 0.3417, + "step": 3790 + }, + { + "epoch": 21.662857142857142, + "grad_norm": 54.53212356567383, + "learning_rate": 3.148571428571428e-05, + "loss": 0.3056, + "step": 3791 + }, + { + "epoch": 21.66857142857143, + "grad_norm": 29.09296226501465, + "learning_rate": 3.147936507936508e-05, + "loss": 0.326, + "step": 3792 + }, + { + "epoch": 21.674285714285713, + "grad_norm": 44.63047790527344, + "learning_rate": 3.1473015873015875e-05, + "loss": 0.3932, + "step": 3793 + }, + { + "epoch": 21.68, + "grad_norm": 20.08134651184082, + "learning_rate": 3.146666666666667e-05, + "loss": 0.4483, + "step": 3794 + }, + { + "epoch": 21.685714285714287, + "grad_norm": 44.74596405029297, + "learning_rate": 3.146031746031746e-05, + "loss": 0.3569, + "step": 3795 + }, + { + "epoch": 21.69142857142857, + "grad_norm": 59.00993347167969, + "learning_rate": 3.145396825396826e-05, + "loss": 0.3985, + "step": 3796 + }, + { + "epoch": 21.697142857142858, + "grad_norm": 94.12641143798828, + "learning_rate": 3.1447619047619046e-05, + "loss": 0.4251, + "step": 3797 + }, + { + "epoch": 21.70285714285714, + "grad_norm": 99.79872131347656, + "learning_rate": 3.1441269841269845e-05, + "loss": 0.2642, + "step": 3798 + }, + { + "epoch": 21.708571428571428, + "grad_norm": 23.622644424438477, + "learning_rate": 3.143492063492063e-05, + "loss": 0.3422, + "step": 3799 + }, + { + "epoch": 21.714285714285715, + "grad_norm": 109.50407409667969, + "learning_rate": 3.142857142857143e-05, + "loss": 0.4371, + "step": 3800 + }, + { + "epoch": 21.72, + "grad_norm": 34.26578903198242, + "learning_rate": 3.142222222222222e-05, + "loss": 0.32, + "step": 3801 + }, + { + "epoch": 21.725714285714286, + "grad_norm": 33.98945617675781, + "learning_rate": 3.1415873015873016e-05, + "loss": 0.2817, + "step": 3802 + }, + { + "epoch": 21.731428571428573, + "grad_norm": 
60.104774475097656, + "learning_rate": 3.140952380952381e-05, + "loss": 0.3346, + "step": 3803 + }, + { + "epoch": 21.737142857142857, + "grad_norm": 63.730594635009766, + "learning_rate": 3.140317460317461e-05, + "loss": 0.419, + "step": 3804 + }, + { + "epoch": 21.742857142857144, + "grad_norm": 41.9618034362793, + "learning_rate": 3.1396825396825394e-05, + "loss": 0.3094, + "step": 3805 + }, + { + "epoch": 21.748571428571427, + "grad_norm": 85.70345306396484, + "learning_rate": 3.139047619047619e-05, + "loss": 0.2412, + "step": 3806 + }, + { + "epoch": 21.754285714285714, + "grad_norm": 63.57206344604492, + "learning_rate": 3.1384126984126986e-05, + "loss": 0.336, + "step": 3807 + }, + { + "epoch": 21.76, + "grad_norm": 19.747631072998047, + "learning_rate": 3.137777777777778e-05, + "loss": 0.2959, + "step": 3808 + }, + { + "epoch": 21.765714285714285, + "grad_norm": 1825.9937744140625, + "learning_rate": 3.137142857142857e-05, + "loss": 0.444, + "step": 3809 + }, + { + "epoch": 21.771428571428572, + "grad_norm": 51.031494140625, + "learning_rate": 3.1365079365079364e-05, + "loss": 0.3296, + "step": 3810 + }, + { + "epoch": 21.777142857142856, + "grad_norm": 176.57481384277344, + "learning_rate": 3.1358730158730156e-05, + "loss": 0.4362, + "step": 3811 + }, + { + "epoch": 21.782857142857143, + "grad_norm": 56.659698486328125, + "learning_rate": 3.1352380952380956e-05, + "loss": 0.355, + "step": 3812 + }, + { + "epoch": 21.78857142857143, + "grad_norm": 62.81039047241211, + "learning_rate": 3.134603174603175e-05, + "loss": 0.3525, + "step": 3813 + }, + { + "epoch": 21.794285714285714, + "grad_norm": 60.78776168823242, + "learning_rate": 3.133968253968254e-05, + "loss": 0.3585, + "step": 3814 + }, + { + "epoch": 21.8, + "grad_norm": 108.34864044189453, + "learning_rate": 3.1333333333333334e-05, + "loss": 0.2698, + "step": 3815 + }, + { + "epoch": 21.805714285714284, + "grad_norm": 196.76712036132812, + "learning_rate": 3.1326984126984126e-05, + "loss": 0.3811, + 
"step": 3816 + }, + { + "epoch": 21.81142857142857, + "grad_norm": 55.96894454956055, + "learning_rate": 3.1320634920634926e-05, + "loss": 0.315, + "step": 3817 + }, + { + "epoch": 21.81714285714286, + "grad_norm": 50.5468635559082, + "learning_rate": 3.131428571428572e-05, + "loss": 0.1957, + "step": 3818 + }, + { + "epoch": 21.822857142857142, + "grad_norm": 135.99075317382812, + "learning_rate": 3.130793650793651e-05, + "loss": 0.3722, + "step": 3819 + }, + { + "epoch": 21.82857142857143, + "grad_norm": 24.163867950439453, + "learning_rate": 3.1301587301587304e-05, + "loss": 0.2995, + "step": 3820 + }, + { + "epoch": 21.834285714285713, + "grad_norm": 35.99555969238281, + "learning_rate": 3.1295238095238096e-05, + "loss": 0.2903, + "step": 3821 + }, + { + "epoch": 21.84, + "grad_norm": 252.39276123046875, + "learning_rate": 3.128888888888889e-05, + "loss": 0.2276, + "step": 3822 + }, + { + "epoch": 21.845714285714287, + "grad_norm": 47.01209259033203, + "learning_rate": 3.128253968253969e-05, + "loss": 0.3877, + "step": 3823 + }, + { + "epoch": 21.85142857142857, + "grad_norm": 47.04011535644531, + "learning_rate": 3.1276190476190474e-05, + "loss": 0.2424, + "step": 3824 + }, + { + "epoch": 21.857142857142858, + "grad_norm": 322.7526550292969, + "learning_rate": 3.1269841269841274e-05, + "loss": 0.3984, + "step": 3825 + }, + { + "epoch": 21.86285714285714, + "grad_norm": 31.5167293548584, + "learning_rate": 3.1263492063492066e-05, + "loss": 0.2904, + "step": 3826 + }, + { + "epoch": 21.86857142857143, + "grad_norm": 271.3404846191406, + "learning_rate": 3.125714285714286e-05, + "loss": 0.3466, + "step": 3827 + }, + { + "epoch": 21.874285714285715, + "grad_norm": 29.509052276611328, + "learning_rate": 3.125079365079365e-05, + "loss": 0.3587, + "step": 3828 + }, + { + "epoch": 21.88, + "grad_norm": 31.222827911376953, + "learning_rate": 3.124444444444445e-05, + "loss": 0.2593, + "step": 3829 + }, + { + "epoch": 21.885714285714286, + "grad_norm": 51.91364288330078, 
+ "learning_rate": 3.123809523809524e-05, + "loss": 0.3321, + "step": 3830 + }, + { + "epoch": 21.89142857142857, + "grad_norm": 41.987300872802734, + "learning_rate": 3.1231746031746036e-05, + "loss": 0.2314, + "step": 3831 + }, + { + "epoch": 21.897142857142857, + "grad_norm": 75.75940704345703, + "learning_rate": 3.122539682539682e-05, + "loss": 0.2687, + "step": 3832 + }, + { + "epoch": 21.902857142857144, + "grad_norm": 34.43084716796875, + "learning_rate": 3.121904761904762e-05, + "loss": 0.2096, + "step": 3833 + }, + { + "epoch": 21.908571428571427, + "grad_norm": 102.84685516357422, + "learning_rate": 3.1212698412698414e-05, + "loss": 0.2934, + "step": 3834 + }, + { + "epoch": 21.914285714285715, + "grad_norm": 455.5080261230469, + "learning_rate": 3.120634920634921e-05, + "loss": 0.4464, + "step": 3835 + }, + { + "epoch": 21.92, + "grad_norm": 29.81694793701172, + "learning_rate": 3.12e-05, + "loss": 0.238, + "step": 3836 + }, + { + "epoch": 21.925714285714285, + "grad_norm": 54.29841613769531, + "learning_rate": 3.11936507936508e-05, + "loss": 0.3183, + "step": 3837 + }, + { + "epoch": 21.931428571428572, + "grad_norm": 57.172508239746094, + "learning_rate": 3.1187301587301585e-05, + "loss": 0.3099, + "step": 3838 + }, + { + "epoch": 21.937142857142856, + "grad_norm": 68.68402862548828, + "learning_rate": 3.1180952380952384e-05, + "loss": 0.4181, + "step": 3839 + }, + { + "epoch": 21.942857142857143, + "grad_norm": 23.65928840637207, + "learning_rate": 3.117460317460318e-05, + "loss": 0.4022, + "step": 3840 + }, + { + "epoch": 21.94857142857143, + "grad_norm": 81.38803100585938, + "learning_rate": 3.116825396825397e-05, + "loss": 0.45, + "step": 3841 + }, + { + "epoch": 21.954285714285714, + "grad_norm": 80.4809799194336, + "learning_rate": 3.116190476190476e-05, + "loss": 0.2891, + "step": 3842 + }, + { + "epoch": 21.96, + "grad_norm": 49.645843505859375, + "learning_rate": 3.1155555555555555e-05, + "loss": 0.4989, + "step": 3843 + }, + { + "epoch": 
21.965714285714284, + "grad_norm": 62.76024627685547, + "learning_rate": 3.114920634920635e-05, + "loss": 0.2469, + "step": 3844 + }, + { + "epoch": 21.97142857142857, + "grad_norm": 83.84510040283203, + "learning_rate": 3.114285714285715e-05, + "loss": 0.4257, + "step": 3845 + }, + { + "epoch": 21.97714285714286, + "grad_norm": 31.186817169189453, + "learning_rate": 3.113650793650794e-05, + "loss": 0.3295, + "step": 3846 + }, + { + "epoch": 21.982857142857142, + "grad_norm": 56.99137878417969, + "learning_rate": 3.113015873015873e-05, + "loss": 0.3165, + "step": 3847 + }, + { + "epoch": 21.98857142857143, + "grad_norm": 64.08834075927734, + "learning_rate": 3.1123809523809525e-05, + "loss": 0.2338, + "step": 3848 + }, + { + "epoch": 21.994285714285713, + "grad_norm": 66.41158294677734, + "learning_rate": 3.111746031746032e-05, + "loss": 0.3492, + "step": 3849 + }, + { + "epoch": 22.0, + "grad_norm": 119.21939086914062, + "learning_rate": 3.111111111111111e-05, + "loss": 0.3374, + "step": 3850 + }, + { + "epoch": 22.0, + "eval_classes": 0, + "eval_loss": 0.5864205956459045, + "eval_map": 0.9208, + "eval_map_50": 0.9655, + "eval_map_75": 0.9527, + "eval_map_large": 0.9209, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9208, + "eval_map_small": -1.0, + "eval_mar_1": 0.7854, + "eval_mar_10": 0.9695, + "eval_mar_100": 0.973, + "eval_mar_100_per_class": 0.973, + "eval_mar_large": 0.973, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.411, + "eval_samples_per_second": 21.922, + "eval_steps_per_second": 2.759, + "step": 3850 + }, + { + "epoch": 22.005714285714287, + "grad_norm": 568.6182250976562, + "learning_rate": 3.110476190476191e-05, + "loss": 0.3612, + "step": 3851 + }, + { + "epoch": 22.01142857142857, + "grad_norm": 298.5441589355469, + "learning_rate": 3.10984126984127e-05, + "loss": 0.3196, + "step": 3852 + }, + { + "epoch": 22.017142857142858, + "grad_norm": 37.83338165283203, + "learning_rate": 3.1092063492063495e-05, + "loss": 
0.2644, + "step": 3853 + }, + { + "epoch": 22.02285714285714, + "grad_norm": 139.16110229492188, + "learning_rate": 3.108571428571429e-05, + "loss": 0.3717, + "step": 3854 + }, + { + "epoch": 22.02857142857143, + "grad_norm": 437.6393737792969, + "learning_rate": 3.107936507936508e-05, + "loss": 0.3128, + "step": 3855 + }, + { + "epoch": 22.034285714285716, + "grad_norm": 37.889312744140625, + "learning_rate": 3.107301587301588e-05, + "loss": 0.4065, + "step": 3856 + }, + { + "epoch": 22.04, + "grad_norm": 26.51235580444336, + "learning_rate": 3.1066666666666665e-05, + "loss": 0.307, + "step": 3857 + }, + { + "epoch": 22.045714285714286, + "grad_norm": 24.09084129333496, + "learning_rate": 3.1060317460317465e-05, + "loss": 0.4787, + "step": 3858 + }, + { + "epoch": 22.05142857142857, + "grad_norm": 43.70731735229492, + "learning_rate": 3.105396825396826e-05, + "loss": 0.3874, + "step": 3859 + }, + { + "epoch": 22.057142857142857, + "grad_norm": 94.05394744873047, + "learning_rate": 3.104761904761905e-05, + "loss": 0.3835, + "step": 3860 + }, + { + "epoch": 22.062857142857144, + "grad_norm": 45.00230407714844, + "learning_rate": 3.104126984126984e-05, + "loss": 0.3022, + "step": 3861 + }, + { + "epoch": 22.068571428571428, + "grad_norm": 36.670806884765625, + "learning_rate": 3.103492063492064e-05, + "loss": 0.5788, + "step": 3862 + }, + { + "epoch": 22.074285714285715, + "grad_norm": 39.09443664550781, + "learning_rate": 3.102857142857143e-05, + "loss": 0.3785, + "step": 3863 + }, + { + "epoch": 22.08, + "grad_norm": 42.257102966308594, + "learning_rate": 3.102222222222223e-05, + "loss": 0.309, + "step": 3864 + }, + { + "epoch": 22.085714285714285, + "grad_norm": 37.75422286987305, + "learning_rate": 3.101587301587301e-05, + "loss": 0.3694, + "step": 3865 + }, + { + "epoch": 22.091428571428573, + "grad_norm": 57.656097412109375, + "learning_rate": 3.100952380952381e-05, + "loss": 0.3344, + "step": 3866 + }, + { + "epoch": 22.097142857142856, + "grad_norm": 
57.66275405883789, + "learning_rate": 3.1003174603174605e-05, + "loss": 0.2738, + "step": 3867 + }, + { + "epoch": 22.102857142857143, + "grad_norm": 185.97476196289062, + "learning_rate": 3.09968253968254e-05, + "loss": 0.3516, + "step": 3868 + }, + { + "epoch": 22.10857142857143, + "grad_norm": 55.6573600769043, + "learning_rate": 3.099047619047619e-05, + "loss": 0.2769, + "step": 3869 + }, + { + "epoch": 22.114285714285714, + "grad_norm": 47.03541946411133, + "learning_rate": 3.098412698412699e-05, + "loss": 0.2358, + "step": 3870 + }, + { + "epoch": 22.12, + "grad_norm": 60.72099685668945, + "learning_rate": 3.0977777777777776e-05, + "loss": 0.4233, + "step": 3871 + }, + { + "epoch": 22.125714285714285, + "grad_norm": 77.88934326171875, + "learning_rate": 3.0971428571428575e-05, + "loss": 0.3265, + "step": 3872 + }, + { + "epoch": 22.13142857142857, + "grad_norm": 251.4064483642578, + "learning_rate": 3.096507936507937e-05, + "loss": 0.3101, + "step": 3873 + }, + { + "epoch": 22.13714285714286, + "grad_norm": 79.59466552734375, + "learning_rate": 3.095873015873016e-05, + "loss": 0.2651, + "step": 3874 + }, + { + "epoch": 22.142857142857142, + "grad_norm": 111.59322357177734, + "learning_rate": 3.095238095238095e-05, + "loss": 0.4637, + "step": 3875 + }, + { + "epoch": 22.14857142857143, + "grad_norm": 46.31583023071289, + "learning_rate": 3.0946031746031746e-05, + "loss": 0.2143, + "step": 3876 + }, + { + "epoch": 22.154285714285713, + "grad_norm": 56.032554626464844, + "learning_rate": 3.093968253968254e-05, + "loss": 0.3214, + "step": 3877 + }, + { + "epoch": 22.16, + "grad_norm": 51.20491409301758, + "learning_rate": 3.093333333333334e-05, + "loss": 0.2696, + "step": 3878 + }, + { + "epoch": 22.165714285714287, + "grad_norm": 78.42192077636719, + "learning_rate": 3.0926984126984123e-05, + "loss": 0.5291, + "step": 3879 + }, + { + "epoch": 22.17142857142857, + "grad_norm": 81.70121765136719, + "learning_rate": 3.092063492063492e-05, + "loss": 0.2933, + 
"step": 3880 + }, + { + "epoch": 22.177142857142858, + "grad_norm": 81.04582214355469, + "learning_rate": 3.0914285714285715e-05, + "loss": 0.5647, + "step": 3881 + }, + { + "epoch": 22.18285714285714, + "grad_norm": 34.81829833984375, + "learning_rate": 3.090793650793651e-05, + "loss": 0.3128, + "step": 3882 + }, + { + "epoch": 22.18857142857143, + "grad_norm": 107.74185180664062, + "learning_rate": 3.09015873015873e-05, + "loss": 0.3937, + "step": 3883 + }, + { + "epoch": 22.194285714285716, + "grad_norm": 52.18937301635742, + "learning_rate": 3.08952380952381e-05, + "loss": 0.2945, + "step": 3884 + }, + { + "epoch": 22.2, + "grad_norm": 74.31680297851562, + "learning_rate": 3.088888888888889e-05, + "loss": 0.4235, + "step": 3885 + }, + { + "epoch": 22.205714285714286, + "grad_norm": 43.282081604003906, + "learning_rate": 3.0882539682539685e-05, + "loss": 0.404, + "step": 3886 + }, + { + "epoch": 22.21142857142857, + "grad_norm": 47.725616455078125, + "learning_rate": 3.087619047619048e-05, + "loss": 0.2856, + "step": 3887 + }, + { + "epoch": 22.217142857142857, + "grad_norm": 20.372751235961914, + "learning_rate": 3.086984126984127e-05, + "loss": 0.3264, + "step": 3888 + }, + { + "epoch": 22.222857142857144, + "grad_norm": 120.78299713134766, + "learning_rate": 3.086349206349206e-05, + "loss": 0.3028, + "step": 3889 + }, + { + "epoch": 22.228571428571428, + "grad_norm": 45.60805130004883, + "learning_rate": 3.0857142857142856e-05, + "loss": 0.3358, + "step": 3890 + }, + { + "epoch": 22.234285714285715, + "grad_norm": 26.143068313598633, + "learning_rate": 3.0850793650793655e-05, + "loss": 0.3691, + "step": 3891 + }, + { + "epoch": 22.24, + "grad_norm": 37.71814727783203, + "learning_rate": 3.084444444444445e-05, + "loss": 0.29, + "step": 3892 + }, + { + "epoch": 22.245714285714286, + "grad_norm": 99.42308044433594, + "learning_rate": 3.083809523809524e-05, + "loss": 0.3096, + "step": 3893 + }, + { + "epoch": 22.251428571428573, + "grad_norm": 45.55909729003906, 
+ "learning_rate": 3.083174603174603e-05, + "loss": 0.4135, + "step": 3894 + }, + { + "epoch": 22.257142857142856, + "grad_norm": 140.01119995117188, + "learning_rate": 3.082539682539683e-05, + "loss": 0.5953, + "step": 3895 + }, + { + "epoch": 22.262857142857143, + "grad_norm": 83.41049194335938, + "learning_rate": 3.081904761904762e-05, + "loss": 0.3512, + "step": 3896 + }, + { + "epoch": 22.268571428571427, + "grad_norm": 69.6951904296875, + "learning_rate": 3.081269841269842e-05, + "loss": 0.3473, + "step": 3897 + }, + { + "epoch": 22.274285714285714, + "grad_norm": 63.989315032958984, + "learning_rate": 3.0806349206349204e-05, + "loss": 0.4721, + "step": 3898 + }, + { + "epoch": 22.28, + "grad_norm": 62.20379638671875, + "learning_rate": 3.08e-05, + "loss": 0.4091, + "step": 3899 + }, + { + "epoch": 22.285714285714285, + "grad_norm": 54.67482376098633, + "learning_rate": 3.0793650793650796e-05, + "loss": 0.347, + "step": 3900 + }, + { + "epoch": 22.291428571428572, + "grad_norm": 31.573854446411133, + "learning_rate": 3.078730158730159e-05, + "loss": 0.3179, + "step": 3901 + }, + { + "epoch": 22.29714285714286, + "grad_norm": 28.191001892089844, + "learning_rate": 3.078095238095238e-05, + "loss": 0.3604, + "step": 3902 + }, + { + "epoch": 22.302857142857142, + "grad_norm": 56.241233825683594, + "learning_rate": 3.077460317460318e-05, + "loss": 0.4328, + "step": 3903 + }, + { + "epoch": 22.30857142857143, + "grad_norm": 60.73114776611328, + "learning_rate": 3.0768253968253966e-05, + "loss": 0.2629, + "step": 3904 + }, + { + "epoch": 22.314285714285713, + "grad_norm": 94.41016387939453, + "learning_rate": 3.0761904761904766e-05, + "loss": 0.3041, + "step": 3905 + }, + { + "epoch": 22.32, + "grad_norm": 38.220855712890625, + "learning_rate": 3.075555555555556e-05, + "loss": 0.4861, + "step": 3906 + }, + { + "epoch": 22.325714285714287, + "grad_norm": 98.41761779785156, + "learning_rate": 3.074920634920635e-05, + "loss": 0.3381, + "step": 3907 + }, + { + "epoch": 
22.33142857142857, + "grad_norm": 31.45133399963379, + "learning_rate": 3.0742857142857144e-05, + "loss": 0.3808, + "step": 3908 + }, + { + "epoch": 22.337142857142858, + "grad_norm": 27.403478622436523, + "learning_rate": 3.0736507936507936e-05, + "loss": 0.3264, + "step": 3909 + }, + { + "epoch": 22.34285714285714, + "grad_norm": 22.842941284179688, + "learning_rate": 3.073015873015873e-05, + "loss": 0.4001, + "step": 3910 + }, + { + "epoch": 22.34857142857143, + "grad_norm": 24.106473922729492, + "learning_rate": 3.072380952380953e-05, + "loss": 0.2799, + "step": 3911 + }, + { + "epoch": 22.354285714285716, + "grad_norm": 92.46276092529297, + "learning_rate": 3.0717460317460314e-05, + "loss": 0.2886, + "step": 3912 + }, + { + "epoch": 22.36, + "grad_norm": 238.04917907714844, + "learning_rate": 3.0711111111111114e-05, + "loss": 0.3307, + "step": 3913 + }, + { + "epoch": 22.365714285714287, + "grad_norm": 177.84490966796875, + "learning_rate": 3.0704761904761906e-05, + "loss": 0.3596, + "step": 3914 + }, + { + "epoch": 22.37142857142857, + "grad_norm": 70.74510955810547, + "learning_rate": 3.06984126984127e-05, + "loss": 0.411, + "step": 3915 + }, + { + "epoch": 22.377142857142857, + "grad_norm": 36.48595428466797, + "learning_rate": 3.069206349206349e-05, + "loss": 0.3127, + "step": 3916 + }, + { + "epoch": 22.382857142857144, + "grad_norm": 45.07102584838867, + "learning_rate": 3.068571428571429e-05, + "loss": 0.3441, + "step": 3917 + }, + { + "epoch": 22.388571428571428, + "grad_norm": 57.4742546081543, + "learning_rate": 3.067936507936508e-05, + "loss": 0.2636, + "step": 3918 + }, + { + "epoch": 22.394285714285715, + "grad_norm": 41.72212219238281, + "learning_rate": 3.0673015873015876e-05, + "loss": 0.2729, + "step": 3919 + }, + { + "epoch": 22.4, + "grad_norm": 25.12485122680664, + "learning_rate": 3.066666666666667e-05, + "loss": 0.3471, + "step": 3920 + }, + { + "epoch": 22.405714285714286, + "grad_norm": 18.712438583374023, + "learning_rate": 
3.066031746031746e-05, + "loss": 0.3291, + "step": 3921 + }, + { + "epoch": 22.411428571428573, + "grad_norm": 63.36475372314453, + "learning_rate": 3.0653968253968254e-05, + "loss": 0.4245, + "step": 3922 + }, + { + "epoch": 22.417142857142856, + "grad_norm": 72.96314239501953, + "learning_rate": 3.064761904761905e-05, + "loss": 0.7622, + "step": 3923 + }, + { + "epoch": 22.422857142857143, + "grad_norm": 81.22702026367188, + "learning_rate": 3.0641269841269846e-05, + "loss": 0.4028, + "step": 3924 + }, + { + "epoch": 22.428571428571427, + "grad_norm": 102.31585693359375, + "learning_rate": 3.063492063492064e-05, + "loss": 0.3337, + "step": 3925 + }, + { + "epoch": 22.434285714285714, + "grad_norm": 182.3521270751953, + "learning_rate": 3.062857142857143e-05, + "loss": 0.3027, + "step": 3926 + }, + { + "epoch": 22.44, + "grad_norm": 70.28782653808594, + "learning_rate": 3.0622222222222224e-05, + "loss": 0.5345, + "step": 3927 + }, + { + "epoch": 22.445714285714285, + "grad_norm": 42.58792495727539, + "learning_rate": 3.061587301587302e-05, + "loss": 0.3515, + "step": 3928 + }, + { + "epoch": 22.451428571428572, + "grad_norm": 37.07721710205078, + "learning_rate": 3.060952380952381e-05, + "loss": 0.3843, + "step": 3929 + }, + { + "epoch": 22.457142857142856, + "grad_norm": 37.0380744934082, + "learning_rate": 3.060317460317461e-05, + "loss": 0.2544, + "step": 3930 + }, + { + "epoch": 22.462857142857143, + "grad_norm": 52.82063674926758, + "learning_rate": 3.0596825396825395e-05, + "loss": 0.2882, + "step": 3931 + }, + { + "epoch": 22.46857142857143, + "grad_norm": 52.426612854003906, + "learning_rate": 3.0590476190476194e-05, + "loss": 0.3228, + "step": 3932 + }, + { + "epoch": 22.474285714285713, + "grad_norm": 34.03739547729492, + "learning_rate": 3.058412698412699e-05, + "loss": 0.2281, + "step": 3933 + }, + { + "epoch": 22.48, + "grad_norm": 98.39401245117188, + "learning_rate": 3.057777777777778e-05, + "loss": 0.4055, + "step": 3934 + }, + { + "epoch": 
22.485714285714284, + "grad_norm": 45.16268539428711, + "learning_rate": 3.057142857142857e-05, + "loss": 0.3098, + "step": 3935 + }, + { + "epoch": 22.49142857142857, + "grad_norm": 18.381845474243164, + "learning_rate": 3.056507936507937e-05, + "loss": 0.3035, + "step": 3936 + }, + { + "epoch": 22.497142857142858, + "grad_norm": 41.476539611816406, + "learning_rate": 3.055873015873016e-05, + "loss": 0.3262, + "step": 3937 + }, + { + "epoch": 22.502857142857142, + "grad_norm": 90.57671356201172, + "learning_rate": 3.055238095238096e-05, + "loss": 0.3315, + "step": 3938 + }, + { + "epoch": 22.50857142857143, + "grad_norm": 61.76097106933594, + "learning_rate": 3.054603174603175e-05, + "loss": 0.4332, + "step": 3939 + }, + { + "epoch": 22.514285714285712, + "grad_norm": 50.836605072021484, + "learning_rate": 3.053968253968254e-05, + "loss": 0.252, + "step": 3940 + }, + { + "epoch": 22.52, + "grad_norm": 245.84751892089844, + "learning_rate": 3.0533333333333335e-05, + "loss": 0.3185, + "step": 3941 + }, + { + "epoch": 22.525714285714287, + "grad_norm": 83.79264068603516, + "learning_rate": 3.052698412698413e-05, + "loss": 0.443, + "step": 3942 + }, + { + "epoch": 22.53142857142857, + "grad_norm": 84.7953872680664, + "learning_rate": 3.052063492063492e-05, + "loss": 0.3408, + "step": 3943 + }, + { + "epoch": 22.537142857142857, + "grad_norm": 57.39402770996094, + "learning_rate": 3.0514285714285716e-05, + "loss": 0.5653, + "step": 3944 + }, + { + "epoch": 22.542857142857144, + "grad_norm": 331.8702392578125, + "learning_rate": 3.050793650793651e-05, + "loss": 0.4169, + "step": 3945 + }, + { + "epoch": 22.548571428571428, + "grad_norm": 374.3634033203125, + "learning_rate": 3.0501587301587305e-05, + "loss": 0.3585, + "step": 3946 + }, + { + "epoch": 22.554285714285715, + "grad_norm": 77.23312377929688, + "learning_rate": 3.04952380952381e-05, + "loss": 0.4017, + "step": 3947 + }, + { + "epoch": 22.56, + "grad_norm": 34.115482330322266, + "learning_rate": 
3.048888888888889e-05, + "loss": 0.2754, + "step": 3948 + }, + { + "epoch": 22.565714285714286, + "grad_norm": 390.7008056640625, + "learning_rate": 3.0482539682539686e-05, + "loss": 0.3646, + "step": 3949 + }, + { + "epoch": 22.571428571428573, + "grad_norm": 142.5330352783203, + "learning_rate": 3.0476190476190482e-05, + "loss": 0.3656, + "step": 3950 + }, + { + "epoch": 22.577142857142857, + "grad_norm": 308.1759948730469, + "learning_rate": 3.046984126984127e-05, + "loss": 0.4462, + "step": 3951 + }, + { + "epoch": 22.582857142857144, + "grad_norm": 48.4372444152832, + "learning_rate": 3.0463492063492067e-05, + "loss": 0.3191, + "step": 3952 + }, + { + "epoch": 22.588571428571427, + "grad_norm": 51.79970932006836, + "learning_rate": 3.0457142857142856e-05, + "loss": 0.3815, + "step": 3953 + }, + { + "epoch": 22.594285714285714, + "grad_norm": 85.287109375, + "learning_rate": 3.0450793650793652e-05, + "loss": 0.263, + "step": 3954 + }, + { + "epoch": 22.6, + "grad_norm": 50.578041076660156, + "learning_rate": 3.044444444444445e-05, + "loss": 0.4532, + "step": 3955 + }, + { + "epoch": 22.605714285714285, + "grad_norm": 45.909027099609375, + "learning_rate": 3.0438095238095238e-05, + "loss": 0.2848, + "step": 3956 + }, + { + "epoch": 22.611428571428572, + "grad_norm": 33.778865814208984, + "learning_rate": 3.0431746031746034e-05, + "loss": 0.3687, + "step": 3957 + }, + { + "epoch": 22.617142857142856, + "grad_norm": 30.09535789489746, + "learning_rate": 3.042539682539683e-05, + "loss": 0.3757, + "step": 3958 + }, + { + "epoch": 22.622857142857143, + "grad_norm": 27.8607120513916, + "learning_rate": 3.041904761904762e-05, + "loss": 0.3275, + "step": 3959 + }, + { + "epoch": 22.62857142857143, + "grad_norm": 187.89405822753906, + "learning_rate": 3.0412698412698415e-05, + "loss": 0.2528, + "step": 3960 + }, + { + "epoch": 22.634285714285713, + "grad_norm": 29.064250946044922, + "learning_rate": 3.040634920634921e-05, + "loss": 0.304, + "step": 3961 + }, + { + 
"epoch": 22.64, + "grad_norm": 40.578895568847656, + "learning_rate": 3.04e-05, + "loss": 0.3312, + "step": 3962 + }, + { + "epoch": 22.645714285714284, + "grad_norm": 56.08921432495117, + "learning_rate": 3.0393650793650796e-05, + "loss": 0.2642, + "step": 3963 + }, + { + "epoch": 22.65142857142857, + "grad_norm": 68.52364349365234, + "learning_rate": 3.0387301587301586e-05, + "loss": 0.2772, + "step": 3964 + }, + { + "epoch": 22.65714285714286, + "grad_norm": 26.927160263061523, + "learning_rate": 3.038095238095238e-05, + "loss": 0.2455, + "step": 3965 + }, + { + "epoch": 22.662857142857142, + "grad_norm": 28.958812713623047, + "learning_rate": 3.0374603174603178e-05, + "loss": 0.3878, + "step": 3966 + }, + { + "epoch": 22.66857142857143, + "grad_norm": 85.509765625, + "learning_rate": 3.0368253968253967e-05, + "loss": 0.3657, + "step": 3967 + }, + { + "epoch": 22.674285714285713, + "grad_norm": 59.687705993652344, + "learning_rate": 3.0361904761904763e-05, + "loss": 0.3652, + "step": 3968 + }, + { + "epoch": 22.68, + "grad_norm": 55.201026916503906, + "learning_rate": 3.035555555555556e-05, + "loss": 0.316, + "step": 3969 + }, + { + "epoch": 22.685714285714287, + "grad_norm": 177.66893005371094, + "learning_rate": 3.0349206349206348e-05, + "loss": 0.4029, + "step": 3970 + }, + { + "epoch": 22.69142857142857, + "grad_norm": 65.45475769042969, + "learning_rate": 3.0342857142857144e-05, + "loss": 0.395, + "step": 3971 + }, + { + "epoch": 22.697142857142858, + "grad_norm": 65.41983795166016, + "learning_rate": 3.0336507936507937e-05, + "loss": 0.2951, + "step": 3972 + }, + { + "epoch": 22.70285714285714, + "grad_norm": 27.596040725708008, + "learning_rate": 3.033015873015873e-05, + "loss": 0.3872, + "step": 3973 + }, + { + "epoch": 22.708571428571428, + "grad_norm": 66.64160919189453, + "learning_rate": 3.0323809523809526e-05, + "loss": 0.4295, + "step": 3974 + }, + { + "epoch": 22.714285714285715, + "grad_norm": 27.690946578979492, + "learning_rate": 
3.0317460317460318e-05, + "loss": 0.3481, + "step": 3975 + }, + { + "epoch": 22.72, + "grad_norm": 24.905649185180664, + "learning_rate": 3.031111111111111e-05, + "loss": 0.2619, + "step": 3976 + }, + { + "epoch": 22.725714285714286, + "grad_norm": 50.19453048706055, + "learning_rate": 3.0304761904761907e-05, + "loss": 0.2237, + "step": 3977 + }, + { + "epoch": 22.731428571428573, + "grad_norm": 48.230037689208984, + "learning_rate": 3.02984126984127e-05, + "loss": 0.4105, + "step": 3978 + }, + { + "epoch": 22.737142857142857, + "grad_norm": 59.16750717163086, + "learning_rate": 3.0292063492063495e-05, + "loss": 0.2397, + "step": 3979 + }, + { + "epoch": 22.742857142857144, + "grad_norm": 41.24590301513672, + "learning_rate": 3.0285714285714288e-05, + "loss": 0.2296, + "step": 3980 + }, + { + "epoch": 22.748571428571427, + "grad_norm": 27.61407470703125, + "learning_rate": 3.027936507936508e-05, + "loss": 0.2562, + "step": 3981 + }, + { + "epoch": 22.754285714285714, + "grad_norm": 79.87749481201172, + "learning_rate": 3.0273015873015877e-05, + "loss": 0.2065, + "step": 3982 + }, + { + "epoch": 22.76, + "grad_norm": 28.355274200439453, + "learning_rate": 3.0266666666666666e-05, + "loss": 0.2726, + "step": 3983 + }, + { + "epoch": 22.765714285714285, + "grad_norm": 69.86193084716797, + "learning_rate": 3.0260317460317462e-05, + "loss": 0.3554, + "step": 3984 + }, + { + "epoch": 22.771428571428572, + "grad_norm": 69.75241088867188, + "learning_rate": 3.0253968253968258e-05, + "loss": 0.3991, + "step": 3985 + }, + { + "epoch": 22.777142857142856, + "grad_norm": 18.34147834777832, + "learning_rate": 3.0247619047619047e-05, + "loss": 0.2868, + "step": 3986 + }, + { + "epoch": 22.782857142857143, + "grad_norm": 54.24711608886719, + "learning_rate": 3.0241269841269843e-05, + "loss": 0.2794, + "step": 3987 + }, + { + "epoch": 22.78857142857143, + "grad_norm": 60.45413589477539, + "learning_rate": 3.023492063492064e-05, + "loss": 0.3577, + "step": 3988 + }, + { + "epoch": 
22.794285714285714, + "grad_norm": 61.43553924560547, + "learning_rate": 3.022857142857143e-05, + "loss": 0.3516, + "step": 3989 + }, + { + "epoch": 22.8, + "grad_norm": 78.0551986694336, + "learning_rate": 3.0222222222222225e-05, + "loss": 0.2044, + "step": 3990 + }, + { + "epoch": 22.805714285714284, + "grad_norm": 50.30086135864258, + "learning_rate": 3.021587301587302e-05, + "loss": 0.2837, + "step": 3991 + }, + { + "epoch": 22.81142857142857, + "grad_norm": 44.484046936035156, + "learning_rate": 3.020952380952381e-05, + "loss": 0.3038, + "step": 3992 + }, + { + "epoch": 22.81714285714286, + "grad_norm": 162.52215576171875, + "learning_rate": 3.0203174603174606e-05, + "loss": 0.44, + "step": 3993 + }, + { + "epoch": 22.822857142857142, + "grad_norm": 87.21377563476562, + "learning_rate": 3.0196825396825395e-05, + "loss": 0.2657, + "step": 3994 + }, + { + "epoch": 22.82857142857143, + "grad_norm": 51.34219741821289, + "learning_rate": 3.019047619047619e-05, + "loss": 0.2638, + "step": 3995 + }, + { + "epoch": 22.834285714285713, + "grad_norm": 198.4999237060547, + "learning_rate": 3.0184126984126987e-05, + "loss": 0.386, + "step": 3996 + }, + { + "epoch": 22.84, + "grad_norm": 83.12291717529297, + "learning_rate": 3.0177777777777776e-05, + "loss": 0.4847, + "step": 3997 + }, + { + "epoch": 22.845714285714287, + "grad_norm": 130.7611846923828, + "learning_rate": 3.0171428571428572e-05, + "loss": 0.3893, + "step": 3998 + }, + { + "epoch": 22.85142857142857, + "grad_norm": 86.97884368896484, + "learning_rate": 3.016507936507937e-05, + "loss": 0.3274, + "step": 3999 + }, + { + "epoch": 22.857142857142858, + "grad_norm": 77.94489288330078, + "learning_rate": 3.0158730158730158e-05, + "loss": 0.3596, + "step": 4000 + }, + { + "epoch": 22.86285714285714, + "grad_norm": 86.53071594238281, + "learning_rate": 3.0152380952380954e-05, + "loss": 0.5372, + "step": 4001 + }, + { + "epoch": 22.86857142857143, + "grad_norm": 60.08096694946289, + "learning_rate": 
3.014603174603175e-05, + "loss": 0.2962, + "step": 4002 + }, + { + "epoch": 22.874285714285715, + "grad_norm": 74.86428833007812, + "learning_rate": 3.013968253968254e-05, + "loss": 0.4112, + "step": 4003 + }, + { + "epoch": 22.88, + "grad_norm": 27.846323013305664, + "learning_rate": 3.0133333333333335e-05, + "loss": 0.3109, + "step": 4004 + }, + { + "epoch": 22.885714285714286, + "grad_norm": 40.8173713684082, + "learning_rate": 3.0126984126984124e-05, + "loss": 0.4139, + "step": 4005 + }, + { + "epoch": 22.89142857142857, + "grad_norm": 42.435585021972656, + "learning_rate": 3.012063492063492e-05, + "loss": 0.2861, + "step": 4006 + }, + { + "epoch": 22.897142857142857, + "grad_norm": 83.25191497802734, + "learning_rate": 3.0114285714285716e-05, + "loss": 0.3462, + "step": 4007 + }, + { + "epoch": 22.902857142857144, + "grad_norm": 45.708099365234375, + "learning_rate": 3.0107936507936506e-05, + "loss": 0.2537, + "step": 4008 + }, + { + "epoch": 22.908571428571427, + "grad_norm": 77.79122161865234, + "learning_rate": 3.01015873015873e-05, + "loss": 0.3789, + "step": 4009 + }, + { + "epoch": 22.914285714285715, + "grad_norm": 25.502317428588867, + "learning_rate": 3.0095238095238098e-05, + "loss": 0.3658, + "step": 4010 + }, + { + "epoch": 22.92, + "grad_norm": 37.519325256347656, + "learning_rate": 3.008888888888889e-05, + "loss": 0.422, + "step": 4011 + }, + { + "epoch": 22.925714285714285, + "grad_norm": 2517.453125, + "learning_rate": 3.0082539682539683e-05, + "loss": 0.2673, + "step": 4012 + }, + { + "epoch": 22.931428571428572, + "grad_norm": 110.73004150390625, + "learning_rate": 3.007619047619048e-05, + "loss": 0.2519, + "step": 4013 + }, + { + "epoch": 22.937142857142856, + "grad_norm": 97.83757019042969, + "learning_rate": 3.006984126984127e-05, + "loss": 0.2666, + "step": 4014 + }, + { + "epoch": 22.942857142857143, + "grad_norm": 37.127960205078125, + "learning_rate": 3.0063492063492064e-05, + "loss": 0.2609, + "step": 4015 + }, + { + "epoch": 
22.94857142857143, + "grad_norm": 40.58960723876953, + "learning_rate": 3.0057142857142857e-05, + "loss": 0.3827, + "step": 4016 + }, + { + "epoch": 22.954285714285714, + "grad_norm": 50.95119094848633, + "learning_rate": 3.0050793650793653e-05, + "loss": 0.3445, + "step": 4017 + }, + { + "epoch": 22.96, + "grad_norm": 39.06296157836914, + "learning_rate": 3.004444444444445e-05, + "loss": 0.3327, + "step": 4018 + }, + { + "epoch": 22.965714285714284, + "grad_norm": 40.81393814086914, + "learning_rate": 3.0038095238095238e-05, + "loss": 0.3127, + "step": 4019 + }, + { + "epoch": 22.97142857142857, + "grad_norm": 156.51710510253906, + "learning_rate": 3.0031746031746034e-05, + "loss": 0.3226, + "step": 4020 + }, + { + "epoch": 22.97714285714286, + "grad_norm": 19.79698944091797, + "learning_rate": 3.002539682539683e-05, + "loss": 0.2417, + "step": 4021 + }, + { + "epoch": 22.982857142857142, + "grad_norm": 26.006929397583008, + "learning_rate": 3.001904761904762e-05, + "loss": 0.358, + "step": 4022 + }, + { + "epoch": 22.98857142857143, + "grad_norm": 72.33332824707031, + "learning_rate": 3.0012698412698415e-05, + "loss": 0.2902, + "step": 4023 + }, + { + "epoch": 22.994285714285713, + "grad_norm": 66.47919464111328, + "learning_rate": 3.000634920634921e-05, + "loss": 0.3277, + "step": 4024 + }, + { + "epoch": 23.0, + "grad_norm": 62.8366813659668, + "learning_rate": 3e-05, + "loss": 0.4013, + "step": 4025 + }, + { + "epoch": 23.0, + "eval_classes": 0, + "eval_loss": 0.6572927832603455, + "eval_map": 0.9134, + "eval_map_50": 0.9638, + "eval_map_75": 0.946, + "eval_map_large": 0.9134, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9134, + "eval_map_small": -1.0, + "eval_mar_1": 0.7857, + "eval_mar_10": 0.9635, + "eval_mar_100": 0.9702, + "eval_mar_100_per_class": 0.9702, + "eval_mar_large": 0.9702, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 14.5071, + "eval_samples_per_second": 20.266, + "eval_steps_per_second": 2.55, + "step": 4025 + 
}, + { + "epoch": 23.005714285714287, + "grad_norm": 30.199420928955078, + "learning_rate": 2.9993650793650797e-05, + "loss": 0.3206, + "step": 4026 + }, + { + "epoch": 23.01142857142857, + "grad_norm": 75.57009887695312, + "learning_rate": 2.9987301587301586e-05, + "loss": 0.2991, + "step": 4027 + }, + { + "epoch": 23.017142857142858, + "grad_norm": 44.21976089477539, + "learning_rate": 2.9980952380952382e-05, + "loss": 0.286, + "step": 4028 + }, + { + "epoch": 23.02285714285714, + "grad_norm": 81.70066833496094, + "learning_rate": 2.9974603174603178e-05, + "loss": 0.2852, + "step": 4029 + }, + { + "epoch": 23.02857142857143, + "grad_norm": 50.564395904541016, + "learning_rate": 2.9968253968253967e-05, + "loss": 0.5235, + "step": 4030 + }, + { + "epoch": 23.034285714285716, + "grad_norm": 29.517118453979492, + "learning_rate": 2.9961904761904763e-05, + "loss": 0.3634, + "step": 4031 + }, + { + "epoch": 23.04, + "grad_norm": 48.26054382324219, + "learning_rate": 2.995555555555556e-05, + "loss": 0.32, + "step": 4032 + }, + { + "epoch": 23.045714285714286, + "grad_norm": 54.40578079223633, + "learning_rate": 2.994920634920635e-05, + "loss": 0.3863, + "step": 4033 + }, + { + "epoch": 23.05142857142857, + "grad_norm": 85.53402709960938, + "learning_rate": 2.9942857142857145e-05, + "loss": 0.2024, + "step": 4034 + }, + { + "epoch": 23.057142857142857, + "grad_norm": 315.6854553222656, + "learning_rate": 2.993650793650794e-05, + "loss": 0.2238, + "step": 4035 + }, + { + "epoch": 23.062857142857144, + "grad_norm": 48.27422332763672, + "learning_rate": 2.993015873015873e-05, + "loss": 0.3596, + "step": 4036 + }, + { + "epoch": 23.068571428571428, + "grad_norm": 33.01654815673828, + "learning_rate": 2.9923809523809526e-05, + "loss": 0.3188, + "step": 4037 + }, + { + "epoch": 23.074285714285715, + "grad_norm": 40.44371032714844, + "learning_rate": 2.9917460317460315e-05, + "loss": 0.3801, + "step": 4038 + }, + { + "epoch": 23.08, + "grad_norm": 49.689483642578125, + 
"learning_rate": 2.991111111111111e-05, + "loss": 0.4179, + "step": 4039 + }, + { + "epoch": 23.085714285714285, + "grad_norm": 67.72065734863281, + "learning_rate": 2.9904761904761907e-05, + "loss": 0.2863, + "step": 4040 + }, + { + "epoch": 23.091428571428573, + "grad_norm": 48.59077835083008, + "learning_rate": 2.9898412698412696e-05, + "loss": 0.3233, + "step": 4041 + }, + { + "epoch": 23.097142857142856, + "grad_norm": 62.68111038208008, + "learning_rate": 2.9892063492063493e-05, + "loss": 0.2945, + "step": 4042 + }, + { + "epoch": 23.102857142857143, + "grad_norm": 48.72473907470703, + "learning_rate": 2.988571428571429e-05, + "loss": 0.2933, + "step": 4043 + }, + { + "epoch": 23.10857142857143, + "grad_norm": 180.3035125732422, + "learning_rate": 2.9879365079365078e-05, + "loss": 0.3224, + "step": 4044 + }, + { + "epoch": 23.114285714285714, + "grad_norm": 122.58573150634766, + "learning_rate": 2.9873015873015874e-05, + "loss": 0.4185, + "step": 4045 + }, + { + "epoch": 23.12, + "grad_norm": 96.61558532714844, + "learning_rate": 2.986666666666667e-05, + "loss": 0.3958, + "step": 4046 + }, + { + "epoch": 23.125714285714285, + "grad_norm": 49.80635452270508, + "learning_rate": 2.986031746031746e-05, + "loss": 0.2921, + "step": 4047 + }, + { + "epoch": 23.13142857142857, + "grad_norm": 58.02566146850586, + "learning_rate": 2.9853968253968255e-05, + "loss": 0.2497, + "step": 4048 + }, + { + "epoch": 23.13714285714286, + "grad_norm": 22.018583297729492, + "learning_rate": 2.9847619047619048e-05, + "loss": 0.3285, + "step": 4049 + }, + { + "epoch": 23.142857142857142, + "grad_norm": 19.503503799438477, + "learning_rate": 2.9841269841269844e-05, + "loss": 0.2849, + "step": 4050 + }, + { + "epoch": 23.14857142857143, + "grad_norm": 250.73367309570312, + "learning_rate": 2.9834920634920636e-05, + "loss": 0.4356, + "step": 4051 + }, + { + "epoch": 23.154285714285713, + "grad_norm": 69.83318328857422, + "learning_rate": 2.982857142857143e-05, + "loss": 0.3187, + 
"step": 4052 + }, + { + "epoch": 23.16, + "grad_norm": 42.90557861328125, + "learning_rate": 2.9822222222222225e-05, + "loss": 0.278, + "step": 4053 + }, + { + "epoch": 23.165714285714287, + "grad_norm": 65.743408203125, + "learning_rate": 2.9815873015873018e-05, + "loss": 0.2933, + "step": 4054 + }, + { + "epoch": 23.17142857142857, + "grad_norm": 24.971521377563477, + "learning_rate": 2.980952380952381e-05, + "loss": 0.2419, + "step": 4055 + }, + { + "epoch": 23.177142857142858, + "grad_norm": 83.3399429321289, + "learning_rate": 2.9803174603174606e-05, + "loss": 0.3094, + "step": 4056 + }, + { + "epoch": 23.18285714285714, + "grad_norm": 56.092254638671875, + "learning_rate": 2.9796825396825402e-05, + "loss": 0.2357, + "step": 4057 + }, + { + "epoch": 23.18857142857143, + "grad_norm": 65.36167907714844, + "learning_rate": 2.979047619047619e-05, + "loss": 0.2966, + "step": 4058 + }, + { + "epoch": 23.194285714285716, + "grad_norm": 77.52240753173828, + "learning_rate": 2.9784126984126988e-05, + "loss": 0.254, + "step": 4059 + }, + { + "epoch": 23.2, + "grad_norm": 39.724891662597656, + "learning_rate": 2.9777777777777777e-05, + "loss": 0.28, + "step": 4060 + }, + { + "epoch": 23.205714285714286, + "grad_norm": 89.53458404541016, + "learning_rate": 2.9771428571428573e-05, + "loss": 0.3436, + "step": 4061 + }, + { + "epoch": 23.21142857142857, + "grad_norm": 33.58918762207031, + "learning_rate": 2.976507936507937e-05, + "loss": 0.2707, + "step": 4062 + }, + { + "epoch": 23.217142857142857, + "grad_norm": 99.25462341308594, + "learning_rate": 2.9758730158730158e-05, + "loss": 0.2268, + "step": 4063 + }, + { + "epoch": 23.222857142857144, + "grad_norm": 39.316444396972656, + "learning_rate": 2.9752380952380954e-05, + "loss": 0.34, + "step": 4064 + }, + { + "epoch": 23.228571428571428, + "grad_norm": 234.89569091796875, + "learning_rate": 2.974603174603175e-05, + "loss": 0.3746, + "step": 4065 + }, + { + "epoch": 23.234285714285715, + "grad_norm": 29.803430557250977, 
+ "learning_rate": 2.973968253968254e-05, + "loss": 0.2864, + "step": 4066 + }, + { + "epoch": 23.24, + "grad_norm": 197.21961975097656, + "learning_rate": 2.9733333333333336e-05, + "loss": 0.2753, + "step": 4067 + }, + { + "epoch": 23.245714285714286, + "grad_norm": 34.24064254760742, + "learning_rate": 2.972698412698413e-05, + "loss": 0.2701, + "step": 4068 + }, + { + "epoch": 23.251428571428573, + "grad_norm": 31.376087188720703, + "learning_rate": 2.972063492063492e-05, + "loss": 0.3328, + "step": 4069 + }, + { + "epoch": 23.257142857142856, + "grad_norm": 44.968936920166016, + "learning_rate": 2.9714285714285717e-05, + "loss": 0.252, + "step": 4070 + }, + { + "epoch": 23.262857142857143, + "grad_norm": 90.7536392211914, + "learning_rate": 2.9707936507936506e-05, + "loss": 0.2172, + "step": 4071 + }, + { + "epoch": 23.268571428571427, + "grad_norm": 76.8214340209961, + "learning_rate": 2.9701587301587302e-05, + "loss": 0.2644, + "step": 4072 + }, + { + "epoch": 23.274285714285714, + "grad_norm": 49.46097946166992, + "learning_rate": 2.9695238095238098e-05, + "loss": 0.3203, + "step": 4073 + }, + { + "epoch": 23.28, + "grad_norm": 62.46669006347656, + "learning_rate": 2.9688888888888887e-05, + "loss": 0.3691, + "step": 4074 + }, + { + "epoch": 23.285714285714285, + "grad_norm": 68.3512191772461, + "learning_rate": 2.9682539682539683e-05, + "loss": 0.2607, + "step": 4075 + }, + { + "epoch": 23.291428571428572, + "grad_norm": 29.4173641204834, + "learning_rate": 2.967619047619048e-05, + "loss": 0.2843, + "step": 4076 + }, + { + "epoch": 23.29714285714286, + "grad_norm": 542.9811401367188, + "learning_rate": 2.966984126984127e-05, + "loss": 0.2393, + "step": 4077 + }, + { + "epoch": 23.302857142857142, + "grad_norm": 47.83937072753906, + "learning_rate": 2.9663492063492065e-05, + "loss": 0.3395, + "step": 4078 + }, + { + "epoch": 23.30857142857143, + "grad_norm": 18.514507293701172, + "learning_rate": 2.965714285714286e-05, + "loss": 0.2663, + "step": 4079 + }, + { 
+ "epoch": 23.314285714285713, + "grad_norm": 69.58287048339844, + "learning_rate": 2.965079365079365e-05, + "loss": 0.3558, + "step": 4080 + }, + { + "epoch": 23.32, + "grad_norm": 37.93904495239258, + "learning_rate": 2.9644444444444446e-05, + "loss": 0.2761, + "step": 4081 + }, + { + "epoch": 23.325714285714287, + "grad_norm": 29.72590446472168, + "learning_rate": 2.963809523809524e-05, + "loss": 0.284, + "step": 4082 + }, + { + "epoch": 23.33142857142857, + "grad_norm": 90.10633087158203, + "learning_rate": 2.963174603174603e-05, + "loss": 0.3729, + "step": 4083 + }, + { + "epoch": 23.337142857142858, + "grad_norm": 32.455440521240234, + "learning_rate": 2.9625396825396827e-05, + "loss": 0.3237, + "step": 4084 + }, + { + "epoch": 23.34285714285714, + "grad_norm": 29.6805477142334, + "learning_rate": 2.961904761904762e-05, + "loss": 0.2234, + "step": 4085 + }, + { + "epoch": 23.34857142857143, + "grad_norm": 56.32411193847656, + "learning_rate": 2.9612698412698413e-05, + "loss": 0.3353, + "step": 4086 + }, + { + "epoch": 23.354285714285716, + "grad_norm": 50.53952407836914, + "learning_rate": 2.960634920634921e-05, + "loss": 0.2391, + "step": 4087 + }, + { + "epoch": 23.36, + "grad_norm": 33.48997116088867, + "learning_rate": 2.96e-05, + "loss": 0.2906, + "step": 4088 + }, + { + "epoch": 23.365714285714287, + "grad_norm": 37.64202117919922, + "learning_rate": 2.9593650793650797e-05, + "loss": 0.3691, + "step": 4089 + }, + { + "epoch": 23.37142857142857, + "grad_norm": 56.41489028930664, + "learning_rate": 2.958730158730159e-05, + "loss": 0.2553, + "step": 4090 + }, + { + "epoch": 23.377142857142857, + "grad_norm": 34.01242446899414, + "learning_rate": 2.9580952380952382e-05, + "loss": 0.3105, + "step": 4091 + }, + { + "epoch": 23.382857142857144, + "grad_norm": 282.2917785644531, + "learning_rate": 2.957460317460318e-05, + "loss": 0.3843, + "step": 4092 + }, + { + "epoch": 23.388571428571428, + "grad_norm": 82.39885711669922, + "learning_rate": 
2.9568253968253968e-05, + "loss": 0.2232, + "step": 4093 + }, + { + "epoch": 23.394285714285715, + "grad_norm": 37.49563980102539, + "learning_rate": 2.9561904761904764e-05, + "loss": 0.274, + "step": 4094 + }, + { + "epoch": 23.4, + "grad_norm": 34.51573944091797, + "learning_rate": 2.955555555555556e-05, + "loss": 0.3468, + "step": 4095 + }, + { + "epoch": 23.405714285714286, + "grad_norm": 54.154293060302734, + "learning_rate": 2.954920634920635e-05, + "loss": 0.3724, + "step": 4096 + }, + { + "epoch": 23.411428571428573, + "grad_norm": 57.695987701416016, + "learning_rate": 2.9542857142857145e-05, + "loss": 0.3271, + "step": 4097 + }, + { + "epoch": 23.417142857142856, + "grad_norm": 68.31269836425781, + "learning_rate": 2.953650793650794e-05, + "loss": 0.2621, + "step": 4098 + }, + { + "epoch": 23.422857142857143, + "grad_norm": 103.53577423095703, + "learning_rate": 2.953015873015873e-05, + "loss": 0.3015, + "step": 4099 + }, + { + "epoch": 23.428571428571427, + "grad_norm": 453.0390930175781, + "learning_rate": 2.9523809523809526e-05, + "loss": 0.4082, + "step": 4100 + }, + { + "epoch": 23.434285714285714, + "grad_norm": 32.948158264160156, + "learning_rate": 2.9517460317460322e-05, + "loss": 0.3961, + "step": 4101 + }, + { + "epoch": 23.44, + "grad_norm": 54.90577697753906, + "learning_rate": 2.951111111111111e-05, + "loss": 0.1926, + "step": 4102 + }, + { + "epoch": 23.445714285714285, + "grad_norm": 66.09902954101562, + "learning_rate": 2.9504761904761908e-05, + "loss": 0.5602, + "step": 4103 + }, + { + "epoch": 23.451428571428572, + "grad_norm": 40.43222427368164, + "learning_rate": 2.9498412698412697e-05, + "loss": 0.1848, + "step": 4104 + }, + { + "epoch": 23.457142857142856, + "grad_norm": 35.24291229248047, + "learning_rate": 2.9492063492063493e-05, + "loss": 0.2214, + "step": 4105 + }, + { + "epoch": 23.462857142857143, + "grad_norm": 54.03268814086914, + "learning_rate": 2.948571428571429e-05, + "loss": 0.2811, + "step": 4106 + }, + { + "epoch": 
23.46857142857143, + "grad_norm": 27.69357681274414, + "learning_rate": 2.9479365079365078e-05, + "loss": 0.3433, + "step": 4107 + }, + { + "epoch": 23.474285714285713, + "grad_norm": 116.31719970703125, + "learning_rate": 2.9473015873015874e-05, + "loss": 0.3817, + "step": 4108 + }, + { + "epoch": 23.48, + "grad_norm": 106.09890747070312, + "learning_rate": 2.946666666666667e-05, + "loss": 0.391, + "step": 4109 + }, + { + "epoch": 23.485714285714284, + "grad_norm": 30.089054107666016, + "learning_rate": 2.946031746031746e-05, + "loss": 0.3046, + "step": 4110 + }, + { + "epoch": 23.49142857142857, + "grad_norm": 73.0106201171875, + "learning_rate": 2.9453968253968256e-05, + "loss": 0.2402, + "step": 4111 + }, + { + "epoch": 23.497142857142858, + "grad_norm": 45.55817794799805, + "learning_rate": 2.944761904761905e-05, + "loss": 0.1807, + "step": 4112 + }, + { + "epoch": 23.502857142857142, + "grad_norm": 45.446956634521484, + "learning_rate": 2.944126984126984e-05, + "loss": 0.2182, + "step": 4113 + }, + { + "epoch": 23.50857142857143, + "grad_norm": 150.46876525878906, + "learning_rate": 2.9434920634920637e-05, + "loss": 0.328, + "step": 4114 + }, + { + "epoch": 23.514285714285712, + "grad_norm": 82.34252166748047, + "learning_rate": 2.9428571428571426e-05, + "loss": 0.2811, + "step": 4115 + }, + { + "epoch": 23.52, + "grad_norm": 36.25138473510742, + "learning_rate": 2.9422222222222222e-05, + "loss": 0.2981, + "step": 4116 + }, + { + "epoch": 23.525714285714287, + "grad_norm": 26.288358688354492, + "learning_rate": 2.9415873015873018e-05, + "loss": 0.3378, + "step": 4117 + }, + { + "epoch": 23.53142857142857, + "grad_norm": 83.09166717529297, + "learning_rate": 2.9409523809523807e-05, + "loss": 0.215, + "step": 4118 + }, + { + "epoch": 23.537142857142857, + "grad_norm": 35.448184967041016, + "learning_rate": 2.9403174603174603e-05, + "loss": 0.3778, + "step": 4119 + }, + { + "epoch": 23.542857142857144, + "grad_norm": 41.17313003540039, + "learning_rate": 
2.93968253968254e-05, + "loss": 0.3844, + "step": 4120 + }, + { + "epoch": 23.548571428571428, + "grad_norm": 35.156951904296875, + "learning_rate": 2.9390476190476192e-05, + "loss": 0.3097, + "step": 4121 + }, + { + "epoch": 23.554285714285715, + "grad_norm": 39.88846969604492, + "learning_rate": 2.9384126984126985e-05, + "loss": 0.2651, + "step": 4122 + }, + { + "epoch": 23.56, + "grad_norm": 50.63812255859375, + "learning_rate": 2.937777777777778e-05, + "loss": 0.3026, + "step": 4123 + }, + { + "epoch": 23.565714285714286, + "grad_norm": 41.88031005859375, + "learning_rate": 2.9371428571428573e-05, + "loss": 0.243, + "step": 4124 + }, + { + "epoch": 23.571428571428573, + "grad_norm": 56.02881622314453, + "learning_rate": 2.9365079365079366e-05, + "loss": 0.28, + "step": 4125 + }, + { + "epoch": 23.577142857142857, + "grad_norm": 32.54145812988281, + "learning_rate": 2.935873015873016e-05, + "loss": 0.5015, + "step": 4126 + }, + { + "epoch": 23.582857142857144, + "grad_norm": 29.771968841552734, + "learning_rate": 2.9352380952380955e-05, + "loss": 0.2976, + "step": 4127 + }, + { + "epoch": 23.588571428571427, + "grad_norm": 62.9817008972168, + "learning_rate": 2.934603174603175e-05, + "loss": 0.3482, + "step": 4128 + }, + { + "epoch": 23.594285714285714, + "grad_norm": 78.29792022705078, + "learning_rate": 2.933968253968254e-05, + "loss": 0.2816, + "step": 4129 + }, + { + "epoch": 23.6, + "grad_norm": 68.27775573730469, + "learning_rate": 2.9333333333333336e-05, + "loss": 0.4067, + "step": 4130 + }, + { + "epoch": 23.605714285714285, + "grad_norm": 61.55348587036133, + "learning_rate": 2.9326984126984132e-05, + "loss": 0.2233, + "step": 4131 + }, + { + "epoch": 23.611428571428572, + "grad_norm": 52.80277633666992, + "learning_rate": 2.932063492063492e-05, + "loss": 0.3024, + "step": 4132 + }, + { + "epoch": 23.617142857142856, + "grad_norm": 58.86653137207031, + "learning_rate": 2.9314285714285717e-05, + "loss": 0.274, + "step": 4133 + }, + { + "epoch": 
23.622857142857143, + "grad_norm": 74.1474838256836, + "learning_rate": 2.9307936507936513e-05, + "loss": 0.3088, + "step": 4134 + }, + { + "epoch": 23.62857142857143, + "grad_norm": 61.79986572265625, + "learning_rate": 2.9301587301587303e-05, + "loss": 0.3552, + "step": 4135 + }, + { + "epoch": 23.634285714285713, + "grad_norm": 74.90558624267578, + "learning_rate": 2.92952380952381e-05, + "loss": 0.3745, + "step": 4136 + }, + { + "epoch": 23.64, + "grad_norm": 46.57097244262695, + "learning_rate": 2.9288888888888888e-05, + "loss": 0.3227, + "step": 4137 + }, + { + "epoch": 23.645714285714284, + "grad_norm": 17.17082977294922, + "learning_rate": 2.9282539682539684e-05, + "loss": 0.2491, + "step": 4138 + }, + { + "epoch": 23.65142857142857, + "grad_norm": 49.79542541503906, + "learning_rate": 2.927619047619048e-05, + "loss": 0.3576, + "step": 4139 + }, + { + "epoch": 23.65714285714286, + "grad_norm": 26.572895050048828, + "learning_rate": 2.926984126984127e-05, + "loss": 0.2167, + "step": 4140 + }, + { + "epoch": 23.662857142857142, + "grad_norm": 252.93934631347656, + "learning_rate": 2.9263492063492065e-05, + "loss": 0.4041, + "step": 4141 + }, + { + "epoch": 23.66857142857143, + "grad_norm": 50.79920959472656, + "learning_rate": 2.925714285714286e-05, + "loss": 0.3337, + "step": 4142 + }, + { + "epoch": 23.674285714285713, + "grad_norm": 40.481346130371094, + "learning_rate": 2.925079365079365e-05, + "loss": 0.2801, + "step": 4143 + }, + { + "epoch": 23.68, + "grad_norm": 65.705078125, + "learning_rate": 2.9244444444444446e-05, + "loss": 0.3622, + "step": 4144 + }, + { + "epoch": 23.685714285714287, + "grad_norm": 92.2256088256836, + "learning_rate": 2.9238095238095242e-05, + "loss": 0.3038, + "step": 4145 + }, + { + "epoch": 23.69142857142857, + "grad_norm": 556.6972045898438, + "learning_rate": 2.923174603174603e-05, + "loss": 0.5332, + "step": 4146 + }, + { + "epoch": 23.697142857142858, + "grad_norm": 70.96257019042969, + "learning_rate": 
2.9225396825396828e-05, + "loss": 0.4394, + "step": 4147 + }, + { + "epoch": 23.70285714285714, + "grad_norm": 86.98827362060547, + "learning_rate": 2.9219047619047617e-05, + "loss": 0.3198, + "step": 4148 + }, + { + "epoch": 23.708571428571428, + "grad_norm": 40.2372932434082, + "learning_rate": 2.9212698412698413e-05, + "loss": 0.2527, + "step": 4149 + }, + { + "epoch": 23.714285714285715, + "grad_norm": 36.18385314941406, + "learning_rate": 2.920634920634921e-05, + "loss": 0.235, + "step": 4150 + }, + { + "epoch": 23.72, + "grad_norm": 46.50616455078125, + "learning_rate": 2.9199999999999998e-05, + "loss": 0.324, + "step": 4151 + }, + { + "epoch": 23.725714285714286, + "grad_norm": 32.6524772644043, + "learning_rate": 2.9193650793650794e-05, + "loss": 0.275, + "step": 4152 + }, + { + "epoch": 23.731428571428573, + "grad_norm": 52.58832931518555, + "learning_rate": 2.918730158730159e-05, + "loss": 0.4361, + "step": 4153 + }, + { + "epoch": 23.737142857142857, + "grad_norm": 54.013526916503906, + "learning_rate": 2.918095238095238e-05, + "loss": 0.331, + "step": 4154 + }, + { + "epoch": 23.742857142857144, + "grad_norm": 96.2431411743164, + "learning_rate": 2.9174603174603176e-05, + "loss": 0.4995, + "step": 4155 + }, + { + "epoch": 23.748571428571427, + "grad_norm": 181.16592407226562, + "learning_rate": 2.916825396825397e-05, + "loss": 0.4459, + "step": 4156 + }, + { + "epoch": 23.754285714285714, + "grad_norm": 40.125282287597656, + "learning_rate": 2.916190476190476e-05, + "loss": 0.3106, + "step": 4157 + }, + { + "epoch": 23.76, + "grad_norm": 34.627464294433594, + "learning_rate": 2.9155555555555557e-05, + "loss": 0.457, + "step": 4158 + }, + { + "epoch": 23.765714285714285, + "grad_norm": 222.72634887695312, + "learning_rate": 2.914920634920635e-05, + "loss": 0.3446, + "step": 4159 + }, + { + "epoch": 23.771428571428572, + "grad_norm": 47.79024887084961, + "learning_rate": 2.9142857142857146e-05, + "loss": 0.4671, + "step": 4160 + }, + { + "epoch": 
23.777142857142856, + "grad_norm": 29.11241912841797, + "learning_rate": 2.9136507936507938e-05, + "loss": 0.2407, + "step": 4161 + }, + { + "epoch": 23.782857142857143, + "grad_norm": 301.6238098144531, + "learning_rate": 2.913015873015873e-05, + "loss": 0.2996, + "step": 4162 + }, + { + "epoch": 23.78857142857143, + "grad_norm": 58.332515716552734, + "learning_rate": 2.9123809523809527e-05, + "loss": 0.3433, + "step": 4163 + }, + { + "epoch": 23.794285714285714, + "grad_norm": 27.199840545654297, + "learning_rate": 2.911746031746032e-05, + "loss": 0.2256, + "step": 4164 + }, + { + "epoch": 23.8, + "grad_norm": 72.84584045410156, + "learning_rate": 2.9111111111111112e-05, + "loss": 0.3094, + "step": 4165 + }, + { + "epoch": 23.805714285714284, + "grad_norm": 49.55764389038086, + "learning_rate": 2.9104761904761908e-05, + "loss": 0.3032, + "step": 4166 + }, + { + "epoch": 23.81142857142857, + "grad_norm": 43.26686477661133, + "learning_rate": 2.9098412698412704e-05, + "loss": 0.5507, + "step": 4167 + }, + { + "epoch": 23.81714285714286, + "grad_norm": 53.073814392089844, + "learning_rate": 2.9092063492063493e-05, + "loss": 0.1837, + "step": 4168 + }, + { + "epoch": 23.822857142857142, + "grad_norm": 36.02912139892578, + "learning_rate": 2.908571428571429e-05, + "loss": 0.2756, + "step": 4169 + }, + { + "epoch": 23.82857142857143, + "grad_norm": 61.297542572021484, + "learning_rate": 2.907936507936508e-05, + "loss": 0.2412, + "step": 4170 + }, + { + "epoch": 23.834285714285713, + "grad_norm": 40.57884216308594, + "learning_rate": 2.9073015873015875e-05, + "loss": 0.3359, + "step": 4171 + }, + { + "epoch": 23.84, + "grad_norm": 39.376338958740234, + "learning_rate": 2.906666666666667e-05, + "loss": 0.2095, + "step": 4172 + }, + { + "epoch": 23.845714285714287, + "grad_norm": 42.68909454345703, + "learning_rate": 2.906031746031746e-05, + "loss": 0.3273, + "step": 4173 + }, + { + "epoch": 23.85142857142857, + "grad_norm": 30.186763763427734, + "learning_rate": 
2.9053968253968256e-05, + "loss": 0.2665, + "step": 4174 + }, + { + "epoch": 23.857142857142858, + "grad_norm": 33.17063522338867, + "learning_rate": 2.9047619047619052e-05, + "loss": 0.2003, + "step": 4175 + }, + { + "epoch": 23.86285714285714, + "grad_norm": 36.41645050048828, + "learning_rate": 2.904126984126984e-05, + "loss": 0.2562, + "step": 4176 + }, + { + "epoch": 23.86857142857143, + "grad_norm": 366.9606628417969, + "learning_rate": 2.9034920634920637e-05, + "loss": 0.4117, + "step": 4177 + }, + { + "epoch": 23.874285714285715, + "grad_norm": 58.57237243652344, + "learning_rate": 2.9028571428571427e-05, + "loss": 0.3746, + "step": 4178 + }, + { + "epoch": 23.88, + "grad_norm": 51.41328811645508, + "learning_rate": 2.9022222222222223e-05, + "loss": 0.4567, + "step": 4179 + }, + { + "epoch": 23.885714285714286, + "grad_norm": 43.09410858154297, + "learning_rate": 2.901587301587302e-05, + "loss": 0.202, + "step": 4180 + }, + { + "epoch": 23.89142857142857, + "grad_norm": 122.54499053955078, + "learning_rate": 2.9009523809523808e-05, + "loss": 0.3366, + "step": 4181 + }, + { + "epoch": 23.897142857142857, + "grad_norm": 58.41392517089844, + "learning_rate": 2.9003174603174604e-05, + "loss": 0.2232, + "step": 4182 + }, + { + "epoch": 23.902857142857144, + "grad_norm": 61.96669387817383, + "learning_rate": 2.89968253968254e-05, + "loss": 0.325, + "step": 4183 + }, + { + "epoch": 23.908571428571427, + "grad_norm": 50.98522186279297, + "learning_rate": 2.899047619047619e-05, + "loss": 0.255, + "step": 4184 + }, + { + "epoch": 23.914285714285715, + "grad_norm": 83.81291961669922, + "learning_rate": 2.8984126984126985e-05, + "loss": 0.3386, + "step": 4185 + }, + { + "epoch": 23.92, + "grad_norm": 13.118070602416992, + "learning_rate": 2.897777777777778e-05, + "loss": 0.4992, + "step": 4186 + }, + { + "epoch": 23.925714285714285, + "grad_norm": 237.41595458984375, + "learning_rate": 2.897142857142857e-05, + "loss": 0.3331, + "step": 4187 + }, + { + "epoch": 
23.931428571428572, + "grad_norm": 66.728759765625, + "learning_rate": 2.8965079365079366e-05, + "loss": 0.2556, + "step": 4188 + }, + { + "epoch": 23.937142857142856, + "grad_norm": 61.510650634765625, + "learning_rate": 2.8958730158730156e-05, + "loss": 0.3102, + "step": 4189 + }, + { + "epoch": 23.942857142857143, + "grad_norm": 38.73003387451172, + "learning_rate": 2.8952380952380952e-05, + "loss": 0.2483, + "step": 4190 + }, + { + "epoch": 23.94857142857143, + "grad_norm": 59.62195587158203, + "learning_rate": 2.8946031746031748e-05, + "loss": 0.4213, + "step": 4191 + }, + { + "epoch": 23.954285714285714, + "grad_norm": 42.30493927001953, + "learning_rate": 2.893968253968254e-05, + "loss": 0.2779, + "step": 4192 + }, + { + "epoch": 23.96, + "grad_norm": 94.05848693847656, + "learning_rate": 2.8933333333333333e-05, + "loss": 0.3609, + "step": 4193 + }, + { + "epoch": 23.965714285714284, + "grad_norm": 28.656415939331055, + "learning_rate": 2.892698412698413e-05, + "loss": 0.3449, + "step": 4194 + }, + { + "epoch": 23.97142857142857, + "grad_norm": 41.065467834472656, + "learning_rate": 2.892063492063492e-05, + "loss": 0.2239, + "step": 4195 + }, + { + "epoch": 23.97714285714286, + "grad_norm": 50.81985855102539, + "learning_rate": 2.8914285714285714e-05, + "loss": 0.3053, + "step": 4196 + }, + { + "epoch": 23.982857142857142, + "grad_norm": 91.52791595458984, + "learning_rate": 2.890793650793651e-05, + "loss": 0.3412, + "step": 4197 + }, + { + "epoch": 23.98857142857143, + "grad_norm": 142.12220764160156, + "learning_rate": 2.8901587301587303e-05, + "loss": 0.4178, + "step": 4198 + }, + { + "epoch": 23.994285714285713, + "grad_norm": 54.66435623168945, + "learning_rate": 2.88952380952381e-05, + "loss": 0.4585, + "step": 4199 + }, + { + "epoch": 24.0, + "grad_norm": 37.650115966796875, + "learning_rate": 2.8888888888888888e-05, + "loss": 0.381, + "step": 4200 + }, + { + "epoch": 24.0, + "eval_classes": 0, + "eval_loss": 0.6580873727798462, + "eval_map": 0.9096, 
+ "eval_map_50": 0.9685, + "eval_map_75": 0.9554, + "eval_map_large": 0.9099, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9096, + "eval_map_small": -1.0, + "eval_mar_1": 0.7806, + "eval_mar_10": 0.9578, + "eval_mar_100": 0.9641, + "eval_mar_100_per_class": 0.9641, + "eval_mar_large": 0.9641, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.4963, + "eval_samples_per_second": 21.784, + "eval_steps_per_second": 2.741, + "step": 4200 + }, + { + "epoch": 24.005714285714287, + "grad_norm": 27.211124420166016, + "learning_rate": 2.8882539682539684e-05, + "loss": 0.3187, + "step": 4201 + }, + { + "epoch": 24.01142857142857, + "grad_norm": 55.33848571777344, + "learning_rate": 2.887619047619048e-05, + "loss": 0.4081, + "step": 4202 + }, + { + "epoch": 24.017142857142858, + "grad_norm": 69.4387435913086, + "learning_rate": 2.886984126984127e-05, + "loss": 0.4005, + "step": 4203 + }, + { + "epoch": 24.02285714285714, + "grad_norm": 166.2154541015625, + "learning_rate": 2.8863492063492066e-05, + "loss": 0.3173, + "step": 4204 + }, + { + "epoch": 24.02857142857143, + "grad_norm": 25.793365478515625, + "learning_rate": 2.885714285714286e-05, + "loss": 0.4004, + "step": 4205 + }, + { + "epoch": 24.034285714285716, + "grad_norm": 22.807405471801758, + "learning_rate": 2.885079365079365e-05, + "loss": 0.2878, + "step": 4206 + }, + { + "epoch": 24.04, + "grad_norm": 27.175668716430664, + "learning_rate": 2.8844444444444447e-05, + "loss": 0.4303, + "step": 4207 + }, + { + "epoch": 24.045714285714286, + "grad_norm": 404.7174072265625, + "learning_rate": 2.8838095238095243e-05, + "loss": 0.364, + "step": 4208 + }, + { + "epoch": 24.05142857142857, + "grad_norm": 34.13227081298828, + "learning_rate": 2.8831746031746032e-05, + "loss": 0.5157, + "step": 4209 + }, + { + "epoch": 24.057142857142857, + "grad_norm": 48.720848083496094, + "learning_rate": 2.8825396825396828e-05, + "loss": 0.3891, + "step": 4210 + }, + { + "epoch": 24.062857142857144, + 
"grad_norm": 57.76936340332031, + "learning_rate": 2.8819047619047617e-05, + "loss": 0.408, + "step": 4211 + }, + { + "epoch": 24.068571428571428, + "grad_norm": 89.69650268554688, + "learning_rate": 2.8812698412698413e-05, + "loss": 0.377, + "step": 4212 + }, + { + "epoch": 24.074285714285715, + "grad_norm": 56.81898498535156, + "learning_rate": 2.880634920634921e-05, + "loss": 0.3412, + "step": 4213 + }, + { + "epoch": 24.08, + "grad_norm": 23.175161361694336, + "learning_rate": 2.88e-05, + "loss": 0.3675, + "step": 4214 + }, + { + "epoch": 24.085714285714285, + "grad_norm": 55.028072357177734, + "learning_rate": 2.8793650793650795e-05, + "loss": 0.3423, + "step": 4215 + }, + { + "epoch": 24.091428571428573, + "grad_norm": 21.8044376373291, + "learning_rate": 2.878730158730159e-05, + "loss": 0.3916, + "step": 4216 + }, + { + "epoch": 24.097142857142856, + "grad_norm": 32.57919692993164, + "learning_rate": 2.878095238095238e-05, + "loss": 0.3125, + "step": 4217 + }, + { + "epoch": 24.102857142857143, + "grad_norm": 58.032657623291016, + "learning_rate": 2.8774603174603176e-05, + "loss": 0.2729, + "step": 4218 + }, + { + "epoch": 24.10857142857143, + "grad_norm": 138.78158569335938, + "learning_rate": 2.8768253968253972e-05, + "loss": 0.2627, + "step": 4219 + }, + { + "epoch": 24.114285714285714, + "grad_norm": 61.15944290161133, + "learning_rate": 2.876190476190476e-05, + "loss": 0.3373, + "step": 4220 + }, + { + "epoch": 24.12, + "grad_norm": 32.430580139160156, + "learning_rate": 2.8755555555555557e-05, + "loss": 0.3092, + "step": 4221 + }, + { + "epoch": 24.125714285714285, + "grad_norm": 86.44486236572266, + "learning_rate": 2.8749206349206347e-05, + "loss": 0.2722, + "step": 4222 + }, + { + "epoch": 24.13142857142857, + "grad_norm": 20.431638717651367, + "learning_rate": 2.8742857142857143e-05, + "loss": 0.2748, + "step": 4223 + }, + { + "epoch": 24.13714285714286, + "grad_norm": 194.9654998779297, + "learning_rate": 2.873650793650794e-05, + "loss": 0.3542, + 
"step": 4224 + }, + { + "epoch": 24.142857142857142, + "grad_norm": 189.65545654296875, + "learning_rate": 2.8730158730158728e-05, + "loss": 0.2563, + "step": 4225 + }, + { + "epoch": 24.14857142857143, + "grad_norm": 53.960079193115234, + "learning_rate": 2.8723809523809524e-05, + "loss": 0.3252, + "step": 4226 + }, + { + "epoch": 24.154285714285713, + "grad_norm": 25.061969757080078, + "learning_rate": 2.871746031746032e-05, + "loss": 0.3335, + "step": 4227 + }, + { + "epoch": 24.16, + "grad_norm": 113.84751892089844, + "learning_rate": 2.8711111111111113e-05, + "loss": 0.3427, + "step": 4228 + }, + { + "epoch": 24.165714285714287, + "grad_norm": 22.992420196533203, + "learning_rate": 2.8704761904761905e-05, + "loss": 0.4027, + "step": 4229 + }, + { + "epoch": 24.17142857142857, + "grad_norm": 33.864959716796875, + "learning_rate": 2.86984126984127e-05, + "loss": 0.36, + "step": 4230 + }, + { + "epoch": 24.177142857142858, + "grad_norm": 208.66458129882812, + "learning_rate": 2.8692063492063494e-05, + "loss": 0.413, + "step": 4231 + }, + { + "epoch": 24.18285714285714, + "grad_norm": 47.48231887817383, + "learning_rate": 2.8685714285714286e-05, + "loss": 0.2793, + "step": 4232 + }, + { + "epoch": 24.18857142857143, + "grad_norm": 62.76450729370117, + "learning_rate": 2.867936507936508e-05, + "loss": 0.1819, + "step": 4233 + }, + { + "epoch": 24.194285714285716, + "grad_norm": 55.089698791503906, + "learning_rate": 2.8673015873015875e-05, + "loss": 0.2865, + "step": 4234 + }, + { + "epoch": 24.2, + "grad_norm": 143.07089233398438, + "learning_rate": 2.8666666666666668e-05, + "loss": 0.3126, + "step": 4235 + }, + { + "epoch": 24.205714285714286, + "grad_norm": 45.78302764892578, + "learning_rate": 2.866031746031746e-05, + "loss": 0.2982, + "step": 4236 + }, + { + "epoch": 24.21142857142857, + "grad_norm": 104.21949005126953, + "learning_rate": 2.8653968253968256e-05, + "loss": 0.4371, + "step": 4237 + }, + { + "epoch": 24.217142857142857, + "grad_norm": 
41.76313018798828, + "learning_rate": 2.8647619047619052e-05, + "loss": 0.2361, + "step": 4238 + }, + { + "epoch": 24.222857142857144, + "grad_norm": 37.7957649230957, + "learning_rate": 2.8641269841269842e-05, + "loss": 0.3493, + "step": 4239 + }, + { + "epoch": 24.228571428571428, + "grad_norm": 61.35261917114258, + "learning_rate": 2.8634920634920638e-05, + "loss": 0.3329, + "step": 4240 + }, + { + "epoch": 24.234285714285715, + "grad_norm": 42.222930908203125, + "learning_rate": 2.8628571428571434e-05, + "loss": 0.267, + "step": 4241 + }, + { + "epoch": 24.24, + "grad_norm": 25.356447219848633, + "learning_rate": 2.8622222222222223e-05, + "loss": 0.3173, + "step": 4242 + }, + { + "epoch": 24.245714285714286, + "grad_norm": 29.31715202331543, + "learning_rate": 2.861587301587302e-05, + "loss": 0.2556, + "step": 4243 + }, + { + "epoch": 24.251428571428573, + "grad_norm": 78.12251281738281, + "learning_rate": 2.8609523809523808e-05, + "loss": 0.2284, + "step": 4244 + }, + { + "epoch": 24.257142857142856, + "grad_norm": 38.94855880737305, + "learning_rate": 2.8603174603174604e-05, + "loss": 0.2179, + "step": 4245 + }, + { + "epoch": 24.262857142857143, + "grad_norm": 28.624399185180664, + "learning_rate": 2.85968253968254e-05, + "loss": 0.4882, + "step": 4246 + }, + { + "epoch": 24.268571428571427, + "grad_norm": 60.395606994628906, + "learning_rate": 2.859047619047619e-05, + "loss": 0.2819, + "step": 4247 + }, + { + "epoch": 24.274285714285714, + "grad_norm": 42.161983489990234, + "learning_rate": 2.8584126984126986e-05, + "loss": 0.2336, + "step": 4248 + }, + { + "epoch": 24.28, + "grad_norm": 45.44225311279297, + "learning_rate": 2.857777777777778e-05, + "loss": 0.3845, + "step": 4249 + }, + { + "epoch": 24.285714285714285, + "grad_norm": 54.161983489990234, + "learning_rate": 2.857142857142857e-05, + "loss": 0.2777, + "step": 4250 + }, + { + "epoch": 24.291428571428572, + "grad_norm": 213.60040283203125, + "learning_rate": 2.8565079365079367e-05, + "loss": 
0.4929, + "step": 4251 + }, + { + "epoch": 24.29714285714286, + "grad_norm": 61.110660552978516, + "learning_rate": 2.8558730158730163e-05, + "loss": 0.3036, + "step": 4252 + }, + { + "epoch": 24.302857142857142, + "grad_norm": 38.8966064453125, + "learning_rate": 2.8552380952380952e-05, + "loss": 0.3094, + "step": 4253 + }, + { + "epoch": 24.30857142857143, + "grad_norm": 62.35374450683594, + "learning_rate": 2.8546031746031748e-05, + "loss": 0.3099, + "step": 4254 + }, + { + "epoch": 24.314285714285713, + "grad_norm": 96.93309020996094, + "learning_rate": 2.8539682539682537e-05, + "loss": 0.3475, + "step": 4255 + }, + { + "epoch": 24.32, + "grad_norm": 37.66231918334961, + "learning_rate": 2.8533333333333333e-05, + "loss": 0.3573, + "step": 4256 + }, + { + "epoch": 24.325714285714287, + "grad_norm": 51.56706237792969, + "learning_rate": 2.852698412698413e-05, + "loss": 0.2146, + "step": 4257 + }, + { + "epoch": 24.33142857142857, + "grad_norm": 20.347219467163086, + "learning_rate": 2.852063492063492e-05, + "loss": 0.2294, + "step": 4258 + }, + { + "epoch": 24.337142857142858, + "grad_norm": 46.085289001464844, + "learning_rate": 2.8514285714285715e-05, + "loss": 0.2955, + "step": 4259 + }, + { + "epoch": 24.34285714285714, + "grad_norm": 45.59817123413086, + "learning_rate": 2.850793650793651e-05, + "loss": 0.2426, + "step": 4260 + }, + { + "epoch": 24.34857142857143, + "grad_norm": 48.673213958740234, + "learning_rate": 2.85015873015873e-05, + "loss": 0.2566, + "step": 4261 + }, + { + "epoch": 24.354285714285716, + "grad_norm": 44.20500564575195, + "learning_rate": 2.8495238095238096e-05, + "loss": 0.2162, + "step": 4262 + }, + { + "epoch": 24.36, + "grad_norm": 33.636749267578125, + "learning_rate": 2.8488888888888892e-05, + "loss": 0.4338, + "step": 4263 + }, + { + "epoch": 24.365714285714287, + "grad_norm": 57.70370101928711, + "learning_rate": 2.848253968253968e-05, + "loss": 0.3726, + "step": 4264 + }, + { + "epoch": 24.37142857142857, + "grad_norm": 
34.58115768432617, + "learning_rate": 2.8476190476190477e-05, + "loss": 0.2692, + "step": 4265 + }, + { + "epoch": 24.377142857142857, + "grad_norm": 140.1950225830078, + "learning_rate": 2.846984126984127e-05, + "loss": 0.2758, + "step": 4266 + }, + { + "epoch": 24.382857142857144, + "grad_norm": 24.006532669067383, + "learning_rate": 2.8463492063492066e-05, + "loss": 0.1841, + "step": 4267 + }, + { + "epoch": 24.388571428571428, + "grad_norm": 26.133983612060547, + "learning_rate": 2.845714285714286e-05, + "loss": 0.28, + "step": 4268 + }, + { + "epoch": 24.394285714285715, + "grad_norm": 42.2238655090332, + "learning_rate": 2.845079365079365e-05, + "loss": 0.1865, + "step": 4269 + }, + { + "epoch": 24.4, + "grad_norm": 29.878080368041992, + "learning_rate": 2.8444444444444447e-05, + "loss": 0.4168, + "step": 4270 + }, + { + "epoch": 24.405714285714286, + "grad_norm": 35.7902946472168, + "learning_rate": 2.843809523809524e-05, + "loss": 0.2483, + "step": 4271 + }, + { + "epoch": 24.411428571428573, + "grad_norm": 70.40202331542969, + "learning_rate": 2.8431746031746033e-05, + "loss": 0.5446, + "step": 4272 + }, + { + "epoch": 24.417142857142856, + "grad_norm": 48.534523010253906, + "learning_rate": 2.842539682539683e-05, + "loss": 0.3008, + "step": 4273 + }, + { + "epoch": 24.422857142857143, + "grad_norm": 106.88329315185547, + "learning_rate": 2.841904761904762e-05, + "loss": 0.2192, + "step": 4274 + }, + { + "epoch": 24.428571428571427, + "grad_norm": 33.87653732299805, + "learning_rate": 2.8412698412698414e-05, + "loss": 0.2836, + "step": 4275 + }, + { + "epoch": 24.434285714285714, + "grad_norm": 70.60962677001953, + "learning_rate": 2.840634920634921e-05, + "loss": 0.3215, + "step": 4276 + }, + { + "epoch": 24.44, + "grad_norm": 69.09635162353516, + "learning_rate": 2.84e-05, + "loss": 0.2381, + "step": 4277 + }, + { + "epoch": 24.445714285714285, + "grad_norm": 28.32038116455078, + "learning_rate": 2.8393650793650795e-05, + "loss": 0.3397, + "step": 4278 + 
}, + { + "epoch": 24.451428571428572, + "grad_norm": 70.71125030517578, + "learning_rate": 2.838730158730159e-05, + "loss": 0.5062, + "step": 4279 + }, + { + "epoch": 24.457142857142856, + "grad_norm": 48.60917282104492, + "learning_rate": 2.838095238095238e-05, + "loss": 0.3211, + "step": 4280 + }, + { + "epoch": 24.462857142857143, + "grad_norm": 312.99127197265625, + "learning_rate": 2.8374603174603176e-05, + "loss": 0.238, + "step": 4281 + }, + { + "epoch": 24.46857142857143, + "grad_norm": 30.63831901550293, + "learning_rate": 2.8368253968253972e-05, + "loss": 0.3494, + "step": 4282 + }, + { + "epoch": 24.474285714285713, + "grad_norm": 78.44001770019531, + "learning_rate": 2.8361904761904762e-05, + "loss": 0.2799, + "step": 4283 + }, + { + "epoch": 24.48, + "grad_norm": 28.793020248413086, + "learning_rate": 2.8355555555555558e-05, + "loss": 0.2468, + "step": 4284 + }, + { + "epoch": 24.485714285714284, + "grad_norm": 122.70006561279297, + "learning_rate": 2.8349206349206354e-05, + "loss": 0.2959, + "step": 4285 + }, + { + "epoch": 24.49142857142857, + "grad_norm": 24.2781982421875, + "learning_rate": 2.8342857142857143e-05, + "loss": 0.2785, + "step": 4286 + }, + { + "epoch": 24.497142857142858, + "grad_norm": 24.19087791442871, + "learning_rate": 2.833650793650794e-05, + "loss": 0.3076, + "step": 4287 + }, + { + "epoch": 24.502857142857142, + "grad_norm": 54.276973724365234, + "learning_rate": 2.8330158730158728e-05, + "loss": 0.2332, + "step": 4288 + }, + { + "epoch": 24.50857142857143, + "grad_norm": 49.06976318359375, + "learning_rate": 2.8323809523809524e-05, + "loss": 0.2764, + "step": 4289 + }, + { + "epoch": 24.514285714285712, + "grad_norm": 79.82915496826172, + "learning_rate": 2.831746031746032e-05, + "loss": 0.2594, + "step": 4290 + }, + { + "epoch": 24.52, + "grad_norm": 65.11991882324219, + "learning_rate": 2.831111111111111e-05, + "loss": 0.2255, + "step": 4291 + }, + { + "epoch": 24.525714285714287, + "grad_norm": 53.068416595458984, + 
"learning_rate": 2.8304761904761906e-05, + "loss": 0.2646, + "step": 4292 + }, + { + "epoch": 24.53142857142857, + "grad_norm": 53.878868103027344, + "learning_rate": 2.82984126984127e-05, + "loss": 0.3159, + "step": 4293 + }, + { + "epoch": 24.537142857142857, + "grad_norm": 81.9829330444336, + "learning_rate": 2.829206349206349e-05, + "loss": 0.3541, + "step": 4294 + }, + { + "epoch": 24.542857142857144, + "grad_norm": 42.4346923828125, + "learning_rate": 2.8285714285714287e-05, + "loss": 0.3058, + "step": 4295 + }, + { + "epoch": 24.548571428571428, + "grad_norm": 45.353668212890625, + "learning_rate": 2.8279365079365083e-05, + "loss": 0.2678, + "step": 4296 + }, + { + "epoch": 24.554285714285715, + "grad_norm": 74.28866577148438, + "learning_rate": 2.8273015873015872e-05, + "loss": 0.2563, + "step": 4297 + }, + { + "epoch": 24.56, + "grad_norm": 27.684419631958008, + "learning_rate": 2.8266666666666668e-05, + "loss": 0.2522, + "step": 4298 + }, + { + "epoch": 24.565714285714286, + "grad_norm": 76.28163146972656, + "learning_rate": 2.826031746031746e-05, + "loss": 0.2642, + "step": 4299 + }, + { + "epoch": 24.571428571428573, + "grad_norm": 52.390769958496094, + "learning_rate": 2.8253968253968253e-05, + "loss": 0.2927, + "step": 4300 + }, + { + "epoch": 24.577142857142857, + "grad_norm": 29.362049102783203, + "learning_rate": 2.824761904761905e-05, + "loss": 0.2513, + "step": 4301 + }, + { + "epoch": 24.582857142857144, + "grad_norm": 58.900634765625, + "learning_rate": 2.8241269841269842e-05, + "loss": 0.2636, + "step": 4302 + }, + { + "epoch": 24.588571428571427, + "grad_norm": 76.04304504394531, + "learning_rate": 2.8234920634920635e-05, + "loss": 0.3211, + "step": 4303 + }, + { + "epoch": 24.594285714285714, + "grad_norm": 36.00731658935547, + "learning_rate": 2.822857142857143e-05, + "loss": 0.2781, + "step": 4304 + }, + { + "epoch": 24.6, + "grad_norm": 142.11961364746094, + "learning_rate": 2.8222222222222223e-05, + "loss": 0.3775, + "step": 4305 + }, + 
{ + "epoch": 24.605714285714285, + "grad_norm": 47.81526565551758, + "learning_rate": 2.821587301587302e-05, + "loss": 0.3731, + "step": 4306 + }, + { + "epoch": 24.611428571428572, + "grad_norm": 35.750518798828125, + "learning_rate": 2.8209523809523812e-05, + "loss": 0.4115, + "step": 4307 + }, + { + "epoch": 24.617142857142856, + "grad_norm": 110.9820556640625, + "learning_rate": 2.8203174603174605e-05, + "loss": 0.3973, + "step": 4308 + }, + { + "epoch": 24.622857142857143, + "grad_norm": 43.487083435058594, + "learning_rate": 2.81968253968254e-05, + "loss": 0.2279, + "step": 4309 + }, + { + "epoch": 24.62857142857143, + "grad_norm": 58.84871292114258, + "learning_rate": 2.819047619047619e-05, + "loss": 0.308, + "step": 4310 + }, + { + "epoch": 24.634285714285713, + "grad_norm": 39.39466857910156, + "learning_rate": 2.8184126984126986e-05, + "loss": 0.2654, + "step": 4311 + }, + { + "epoch": 24.64, + "grad_norm": 73.448486328125, + "learning_rate": 2.8177777777777782e-05, + "loss": 0.2046, + "step": 4312 + }, + { + "epoch": 24.645714285714284, + "grad_norm": 34.662811279296875, + "learning_rate": 2.817142857142857e-05, + "loss": 0.3182, + "step": 4313 + }, + { + "epoch": 24.65142857142857, + "grad_norm": 92.18864440917969, + "learning_rate": 2.8165079365079367e-05, + "loss": 0.3164, + "step": 4314 + }, + { + "epoch": 24.65714285714286, + "grad_norm": 122.65282440185547, + "learning_rate": 2.8158730158730163e-05, + "loss": 0.3399, + "step": 4315 + }, + { + "epoch": 24.662857142857142, + "grad_norm": 73.22319793701172, + "learning_rate": 2.8152380952380953e-05, + "loss": 0.3516, + "step": 4316 + }, + { + "epoch": 24.66857142857143, + "grad_norm": 49.231956481933594, + "learning_rate": 2.814603174603175e-05, + "loss": 0.3248, + "step": 4317 + }, + { + "epoch": 24.674285714285713, + "grad_norm": 59.13899612426758, + "learning_rate": 2.8139682539682545e-05, + "loss": 0.3696, + "step": 4318 + }, + { + "epoch": 24.68, + "grad_norm": 24.286306381225586, + 
"learning_rate": 2.8133333333333334e-05, + "loss": 0.2992, + "step": 4319 + }, + { + "epoch": 24.685714285714287, + "grad_norm": 75.39820098876953, + "learning_rate": 2.812698412698413e-05, + "loss": 0.2236, + "step": 4320 + }, + { + "epoch": 24.69142857142857, + "grad_norm": 61.131248474121094, + "learning_rate": 2.812063492063492e-05, + "loss": 0.3016, + "step": 4321 + }, + { + "epoch": 24.697142857142858, + "grad_norm": 124.00677490234375, + "learning_rate": 2.8114285714285715e-05, + "loss": 0.4102, + "step": 4322 + }, + { + "epoch": 24.70285714285714, + "grad_norm": 96.09242248535156, + "learning_rate": 2.810793650793651e-05, + "loss": 0.2678, + "step": 4323 + }, + { + "epoch": 24.708571428571428, + "grad_norm": 26.77962303161621, + "learning_rate": 2.81015873015873e-05, + "loss": 0.294, + "step": 4324 + }, + { + "epoch": 24.714285714285715, + "grad_norm": 30.745365142822266, + "learning_rate": 2.8095238095238096e-05, + "loss": 0.2074, + "step": 4325 + }, + { + "epoch": 24.72, + "grad_norm": 63.81066131591797, + "learning_rate": 2.8088888888888893e-05, + "loss": 0.2527, + "step": 4326 + }, + { + "epoch": 24.725714285714286, + "grad_norm": 15.541707992553711, + "learning_rate": 2.8082539682539682e-05, + "loss": 0.2539, + "step": 4327 + }, + { + "epoch": 24.731428571428573, + "grad_norm": 41.85258102416992, + "learning_rate": 2.8076190476190478e-05, + "loss": 0.3006, + "step": 4328 + }, + { + "epoch": 24.737142857142857, + "grad_norm": 102.15760040283203, + "learning_rate": 2.8069841269841274e-05, + "loss": 0.2942, + "step": 4329 + }, + { + "epoch": 24.742857142857144, + "grad_norm": 28.917007446289062, + "learning_rate": 2.8063492063492063e-05, + "loss": 0.4143, + "step": 4330 + }, + { + "epoch": 24.748571428571427, + "grad_norm": 37.711669921875, + "learning_rate": 2.805714285714286e-05, + "loss": 0.2402, + "step": 4331 + }, + { + "epoch": 24.754285714285714, + "grad_norm": 120.78079223632812, + "learning_rate": 2.805079365079365e-05, + "loss": 0.2288, + 
"step": 4332 + }, + { + "epoch": 24.76, + "grad_norm": 25.932483673095703, + "learning_rate": 2.8044444444444444e-05, + "loss": 0.2307, + "step": 4333 + }, + { + "epoch": 24.765714285714285, + "grad_norm": 26.200965881347656, + "learning_rate": 2.803809523809524e-05, + "loss": 0.213, + "step": 4334 + }, + { + "epoch": 24.771428571428572, + "grad_norm": 58.8943977355957, + "learning_rate": 2.803174603174603e-05, + "loss": 0.475, + "step": 4335 + }, + { + "epoch": 24.777142857142856, + "grad_norm": 46.189361572265625, + "learning_rate": 2.8025396825396826e-05, + "loss": 0.2892, + "step": 4336 + }, + { + "epoch": 24.782857142857143, + "grad_norm": 26.55647087097168, + "learning_rate": 2.801904761904762e-05, + "loss": 0.3676, + "step": 4337 + }, + { + "epoch": 24.78857142857143, + "grad_norm": 30.934398651123047, + "learning_rate": 2.8012698412698414e-05, + "loss": 0.3309, + "step": 4338 + }, + { + "epoch": 24.794285714285714, + "grad_norm": 104.68560791015625, + "learning_rate": 2.8006349206349207e-05, + "loss": 0.2351, + "step": 4339 + }, + { + "epoch": 24.8, + "grad_norm": 52.946502685546875, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.2199, + "step": 4340 + }, + { + "epoch": 24.805714285714284, + "grad_norm": 59.598533630371094, + "learning_rate": 2.7993650793650796e-05, + "loss": 0.4019, + "step": 4341 + }, + { + "epoch": 24.81142857142857, + "grad_norm": 31.779327392578125, + "learning_rate": 2.7987301587301588e-05, + "loss": 0.4388, + "step": 4342 + }, + { + "epoch": 24.81714285714286, + "grad_norm": 104.10421752929688, + "learning_rate": 2.798095238095238e-05, + "loss": 0.3035, + "step": 4343 + }, + { + "epoch": 24.822857142857142, + "grad_norm": 71.49649047851562, + "learning_rate": 2.7974603174603177e-05, + "loss": 0.3221, + "step": 4344 + }, + { + "epoch": 24.82857142857143, + "grad_norm": 49.50043869018555, + "learning_rate": 2.7968253968253973e-05, + "loss": 0.1884, + "step": 4345 + }, + { + "epoch": 24.834285714285713, + "grad_norm": 
31.241344451904297, + "learning_rate": 2.7961904761904762e-05, + "loss": 0.2233, + "step": 4346 + }, + { + "epoch": 24.84, + "grad_norm": 66.66368865966797, + "learning_rate": 2.7955555555555558e-05, + "loss": 0.3339, + "step": 4347 + }, + { + "epoch": 24.845714285714287, + "grad_norm": 1207.3427734375, + "learning_rate": 2.7949206349206354e-05, + "loss": 0.2911, + "step": 4348 + }, + { + "epoch": 24.85142857142857, + "grad_norm": 38.64277648925781, + "learning_rate": 2.7942857142857143e-05, + "loss": 0.2442, + "step": 4349 + }, + { + "epoch": 24.857142857142858, + "grad_norm": 26.282543182373047, + "learning_rate": 2.793650793650794e-05, + "loss": 0.2649, + "step": 4350 + }, + { + "epoch": 24.86285714285714, + "grad_norm": 28.724164962768555, + "learning_rate": 2.7930158730158736e-05, + "loss": 0.3553, + "step": 4351 + }, + { + "epoch": 24.86857142857143, + "grad_norm": 30.950960159301758, + "learning_rate": 2.7923809523809525e-05, + "loss": 0.521, + "step": 4352 + }, + { + "epoch": 24.874285714285715, + "grad_norm": 35.390445709228516, + "learning_rate": 2.791746031746032e-05, + "loss": 0.3175, + "step": 4353 + }, + { + "epoch": 24.88, + "grad_norm": 268.7099304199219, + "learning_rate": 2.791111111111111e-05, + "loss": 0.419, + "step": 4354 + }, + { + "epoch": 24.885714285714286, + "grad_norm": 71.29864501953125, + "learning_rate": 2.7904761904761906e-05, + "loss": 0.2923, + "step": 4355 + }, + { + "epoch": 24.89142857142857, + "grad_norm": 133.0418243408203, + "learning_rate": 2.7898412698412702e-05, + "loss": 0.2464, + "step": 4356 + }, + { + "epoch": 24.897142857142857, + "grad_norm": 48.5760612487793, + "learning_rate": 2.789206349206349e-05, + "loss": 0.2966, + "step": 4357 + }, + { + "epoch": 24.902857142857144, + "grad_norm": 79.38921356201172, + "learning_rate": 2.7885714285714287e-05, + "loss": 0.3203, + "step": 4358 + }, + { + "epoch": 24.908571428571427, + "grad_norm": 41.58376693725586, + "learning_rate": 2.7879365079365083e-05, + "loss": 0.4117, + 
"step": 4359 + }, + { + "epoch": 24.914285714285715, + "grad_norm": 31.19057273864746, + "learning_rate": 2.7873015873015873e-05, + "loss": 0.3405, + "step": 4360 + }, + { + "epoch": 24.92, + "grad_norm": 26.2551212310791, + "learning_rate": 2.786666666666667e-05, + "loss": 0.2894, + "step": 4361 + }, + { + "epoch": 24.925714285714285, + "grad_norm": 56.0374641418457, + "learning_rate": 2.7860317460317465e-05, + "loss": 0.3965, + "step": 4362 + }, + { + "epoch": 24.931428571428572, + "grad_norm": 36.64375686645508, + "learning_rate": 2.7853968253968254e-05, + "loss": 0.3491, + "step": 4363 + }, + { + "epoch": 24.937142857142856, + "grad_norm": 90.42372131347656, + "learning_rate": 2.784761904761905e-05, + "loss": 0.2957, + "step": 4364 + }, + { + "epoch": 24.942857142857143, + "grad_norm": 45.03855895996094, + "learning_rate": 2.784126984126984e-05, + "loss": 0.2678, + "step": 4365 + }, + { + "epoch": 24.94857142857143, + "grad_norm": 23.052322387695312, + "learning_rate": 2.7834920634920635e-05, + "loss": 0.2565, + "step": 4366 + }, + { + "epoch": 24.954285714285714, + "grad_norm": 160.03709411621094, + "learning_rate": 2.782857142857143e-05, + "loss": 0.2906, + "step": 4367 + }, + { + "epoch": 24.96, + "grad_norm": 35.61563491821289, + "learning_rate": 2.782222222222222e-05, + "loss": 0.2952, + "step": 4368 + }, + { + "epoch": 24.965714285714284, + "grad_norm": 39.50947189331055, + "learning_rate": 2.7815873015873017e-05, + "loss": 0.2457, + "step": 4369 + }, + { + "epoch": 24.97142857142857, + "grad_norm": 20.795259475708008, + "learning_rate": 2.7809523809523813e-05, + "loss": 0.318, + "step": 4370 + }, + { + "epoch": 24.97714285714286, + "grad_norm": 427.93792724609375, + "learning_rate": 2.7803174603174602e-05, + "loss": 0.3685, + "step": 4371 + }, + { + "epoch": 24.982857142857142, + "grad_norm": 65.03075408935547, + "learning_rate": 2.7796825396825398e-05, + "loss": 0.2777, + "step": 4372 + }, + { + "epoch": 24.98857142857143, + "grad_norm": 
80.09349822998047, + "learning_rate": 2.7790476190476194e-05, + "loss": 0.2586, + "step": 4373 + }, + { + "epoch": 24.994285714285713, + "grad_norm": 52.3007698059082, + "learning_rate": 2.7784126984126983e-05, + "loss": 0.2647, + "step": 4374 + }, + { + "epoch": 25.0, + "grad_norm": 96.0640640258789, + "learning_rate": 2.777777777777778e-05, + "loss": 0.2789, + "step": 4375 + }, + { + "epoch": 25.0, + "eval_classes": 0, + "eval_loss": 0.6246998906135559, + "eval_map": 0.9312, + "eval_map_50": 0.9691, + "eval_map_75": 0.9566, + "eval_map_large": 0.9313, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9312, + "eval_map_small": -1.0, + "eval_mar_1": 0.7794, + "eval_mar_10": 0.9724, + "eval_mar_100": 0.9759, + "eval_mar_100_per_class": 0.9759, + "eval_mar_large": 0.9759, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.7404, + "eval_samples_per_second": 21.397, + "eval_steps_per_second": 2.693, + "step": 4375 + }, + { + "epoch": 25.005714285714287, + "grad_norm": 43.88606262207031, + "learning_rate": 2.7771428571428572e-05, + "loss": 0.2644, + "step": 4376 + }, + { + "epoch": 25.01142857142857, + "grad_norm": 61.545406341552734, + "learning_rate": 2.7765079365079368e-05, + "loss": 0.3157, + "step": 4377 + }, + { + "epoch": 25.017142857142858, + "grad_norm": 21.369365692138672, + "learning_rate": 2.775873015873016e-05, + "loss": 0.3259, + "step": 4378 + }, + { + "epoch": 25.02285714285714, + "grad_norm": 59.84233093261719, + "learning_rate": 2.7752380952380953e-05, + "loss": 0.2608, + "step": 4379 + }, + { + "epoch": 25.02857142857143, + "grad_norm": 51.466552734375, + "learning_rate": 2.774603174603175e-05, + "loss": 0.3375, + "step": 4380 + }, + { + "epoch": 25.034285714285716, + "grad_norm": 85.64466094970703, + "learning_rate": 2.7739682539682542e-05, + "loss": 0.2167, + "step": 4381 + }, + { + "epoch": 25.04, + "grad_norm": 22.61601448059082, + "learning_rate": 2.7733333333333334e-05, + "loss": 0.274, + "step": 4382 + }, + { + 
"epoch": 25.045714285714286, + "grad_norm": 39.32375717163086, + "learning_rate": 2.772698412698413e-05, + "loss": 0.2653, + "step": 4383 + }, + { + "epoch": 25.05142857142857, + "grad_norm": 62.446346282958984, + "learning_rate": 2.7720634920634926e-05, + "loss": 0.2597, + "step": 4384 + }, + { + "epoch": 25.057142857142857, + "grad_norm": 57.022151947021484, + "learning_rate": 2.7714285714285716e-05, + "loss": 0.3768, + "step": 4385 + }, + { + "epoch": 25.062857142857144, + "grad_norm": 29.207191467285156, + "learning_rate": 2.770793650793651e-05, + "loss": 0.296, + "step": 4386 + }, + { + "epoch": 25.068571428571428, + "grad_norm": 67.32698822021484, + "learning_rate": 2.77015873015873e-05, + "loss": 0.257, + "step": 4387 + }, + { + "epoch": 25.074285714285715, + "grad_norm": 73.68763732910156, + "learning_rate": 2.7695238095238097e-05, + "loss": 0.2113, + "step": 4388 + }, + { + "epoch": 25.08, + "grad_norm": 58.43614196777344, + "learning_rate": 2.7688888888888893e-05, + "loss": 0.2352, + "step": 4389 + }, + { + "epoch": 25.085714285714285, + "grad_norm": 25.05832290649414, + "learning_rate": 2.7682539682539682e-05, + "loss": 0.2948, + "step": 4390 + }, + { + "epoch": 25.091428571428573, + "grad_norm": 47.36033630371094, + "learning_rate": 2.7676190476190478e-05, + "loss": 0.3172, + "step": 4391 + }, + { + "epoch": 25.097142857142856, + "grad_norm": 32.92298126220703, + "learning_rate": 2.7669841269841274e-05, + "loss": 0.3204, + "step": 4392 + }, + { + "epoch": 25.102857142857143, + "grad_norm": 51.99428939819336, + "learning_rate": 2.7663492063492063e-05, + "loss": 0.3066, + "step": 4393 + }, + { + "epoch": 25.10857142857143, + "grad_norm": 14.884100914001465, + "learning_rate": 2.765714285714286e-05, + "loss": 0.3006, + "step": 4394 + }, + { + "epoch": 25.114285714285714, + "grad_norm": 25.125812530517578, + "learning_rate": 2.765079365079365e-05, + "loss": 0.3255, + "step": 4395 + }, + { + "epoch": 25.12, + "grad_norm": 51.343177795410156, + 
"learning_rate": 2.7644444444444445e-05, + "loss": 0.3462, + "step": 4396 + }, + { + "epoch": 25.125714285714285, + "grad_norm": 72.3733901977539, + "learning_rate": 2.763809523809524e-05, + "loss": 0.621, + "step": 4397 + }, + { + "epoch": 25.13142857142857, + "grad_norm": 78.37907409667969, + "learning_rate": 2.763174603174603e-05, + "loss": 0.4284, + "step": 4398 + }, + { + "epoch": 25.13714285714286, + "grad_norm": 144.72216796875, + "learning_rate": 2.7625396825396826e-05, + "loss": 0.3259, + "step": 4399 + }, + { + "epoch": 25.142857142857142, + "grad_norm": 47.657203674316406, + "learning_rate": 2.7619047619047622e-05, + "loss": 0.3515, + "step": 4400 + }, + { + "epoch": 25.14857142857143, + "grad_norm": 29.482498168945312, + "learning_rate": 2.761269841269841e-05, + "loss": 0.3151, + "step": 4401 + }, + { + "epoch": 25.154285714285713, + "grad_norm": 43.94999694824219, + "learning_rate": 2.7606349206349207e-05, + "loss": 0.2682, + "step": 4402 + }, + { + "epoch": 25.16, + "grad_norm": 43.80424880981445, + "learning_rate": 2.7600000000000003e-05, + "loss": 0.2339, + "step": 4403 + }, + { + "epoch": 25.165714285714287, + "grad_norm": 56.889461517333984, + "learning_rate": 2.7593650793650793e-05, + "loss": 0.3043, + "step": 4404 + }, + { + "epoch": 25.17142857142857, + "grad_norm": 378.54022216796875, + "learning_rate": 2.758730158730159e-05, + "loss": 0.3664, + "step": 4405 + }, + { + "epoch": 25.177142857142858, + "grad_norm": 47.72706985473633, + "learning_rate": 2.7580952380952378e-05, + "loss": 0.2012, + "step": 4406 + }, + { + "epoch": 25.18285714285714, + "grad_norm": 116.41458129882812, + "learning_rate": 2.7574603174603174e-05, + "loss": 0.3051, + "step": 4407 + }, + { + "epoch": 25.18857142857143, + "grad_norm": 29.590312957763672, + "learning_rate": 2.756825396825397e-05, + "loss": 0.2562, + "step": 4408 + }, + { + "epoch": 25.194285714285716, + "grad_norm": 36.94352722167969, + "learning_rate": 2.7561904761904763e-05, + "loss": 0.2394, + "step": 
4409 + }, + { + "epoch": 25.2, + "grad_norm": 19.118684768676758, + "learning_rate": 2.7555555555555555e-05, + "loss": 0.1968, + "step": 4410 + }, + { + "epoch": 25.205714285714286, + "grad_norm": 23.596872329711914, + "learning_rate": 2.754920634920635e-05, + "loss": 0.2467, + "step": 4411 + }, + { + "epoch": 25.21142857142857, + "grad_norm": 45.40176010131836, + "learning_rate": 2.7542857142857144e-05, + "loss": 0.3012, + "step": 4412 + }, + { + "epoch": 25.217142857142857, + "grad_norm": 132.94227600097656, + "learning_rate": 2.7536507936507937e-05, + "loss": 0.2807, + "step": 4413 + }, + { + "epoch": 25.222857142857144, + "grad_norm": 34.401912689208984, + "learning_rate": 2.7530158730158733e-05, + "loss": 0.292, + "step": 4414 + }, + { + "epoch": 25.228571428571428, + "grad_norm": 24.77764129638672, + "learning_rate": 2.7523809523809525e-05, + "loss": 0.2501, + "step": 4415 + }, + { + "epoch": 25.234285714285715, + "grad_norm": 19.19468116760254, + "learning_rate": 2.751746031746032e-05, + "loss": 0.3163, + "step": 4416 + }, + { + "epoch": 25.24, + "grad_norm": 292.3793029785156, + "learning_rate": 2.751111111111111e-05, + "loss": 0.2731, + "step": 4417 + }, + { + "epoch": 25.245714285714286, + "grad_norm": 52.74235153198242, + "learning_rate": 2.7504761904761907e-05, + "loss": 0.3278, + "step": 4418 + }, + { + "epoch": 25.251428571428573, + "grad_norm": 28.179885864257812, + "learning_rate": 2.7498412698412703e-05, + "loss": 0.3716, + "step": 4419 + }, + { + "epoch": 25.257142857142856, + "grad_norm": 12.870906829833984, + "learning_rate": 2.7492063492063492e-05, + "loss": 0.307, + "step": 4420 + }, + { + "epoch": 25.262857142857143, + "grad_norm": 34.991180419921875, + "learning_rate": 2.7485714285714288e-05, + "loss": 0.2501, + "step": 4421 + }, + { + "epoch": 25.268571428571427, + "grad_norm": 25.55196189880371, + "learning_rate": 2.7479365079365084e-05, + "loss": 0.2556, + "step": 4422 + }, + { + "epoch": 25.274285714285714, + "grad_norm": 
38.679012298583984, + "learning_rate": 2.7473015873015873e-05, + "loss": 0.3446, + "step": 4423 + }, + { + "epoch": 25.28, + "grad_norm": 54.33946990966797, + "learning_rate": 2.746666666666667e-05, + "loss": 0.1951, + "step": 4424 + }, + { + "epoch": 25.285714285714285, + "grad_norm": 268.55279541015625, + "learning_rate": 2.7460317460317465e-05, + "loss": 0.4006, + "step": 4425 + }, + { + "epoch": 25.291428571428572, + "grad_norm": 33.12761306762695, + "learning_rate": 2.7453968253968254e-05, + "loss": 0.3447, + "step": 4426 + }, + { + "epoch": 25.29714285714286, + "grad_norm": 35.51462936401367, + "learning_rate": 2.744761904761905e-05, + "loss": 0.1925, + "step": 4427 + }, + { + "epoch": 25.302857142857142, + "grad_norm": 352.35626220703125, + "learning_rate": 2.744126984126984e-05, + "loss": 0.3144, + "step": 4428 + }, + { + "epoch": 25.30857142857143, + "grad_norm": 28.301921844482422, + "learning_rate": 2.7434920634920636e-05, + "loss": 0.2511, + "step": 4429 + }, + { + "epoch": 25.314285714285713, + "grad_norm": 63.250953674316406, + "learning_rate": 2.742857142857143e-05, + "loss": 0.2418, + "step": 4430 + }, + { + "epoch": 25.32, + "grad_norm": 37.64375305175781, + "learning_rate": 2.742222222222222e-05, + "loss": 0.2395, + "step": 4431 + }, + { + "epoch": 25.325714285714287, + "grad_norm": 25.647315979003906, + "learning_rate": 2.7415873015873017e-05, + "loss": 0.2811, + "step": 4432 + }, + { + "epoch": 25.33142857142857, + "grad_norm": 65.68405151367188, + "learning_rate": 2.7409523809523813e-05, + "loss": 0.3045, + "step": 4433 + }, + { + "epoch": 25.337142857142858, + "grad_norm": 56.48481750488281, + "learning_rate": 2.7403174603174602e-05, + "loss": 0.2342, + "step": 4434 + }, + { + "epoch": 25.34285714285714, + "grad_norm": 56.583316802978516, + "learning_rate": 2.7396825396825398e-05, + "loss": 0.2946, + "step": 4435 + }, + { + "epoch": 25.34857142857143, + "grad_norm": 334.7755432128906, + "learning_rate": 2.7390476190476194e-05, + "loss": 
0.3795, + "step": 4436 + }, + { + "epoch": 25.354285714285716, + "grad_norm": 28.795318603515625, + "learning_rate": 2.7384126984126984e-05, + "loss": 0.1844, + "step": 4437 + }, + { + "epoch": 25.36, + "grad_norm": 547.7698974609375, + "learning_rate": 2.737777777777778e-05, + "loss": 0.3293, + "step": 4438 + }, + { + "epoch": 25.365714285714287, + "grad_norm": 129.0718536376953, + "learning_rate": 2.737142857142857e-05, + "loss": 0.3313, + "step": 4439 + }, + { + "epoch": 25.37142857142857, + "grad_norm": 98.94517517089844, + "learning_rate": 2.7365079365079365e-05, + "loss": 0.3156, + "step": 4440 + }, + { + "epoch": 25.377142857142857, + "grad_norm": 43.74811935424805, + "learning_rate": 2.735873015873016e-05, + "loss": 0.3324, + "step": 4441 + }, + { + "epoch": 25.382857142857144, + "grad_norm": 68.41603088378906, + "learning_rate": 2.735238095238095e-05, + "loss": 0.3158, + "step": 4442 + }, + { + "epoch": 25.388571428571428, + "grad_norm": 37.616661071777344, + "learning_rate": 2.7346031746031746e-05, + "loss": 0.22, + "step": 4443 + }, + { + "epoch": 25.394285714285715, + "grad_norm": 51.03703689575195, + "learning_rate": 2.7339682539682542e-05, + "loss": 0.3034, + "step": 4444 + }, + { + "epoch": 25.4, + "grad_norm": 13.909053802490234, + "learning_rate": 2.733333333333333e-05, + "loss": 0.2913, + "step": 4445 + }, + { + "epoch": 25.405714285714286, + "grad_norm": 65.43724822998047, + "learning_rate": 2.7326984126984127e-05, + "loss": 0.215, + "step": 4446 + }, + { + "epoch": 25.411428571428573, + "grad_norm": 58.34597396850586, + "learning_rate": 2.7320634920634923e-05, + "loss": 0.2464, + "step": 4447 + }, + { + "epoch": 25.417142857142856, + "grad_norm": 38.75104522705078, + "learning_rate": 2.7314285714285716e-05, + "loss": 0.3302, + "step": 4448 + }, + { + "epoch": 25.422857142857143, + "grad_norm": 19.12841796875, + "learning_rate": 2.730793650793651e-05, + "loss": 0.276, + "step": 4449 + }, + { + "epoch": 25.428571428571427, + "grad_norm": 
39.01633834838867, + "learning_rate": 2.73015873015873e-05, + "loss": 0.3066, + "step": 4450 + }, + { + "epoch": 25.434285714285714, + "grad_norm": 29.239784240722656, + "learning_rate": 2.7295238095238097e-05, + "loss": 0.154, + "step": 4451 + }, + { + "epoch": 25.44, + "grad_norm": 69.61752319335938, + "learning_rate": 2.728888888888889e-05, + "loss": 0.4975, + "step": 4452 + }, + { + "epoch": 25.445714285714285, + "grad_norm": 75.92781066894531, + "learning_rate": 2.7282539682539683e-05, + "loss": 0.2437, + "step": 4453 + }, + { + "epoch": 25.451428571428572, + "grad_norm": 86.27715301513672, + "learning_rate": 2.727619047619048e-05, + "loss": 0.4747, + "step": 4454 + }, + { + "epoch": 25.457142857142856, + "grad_norm": 23.397066116333008, + "learning_rate": 2.7269841269841275e-05, + "loss": 0.3549, + "step": 4455 + }, + { + "epoch": 25.462857142857143, + "grad_norm": 24.970991134643555, + "learning_rate": 2.7263492063492064e-05, + "loss": 0.2626, + "step": 4456 + }, + { + "epoch": 25.46857142857143, + "grad_norm": 37.73689651489258, + "learning_rate": 2.725714285714286e-05, + "loss": 0.2319, + "step": 4457 + }, + { + "epoch": 25.474285714285713, + "grad_norm": 46.63981628417969, + "learning_rate": 2.7250793650793656e-05, + "loss": 0.2903, + "step": 4458 + }, + { + "epoch": 25.48, + "grad_norm": 33.05022430419922, + "learning_rate": 2.7244444444444445e-05, + "loss": 0.2529, + "step": 4459 + }, + { + "epoch": 25.485714285714284, + "grad_norm": 52.30170822143555, + "learning_rate": 2.723809523809524e-05, + "loss": 0.3641, + "step": 4460 + }, + { + "epoch": 25.49142857142857, + "grad_norm": 66.10030364990234, + "learning_rate": 2.723174603174603e-05, + "loss": 0.2233, + "step": 4461 + }, + { + "epoch": 25.497142857142858, + "grad_norm": 20.06730079650879, + "learning_rate": 2.7225396825396827e-05, + "loss": 0.2077, + "step": 4462 + }, + { + "epoch": 25.502857142857142, + "grad_norm": 68.0523910522461, + "learning_rate": 2.7219047619047623e-05, + "loss": 0.2541, + 
"step": 4463 + }, + { + "epoch": 25.50857142857143, + "grad_norm": 46.033531188964844, + "learning_rate": 2.7212698412698412e-05, + "loss": 0.1659, + "step": 4464 + }, + { + "epoch": 25.514285714285712, + "grad_norm": 148.0507049560547, + "learning_rate": 2.7206349206349208e-05, + "loss": 0.2559, + "step": 4465 + }, + { + "epoch": 25.52, + "grad_norm": 211.72640991210938, + "learning_rate": 2.7200000000000004e-05, + "loss": 0.5573, + "step": 4466 + }, + { + "epoch": 25.525714285714287, + "grad_norm": 41.14247512817383, + "learning_rate": 2.7193650793650793e-05, + "loss": 0.234, + "step": 4467 + }, + { + "epoch": 25.53142857142857, + "grad_norm": 40.05872344970703, + "learning_rate": 2.718730158730159e-05, + "loss": 0.2458, + "step": 4468 + }, + { + "epoch": 25.537142857142857, + "grad_norm": 18.45254135131836, + "learning_rate": 2.7180952380952385e-05, + "loss": 0.2387, + "step": 4469 + }, + { + "epoch": 25.542857142857144, + "grad_norm": 75.00344848632812, + "learning_rate": 2.7174603174603174e-05, + "loss": 0.2262, + "step": 4470 + }, + { + "epoch": 25.548571428571428, + "grad_norm": 53.357627868652344, + "learning_rate": 2.716825396825397e-05, + "loss": 0.2403, + "step": 4471 + }, + { + "epoch": 25.554285714285715, + "grad_norm": 57.647972106933594, + "learning_rate": 2.716190476190476e-05, + "loss": 0.242, + "step": 4472 + }, + { + "epoch": 25.56, + "grad_norm": 54.45359420776367, + "learning_rate": 2.7155555555555556e-05, + "loss": 0.2642, + "step": 4473 + }, + { + "epoch": 25.565714285714286, + "grad_norm": 41.4904670715332, + "learning_rate": 2.7149206349206352e-05, + "loss": 0.2704, + "step": 4474 + }, + { + "epoch": 25.571428571428573, + "grad_norm": 29.26909637451172, + "learning_rate": 2.714285714285714e-05, + "loss": 0.3188, + "step": 4475 + }, + { + "epoch": 25.577142857142857, + "grad_norm": 268.87744140625, + "learning_rate": 2.7136507936507937e-05, + "loss": 0.4228, + "step": 4476 + }, + { + "epoch": 25.582857142857144, + "grad_norm": 
32.49101638793945, + "learning_rate": 2.7130158730158733e-05, + "loss": 0.2181, + "step": 4477 + }, + { + "epoch": 25.588571428571427, + "grad_norm": 43.50292205810547, + "learning_rate": 2.7123809523809522e-05, + "loss": 0.2767, + "step": 4478 + }, + { + "epoch": 25.594285714285714, + "grad_norm": 41.27511215209961, + "learning_rate": 2.7117460317460318e-05, + "loss": 0.2836, + "step": 4479 + }, + { + "epoch": 25.6, + "grad_norm": 75.72777557373047, + "learning_rate": 2.7111111111111114e-05, + "loss": 0.3726, + "step": 4480 + }, + { + "epoch": 25.605714285714285, + "grad_norm": 25.272321701049805, + "learning_rate": 2.7104761904761904e-05, + "loss": 0.3072, + "step": 4481 + }, + { + "epoch": 25.611428571428572, + "grad_norm": 68.56621551513672, + "learning_rate": 2.70984126984127e-05, + "loss": 0.2443, + "step": 4482 + }, + { + "epoch": 25.617142857142856, + "grad_norm": 27.494661331176758, + "learning_rate": 2.7092063492063492e-05, + "loss": 0.2897, + "step": 4483 + }, + { + "epoch": 25.622857142857143, + "grad_norm": 66.72151184082031, + "learning_rate": 2.7085714285714285e-05, + "loss": 0.2782, + "step": 4484 + }, + { + "epoch": 25.62857142857143, + "grad_norm": 53.125858306884766, + "learning_rate": 2.707936507936508e-05, + "loss": 0.3291, + "step": 4485 + }, + { + "epoch": 25.634285714285713, + "grad_norm": 42.118446350097656, + "learning_rate": 2.7073015873015874e-05, + "loss": 0.2855, + "step": 4486 + }, + { + "epoch": 25.64, + "grad_norm": 25.07311248779297, + "learning_rate": 2.706666666666667e-05, + "loss": 0.3883, + "step": 4487 + }, + { + "epoch": 25.645714285714284, + "grad_norm": 31.401103973388672, + "learning_rate": 2.7060317460317462e-05, + "loss": 0.3002, + "step": 4488 + }, + { + "epoch": 25.65142857142857, + "grad_norm": 28.622156143188477, + "learning_rate": 2.7053968253968255e-05, + "loss": 0.3207, + "step": 4489 + }, + { + "epoch": 25.65714285714286, + "grad_norm": 23.805086135864258, + "learning_rate": 2.704761904761905e-05, + "loss": 
0.208, + "step": 4490 + }, + { + "epoch": 25.662857142857142, + "grad_norm": 140.76644897460938, + "learning_rate": 2.7041269841269843e-05, + "loss": 0.2796, + "step": 4491 + }, + { + "epoch": 25.66857142857143, + "grad_norm": 104.07940673828125, + "learning_rate": 2.7034920634920636e-05, + "loss": 0.2813, + "step": 4492 + }, + { + "epoch": 25.674285714285713, + "grad_norm": 985.031982421875, + "learning_rate": 2.7028571428571432e-05, + "loss": 0.2497, + "step": 4493 + }, + { + "epoch": 25.68, + "grad_norm": 36.1199836730957, + "learning_rate": 2.702222222222222e-05, + "loss": 0.2631, + "step": 4494 + }, + { + "epoch": 25.685714285714287, + "grad_norm": 58.97662353515625, + "learning_rate": 2.7015873015873017e-05, + "loss": 0.3707, + "step": 4495 + }, + { + "epoch": 25.69142857142857, + "grad_norm": 24.511669158935547, + "learning_rate": 2.7009523809523813e-05, + "loss": 0.2219, + "step": 4496 + }, + { + "epoch": 25.697142857142858, + "grad_norm": 80.04442596435547, + "learning_rate": 2.7003174603174603e-05, + "loss": 0.3375, + "step": 4497 + }, + { + "epoch": 25.70285714285714, + "grad_norm": 74.19042205810547, + "learning_rate": 2.69968253968254e-05, + "loss": 0.2039, + "step": 4498 + }, + { + "epoch": 25.708571428571428, + "grad_norm": 124.21507263183594, + "learning_rate": 2.6990476190476195e-05, + "loss": 0.4383, + "step": 4499 + }, + { + "epoch": 25.714285714285715, + "grad_norm": 38.0180778503418, + "learning_rate": 2.6984126984126984e-05, + "loss": 0.3133, + "step": 4500 + }, + { + "epoch": 25.72, + "grad_norm": 283.60150146484375, + "learning_rate": 2.697777777777778e-05, + "loss": 0.3557, + "step": 4501 + }, + { + "epoch": 25.725714285714286, + "grad_norm": 33.72235107421875, + "learning_rate": 2.6971428571428576e-05, + "loss": 0.3229, + "step": 4502 + }, + { + "epoch": 25.731428571428573, + "grad_norm": 88.3503189086914, + "learning_rate": 2.6965079365079365e-05, + "loss": 0.2675, + "step": 4503 + }, + { + "epoch": 25.737142857142857, + "grad_norm": 
35.932823181152344, + "learning_rate": 2.695873015873016e-05, + "loss": 0.2786, + "step": 4504 + }, + { + "epoch": 25.742857142857144, + "grad_norm": 46.16046142578125, + "learning_rate": 2.695238095238095e-05, + "loss": 0.304, + "step": 4505 + }, + { + "epoch": 25.748571428571427, + "grad_norm": 43.43974685668945, + "learning_rate": 2.6946031746031747e-05, + "loss": 0.2721, + "step": 4506 + }, + { + "epoch": 25.754285714285714, + "grad_norm": 25.076675415039062, + "learning_rate": 2.6939682539682543e-05, + "loss": 0.3251, + "step": 4507 + }, + { + "epoch": 25.76, + "grad_norm": 66.32283020019531, + "learning_rate": 2.6933333333333332e-05, + "loss": 0.3436, + "step": 4508 + }, + { + "epoch": 25.765714285714285, + "grad_norm": 34.4593391418457, + "learning_rate": 2.6926984126984128e-05, + "loss": 0.3827, + "step": 4509 + }, + { + "epoch": 25.771428571428572, + "grad_norm": 41.65052795410156, + "learning_rate": 2.6920634920634924e-05, + "loss": 0.2946, + "step": 4510 + }, + { + "epoch": 25.777142857142856, + "grad_norm": 59.617496490478516, + "learning_rate": 2.6914285714285713e-05, + "loss": 0.2359, + "step": 4511 + }, + { + "epoch": 25.782857142857143, + "grad_norm": 444.38128662109375, + "learning_rate": 2.690793650793651e-05, + "loss": 0.2877, + "step": 4512 + }, + { + "epoch": 25.78857142857143, + "grad_norm": 67.48945617675781, + "learning_rate": 2.6901587301587305e-05, + "loss": 0.3101, + "step": 4513 + }, + { + "epoch": 25.794285714285714, + "grad_norm": 47.88575744628906, + "learning_rate": 2.6895238095238094e-05, + "loss": 0.4117, + "step": 4514 + }, + { + "epoch": 25.8, + "grad_norm": 35.176971435546875, + "learning_rate": 2.688888888888889e-05, + "loss": 0.3138, + "step": 4515 + }, + { + "epoch": 25.805714285714284, + "grad_norm": 69.59241485595703, + "learning_rate": 2.688253968253968e-05, + "loss": 0.3087, + "step": 4516 + }, + { + "epoch": 25.81142857142857, + "grad_norm": 47.38324737548828, + "learning_rate": 2.6876190476190476e-05, + "loss": 0.3266, 
+ "step": 4517 + }, + { + "epoch": 25.81714285714286, + "grad_norm": 66.85245513916016, + "learning_rate": 2.6869841269841272e-05, + "loss": 0.2838, + "step": 4518 + }, + { + "epoch": 25.822857142857142, + "grad_norm": 34.234954833984375, + "learning_rate": 2.6863492063492064e-05, + "loss": 0.237, + "step": 4519 + }, + { + "epoch": 25.82857142857143, + "grad_norm": 37.65780258178711, + "learning_rate": 2.6857142857142857e-05, + "loss": 0.3527, + "step": 4520 + }, + { + "epoch": 25.834285714285713, + "grad_norm": 17.731443405151367, + "learning_rate": 2.6850793650793653e-05, + "loss": 0.3085, + "step": 4521 + }, + { + "epoch": 25.84, + "grad_norm": 54.15608215332031, + "learning_rate": 2.6844444444444446e-05, + "loss": 0.2654, + "step": 4522 + }, + { + "epoch": 25.845714285714287, + "grad_norm": 30.08025550842285, + "learning_rate": 2.683809523809524e-05, + "loss": 0.2865, + "step": 4523 + }, + { + "epoch": 25.85142857142857, + "grad_norm": 105.50086212158203, + "learning_rate": 2.6831746031746034e-05, + "loss": 0.2484, + "step": 4524 + }, + { + "epoch": 25.857142857142858, + "grad_norm": 44.405601501464844, + "learning_rate": 2.6825396825396827e-05, + "loss": 0.3007, + "step": 4525 + }, + { + "epoch": 25.86285714285714, + "grad_norm": 46.13258361816406, + "learning_rate": 2.6819047619047623e-05, + "loss": 0.2646, + "step": 4526 + }, + { + "epoch": 25.86857142857143, + "grad_norm": 32.382591247558594, + "learning_rate": 2.6812698412698412e-05, + "loss": 0.3584, + "step": 4527 + }, + { + "epoch": 25.874285714285715, + "grad_norm": 30.748363494873047, + "learning_rate": 2.6806349206349208e-05, + "loss": 0.3738, + "step": 4528 + }, + { + "epoch": 25.88, + "grad_norm": 180.90086364746094, + "learning_rate": 2.6800000000000004e-05, + "loss": 0.3811, + "step": 4529 + }, + { + "epoch": 25.885714285714286, + "grad_norm": 39.43400192260742, + "learning_rate": 2.6793650793650794e-05, + "loss": 0.4279, + "step": 4530 + }, + { + "epoch": 25.89142857142857, + "grad_norm": 
44.61370086669922, + "learning_rate": 2.678730158730159e-05, + "loss": 0.3784, + "step": 4531 + }, + { + "epoch": 25.897142857142857, + "grad_norm": 102.19576263427734, + "learning_rate": 2.6780952380952386e-05, + "loss": 0.2871, + "step": 4532 + }, + { + "epoch": 25.902857142857144, + "grad_norm": 127.11107635498047, + "learning_rate": 2.6774603174603175e-05, + "loss": 0.457, + "step": 4533 + }, + { + "epoch": 25.908571428571427, + "grad_norm": 81.60096740722656, + "learning_rate": 2.676825396825397e-05, + "loss": 0.3186, + "step": 4534 + }, + { + "epoch": 25.914285714285715, + "grad_norm": 721.7100830078125, + "learning_rate": 2.6761904761904767e-05, + "loss": 0.3064, + "step": 4535 + }, + { + "epoch": 25.92, + "grad_norm": 28.72801971435547, + "learning_rate": 2.6755555555555556e-05, + "loss": 0.4099, + "step": 4536 + }, + { + "epoch": 25.925714285714285, + "grad_norm": 44.58869171142578, + "learning_rate": 2.6749206349206352e-05, + "loss": 0.2749, + "step": 4537 + }, + { + "epoch": 25.931428571428572, + "grad_norm": 211.85838317871094, + "learning_rate": 2.674285714285714e-05, + "loss": 0.329, + "step": 4538 + }, + { + "epoch": 25.937142857142856, + "grad_norm": 38.062259674072266, + "learning_rate": 2.6736507936507937e-05, + "loss": 0.2823, + "step": 4539 + }, + { + "epoch": 25.942857142857143, + "grad_norm": 112.61495971679688, + "learning_rate": 2.6730158730158733e-05, + "loss": 0.2566, + "step": 4540 + }, + { + "epoch": 25.94857142857143, + "grad_norm": 40.20820999145508, + "learning_rate": 2.6723809523809523e-05, + "loss": 0.4612, + "step": 4541 + }, + { + "epoch": 25.954285714285714, + "grad_norm": 40.41714096069336, + "learning_rate": 2.671746031746032e-05, + "loss": 0.297, + "step": 4542 + }, + { + "epoch": 25.96, + "grad_norm": 27.733102798461914, + "learning_rate": 2.6711111111111115e-05, + "loss": 0.3345, + "step": 4543 + }, + { + "epoch": 25.965714285714284, + "grad_norm": 36.81207275390625, + "learning_rate": 2.6704761904761904e-05, + "loss": 
0.2943, + "step": 4544 + }, + { + "epoch": 25.97142857142857, + "grad_norm": 50.07954025268555, + "learning_rate": 2.66984126984127e-05, + "loss": 0.3336, + "step": 4545 + }, + { + "epoch": 25.97714285714286, + "grad_norm": 62.3792724609375, + "learning_rate": 2.6692063492063496e-05, + "loss": 0.3614, + "step": 4546 + }, + { + "epoch": 25.982857142857142, + "grad_norm": 101.2016830444336, + "learning_rate": 2.6685714285714285e-05, + "loss": 0.3905, + "step": 4547 + }, + { + "epoch": 25.98857142857143, + "grad_norm": 17.869173049926758, + "learning_rate": 2.667936507936508e-05, + "loss": 0.3525, + "step": 4548 + }, + { + "epoch": 25.994285714285713, + "grad_norm": 59.47468948364258, + "learning_rate": 2.667301587301587e-05, + "loss": 0.2708, + "step": 4549 + }, + { + "epoch": 26.0, + "grad_norm": 67.94355010986328, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.2781, + "step": 4550 + }, + { + "epoch": 26.0, + "eval_classes": 0, + "eval_loss": 0.6311623454093933, + "eval_map": 0.9147, + "eval_map_50": 0.9617, + "eval_map_75": 0.9475, + "eval_map_large": 0.9147, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9147, + "eval_map_small": -1.0, + "eval_mar_1": 0.7863, + "eval_mar_10": 0.9635, + "eval_mar_100": 0.9679, + "eval_mar_100_per_class": 0.9679, + "eval_mar_large": 0.9679, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.6762, + "eval_samples_per_second": 21.497, + "eval_steps_per_second": 2.705, + "step": 4550 + }, + { + "epoch": 26.005714285714287, + "grad_norm": 36.41327667236328, + "learning_rate": 2.6660317460317463e-05, + "loss": 0.3754, + "step": 4551 + }, + { + "epoch": 26.01142857142857, + "grad_norm": 28.15723991394043, + "learning_rate": 2.6653968253968252e-05, + "loss": 0.3568, + "step": 4552 + }, + { + "epoch": 26.017142857142858, + "grad_norm": 24.46324348449707, + "learning_rate": 2.6647619047619048e-05, + "loss": 0.2012, + "step": 4553 + }, + { + "epoch": 26.02285714285714, + "grad_norm": 51.601173400878906, + 
"learning_rate": 2.6641269841269844e-05, + "loss": 0.2799, + "step": 4554 + }, + { + "epoch": 26.02857142857143, + "grad_norm": 24.85300636291504, + "learning_rate": 2.6634920634920633e-05, + "loss": 0.3831, + "step": 4555 + }, + { + "epoch": 26.034285714285716, + "grad_norm": 36.120201110839844, + "learning_rate": 2.662857142857143e-05, + "loss": 0.224, + "step": 4556 + }, + { + "epoch": 26.04, + "grad_norm": 34.02216339111328, + "learning_rate": 2.6622222222222225e-05, + "loss": 0.3498, + "step": 4557 + }, + { + "epoch": 26.045714285714286, + "grad_norm": 37.88076400756836, + "learning_rate": 2.6615873015873018e-05, + "loss": 0.2839, + "step": 4558 + }, + { + "epoch": 26.05142857142857, + "grad_norm": 39.69442367553711, + "learning_rate": 2.660952380952381e-05, + "loss": 0.3258, + "step": 4559 + }, + { + "epoch": 26.057142857142857, + "grad_norm": 85.76815795898438, + "learning_rate": 2.6603174603174603e-05, + "loss": 0.3377, + "step": 4560 + }, + { + "epoch": 26.062857142857144, + "grad_norm": 49.02735900878906, + "learning_rate": 2.65968253968254e-05, + "loss": 0.2274, + "step": 4561 + }, + { + "epoch": 26.068571428571428, + "grad_norm": 46.21623611450195, + "learning_rate": 2.6590476190476192e-05, + "loss": 0.3173, + "step": 4562 + }, + { + "epoch": 26.074285714285715, + "grad_norm": 19.301591873168945, + "learning_rate": 2.6584126984126984e-05, + "loss": 0.1969, + "step": 4563 + }, + { + "epoch": 26.08, + "grad_norm": 44.840816497802734, + "learning_rate": 2.657777777777778e-05, + "loss": 0.2008, + "step": 4564 + }, + { + "epoch": 26.085714285714285, + "grad_norm": 36.26298141479492, + "learning_rate": 2.6571428571428576e-05, + "loss": 0.2551, + "step": 4565 + }, + { + "epoch": 26.091428571428573, + "grad_norm": 28.671228408813477, + "learning_rate": 2.6565079365079366e-05, + "loss": 0.2116, + "step": 4566 + }, + { + "epoch": 26.097142857142856, + "grad_norm": 30.666757583618164, + "learning_rate": 2.6558730158730162e-05, + "loss": 0.3992, + "step": 4567 + }, 
+ { + "epoch": 26.102857142857143, + "grad_norm": 48.448524475097656, + "learning_rate": 2.6552380952380958e-05, + "loss": 0.2654, + "step": 4568 + }, + { + "epoch": 26.10857142857143, + "grad_norm": 69.3399429321289, + "learning_rate": 2.6546031746031747e-05, + "loss": 0.2669, + "step": 4569 + }, + { + "epoch": 26.114285714285714, + "grad_norm": 55.04347229003906, + "learning_rate": 2.6539682539682543e-05, + "loss": 0.3934, + "step": 4570 + }, + { + "epoch": 26.12, + "grad_norm": 86.17486572265625, + "learning_rate": 2.6533333333333332e-05, + "loss": 0.3426, + "step": 4571 + }, + { + "epoch": 26.125714285714285, + "grad_norm": 61.36354064941406, + "learning_rate": 2.6526984126984128e-05, + "loss": 0.2632, + "step": 4572 + }, + { + "epoch": 26.13142857142857, + "grad_norm": 51.69411087036133, + "learning_rate": 2.6520634920634924e-05, + "loss": 0.2911, + "step": 4573 + }, + { + "epoch": 26.13714285714286, + "grad_norm": 29.80560874938965, + "learning_rate": 2.6514285714285714e-05, + "loss": 0.2377, + "step": 4574 + }, + { + "epoch": 26.142857142857142, + "grad_norm": 70.0278091430664, + "learning_rate": 2.650793650793651e-05, + "loss": 0.3496, + "step": 4575 + }, + { + "epoch": 26.14857142857143, + "grad_norm": 67.60846710205078, + "learning_rate": 2.6501587301587306e-05, + "loss": 0.3502, + "step": 4576 + }, + { + "epoch": 26.154285714285713, + "grad_norm": 67.3626708984375, + "learning_rate": 2.6495238095238095e-05, + "loss": 0.2504, + "step": 4577 + }, + { + "epoch": 26.16, + "grad_norm": 38.45746612548828, + "learning_rate": 2.648888888888889e-05, + "loss": 0.2712, + "step": 4578 + }, + { + "epoch": 26.165714285714287, + "grad_norm": 27.64004135131836, + "learning_rate": 2.6482539682539687e-05, + "loss": 0.3435, + "step": 4579 + }, + { + "epoch": 26.17142857142857, + "grad_norm": 42.48225784301758, + "learning_rate": 2.6476190476190476e-05, + "loss": 0.3141, + "step": 4580 + }, + { + "epoch": 26.177142857142858, + "grad_norm": 35.910362243652344, + 
"learning_rate": 2.6469841269841272e-05, + "loss": 0.2417, + "step": 4581 + }, + { + "epoch": 26.18285714285714, + "grad_norm": 63.82694625854492, + "learning_rate": 2.646349206349206e-05, + "loss": 0.353, + "step": 4582 + }, + { + "epoch": 26.18857142857143, + "grad_norm": 2869.86474609375, + "learning_rate": 2.6457142857142857e-05, + "loss": 0.4538, + "step": 4583 + }, + { + "epoch": 26.194285714285716, + "grad_norm": 97.55684661865234, + "learning_rate": 2.6450793650793653e-05, + "loss": 0.2897, + "step": 4584 + }, + { + "epoch": 26.2, + "grad_norm": 60.941158294677734, + "learning_rate": 2.6444444444444443e-05, + "loss": 0.2114, + "step": 4585 + }, + { + "epoch": 26.205714285714286, + "grad_norm": 69.35711669921875, + "learning_rate": 2.643809523809524e-05, + "loss": 0.2418, + "step": 4586 + }, + { + "epoch": 26.21142857142857, + "grad_norm": 129.8416290283203, + "learning_rate": 2.6431746031746035e-05, + "loss": 0.3354, + "step": 4587 + }, + { + "epoch": 26.217142857142857, + "grad_norm": 55.6038703918457, + "learning_rate": 2.6425396825396824e-05, + "loss": 0.2319, + "step": 4588 + }, + { + "epoch": 26.222857142857144, + "grad_norm": 30.773601531982422, + "learning_rate": 2.641904761904762e-05, + "loss": 0.3119, + "step": 4589 + }, + { + "epoch": 26.228571428571428, + "grad_norm": 64.36360168457031, + "learning_rate": 2.6412698412698416e-05, + "loss": 0.257, + "step": 4590 + }, + { + "epoch": 26.234285714285715, + "grad_norm": 19.576568603515625, + "learning_rate": 2.6406349206349205e-05, + "loss": 0.3647, + "step": 4591 + }, + { + "epoch": 26.24, + "grad_norm": 52.89550018310547, + "learning_rate": 2.64e-05, + "loss": 0.2763, + "step": 4592 + }, + { + "epoch": 26.245714285714286, + "grad_norm": 51.489906311035156, + "learning_rate": 2.6393650793650794e-05, + "loss": 0.2594, + "step": 4593 + }, + { + "epoch": 26.251428571428573, + "grad_norm": 47.094451904296875, + "learning_rate": 2.6387301587301587e-05, + "loss": 0.4294, + "step": 4594 + }, + { + "epoch": 
26.257142857142856, + "grad_norm": 89.28340148925781, + "learning_rate": 2.6380952380952383e-05, + "loss": 0.3631, + "step": 4595 + }, + { + "epoch": 26.262857142857143, + "grad_norm": 49.36043167114258, + "learning_rate": 2.6374603174603175e-05, + "loss": 0.2873, + "step": 4596 + }, + { + "epoch": 26.268571428571427, + "grad_norm": 99.70654296875, + "learning_rate": 2.636825396825397e-05, + "loss": 0.2918, + "step": 4597 + }, + { + "epoch": 26.274285714285714, + "grad_norm": 33.33317565917969, + "learning_rate": 2.6361904761904764e-05, + "loss": 0.354, + "step": 4598 + }, + { + "epoch": 26.28, + "grad_norm": 42.30352020263672, + "learning_rate": 2.6355555555555557e-05, + "loss": 0.2752, + "step": 4599 + }, + { + "epoch": 26.285714285714285, + "grad_norm": 49.61213302612305, + "learning_rate": 2.6349206349206353e-05, + "loss": 0.2889, + "step": 4600 + }, + { + "epoch": 26.291428571428572, + "grad_norm": 46.809486389160156, + "learning_rate": 2.6342857142857142e-05, + "loss": 0.263, + "step": 4601 + }, + { + "epoch": 26.29714285714286, + "grad_norm": 14.17546272277832, + "learning_rate": 2.6336507936507938e-05, + "loss": 0.222, + "step": 4602 + }, + { + "epoch": 26.302857142857142, + "grad_norm": 109.38255310058594, + "learning_rate": 2.6330158730158734e-05, + "loss": 0.2071, + "step": 4603 + }, + { + "epoch": 26.30857142857143, + "grad_norm": 40.216156005859375, + "learning_rate": 2.6323809523809523e-05, + "loss": 0.2199, + "step": 4604 + }, + { + "epoch": 26.314285714285713, + "grad_norm": 69.14410400390625, + "learning_rate": 2.631746031746032e-05, + "loss": 0.3123, + "step": 4605 + }, + { + "epoch": 26.32, + "grad_norm": 55.33283233642578, + "learning_rate": 2.6311111111111115e-05, + "loss": 0.2798, + "step": 4606 + }, + { + "epoch": 26.325714285714287, + "grad_norm": 44.87740707397461, + "learning_rate": 2.6304761904761904e-05, + "loss": 0.3386, + "step": 4607 + }, + { + "epoch": 26.33142857142857, + "grad_norm": 54.66011047363281, + "learning_rate": 
2.62984126984127e-05, + "loss": 0.2192, + "step": 4608 + }, + { + "epoch": 26.337142857142858, + "grad_norm": 44.17807388305664, + "learning_rate": 2.6292063492063496e-05, + "loss": 0.222, + "step": 4609 + }, + { + "epoch": 26.34285714285714, + "grad_norm": 61.853721618652344, + "learning_rate": 2.6285714285714286e-05, + "loss": 0.1909, + "step": 4610 + }, + { + "epoch": 26.34857142857143, + "grad_norm": 127.42079162597656, + "learning_rate": 2.6279365079365082e-05, + "loss": 0.2718, + "step": 4611 + }, + { + "epoch": 26.354285714285716, + "grad_norm": 35.26097869873047, + "learning_rate": 2.627301587301587e-05, + "loss": 0.3606, + "step": 4612 + }, + { + "epoch": 26.36, + "grad_norm": 32.439327239990234, + "learning_rate": 2.6266666666666667e-05, + "loss": 0.2152, + "step": 4613 + }, + { + "epoch": 26.365714285714287, + "grad_norm": 60.07107925415039, + "learning_rate": 2.6260317460317463e-05, + "loss": 0.2751, + "step": 4614 + }, + { + "epoch": 26.37142857142857, + "grad_norm": 591.93017578125, + "learning_rate": 2.6253968253968252e-05, + "loss": 0.4014, + "step": 4615 + }, + { + "epoch": 26.377142857142857, + "grad_norm": 45.13702392578125, + "learning_rate": 2.624761904761905e-05, + "loss": 0.3403, + "step": 4616 + }, + { + "epoch": 26.382857142857144, + "grad_norm": 52.521602630615234, + "learning_rate": 2.6241269841269844e-05, + "loss": 0.2272, + "step": 4617 + }, + { + "epoch": 26.388571428571428, + "grad_norm": 90.63419342041016, + "learning_rate": 2.6234920634920634e-05, + "loss": 0.266, + "step": 4618 + }, + { + "epoch": 26.394285714285715, + "grad_norm": 25.051990509033203, + "learning_rate": 2.622857142857143e-05, + "loss": 0.3204, + "step": 4619 + }, + { + "epoch": 26.4, + "grad_norm": 18.78619384765625, + "learning_rate": 2.6222222222222226e-05, + "loss": 0.4196, + "step": 4620 + }, + { + "epoch": 26.405714285714286, + "grad_norm": 35.6497688293457, + "learning_rate": 2.6215873015873015e-05, + "loss": 0.196, + "step": 4621 + }, + { + "epoch": 
26.411428571428573, + "grad_norm": 65.88414001464844, + "learning_rate": 2.620952380952381e-05, + "loss": 0.158, + "step": 4622 + }, + { + "epoch": 26.417142857142856, + "grad_norm": 26.555614471435547, + "learning_rate": 2.62031746031746e-05, + "loss": 0.2567, + "step": 4623 + }, + { + "epoch": 26.422857142857143, + "grad_norm": 408.59307861328125, + "learning_rate": 2.6196825396825396e-05, + "loss": 0.3711, + "step": 4624 + }, + { + "epoch": 26.428571428571427, + "grad_norm": 83.31832122802734, + "learning_rate": 2.6190476190476192e-05, + "loss": 0.3444, + "step": 4625 + }, + { + "epoch": 26.434285714285714, + "grad_norm": 50.338050842285156, + "learning_rate": 2.6184126984126985e-05, + "loss": 0.3384, + "step": 4626 + }, + { + "epoch": 26.44, + "grad_norm": 37.324745178222656, + "learning_rate": 2.6177777777777777e-05, + "loss": 0.3156, + "step": 4627 + }, + { + "epoch": 26.445714285714285, + "grad_norm": 60.125518798828125, + "learning_rate": 2.6171428571428574e-05, + "loss": 0.2744, + "step": 4628 + }, + { + "epoch": 26.451428571428572, + "grad_norm": 35.94612503051758, + "learning_rate": 2.6165079365079366e-05, + "loss": 0.2808, + "step": 4629 + }, + { + "epoch": 26.457142857142856, + "grad_norm": 50.03565979003906, + "learning_rate": 2.615873015873016e-05, + "loss": 0.3164, + "step": 4630 + }, + { + "epoch": 26.462857142857143, + "grad_norm": 36.93522262573242, + "learning_rate": 2.6152380952380955e-05, + "loss": 0.3094, + "step": 4631 + }, + { + "epoch": 26.46857142857143, + "grad_norm": 47.484981536865234, + "learning_rate": 2.6146031746031747e-05, + "loss": 0.1839, + "step": 4632 + }, + { + "epoch": 26.474285714285713, + "grad_norm": 35.224239349365234, + "learning_rate": 2.613968253968254e-05, + "loss": 0.3052, + "step": 4633 + }, + { + "epoch": 26.48, + "grad_norm": 41.6689567565918, + "learning_rate": 2.6133333333333333e-05, + "loss": 0.1908, + "step": 4634 + }, + { + "epoch": 26.485714285714284, + "grad_norm": 91.45073699951172, + "learning_rate": 
2.612698412698413e-05, + "loss": 0.232, + "step": 4635 + }, + { + "epoch": 26.49142857142857, + "grad_norm": 41.19544982910156, + "learning_rate": 2.6120634920634925e-05, + "loss": 0.4264, + "step": 4636 + }, + { + "epoch": 26.497142857142858, + "grad_norm": 21.225603103637695, + "learning_rate": 2.6114285714285714e-05, + "loss": 0.2383, + "step": 4637 + }, + { + "epoch": 26.502857142857142, + "grad_norm": 72.92811584472656, + "learning_rate": 2.610793650793651e-05, + "loss": 0.3611, + "step": 4638 + }, + { + "epoch": 26.50857142857143, + "grad_norm": 144.8211669921875, + "learning_rate": 2.6101587301587306e-05, + "loss": 0.3354, + "step": 4639 + }, + { + "epoch": 26.514285714285712, + "grad_norm": 232.2416229248047, + "learning_rate": 2.6095238095238095e-05, + "loss": 0.2467, + "step": 4640 + }, + { + "epoch": 26.52, + "grad_norm": 29.52247428894043, + "learning_rate": 2.608888888888889e-05, + "loss": 0.2913, + "step": 4641 + }, + { + "epoch": 26.525714285714287, + "grad_norm": 1025.2799072265625, + "learning_rate": 2.6082539682539687e-05, + "loss": 0.3663, + "step": 4642 + }, + { + "epoch": 26.53142857142857, + "grad_norm": 34.98089599609375, + "learning_rate": 2.6076190476190477e-05, + "loss": 0.3618, + "step": 4643 + }, + { + "epoch": 26.537142857142857, + "grad_norm": 24.808778762817383, + "learning_rate": 2.6069841269841273e-05, + "loss": 0.4146, + "step": 4644 + }, + { + "epoch": 26.542857142857144, + "grad_norm": 39.9876708984375, + "learning_rate": 2.6063492063492062e-05, + "loss": 0.2713, + "step": 4645 + }, + { + "epoch": 26.548571428571428, + "grad_norm": 32.00506591796875, + "learning_rate": 2.6057142857142858e-05, + "loss": 0.3541, + "step": 4646 + }, + { + "epoch": 26.554285714285715, + "grad_norm": 19.72596549987793, + "learning_rate": 2.6050793650793654e-05, + "loss": 0.2096, + "step": 4647 + }, + { + "epoch": 26.56, + "grad_norm": 43.73262023925781, + "learning_rate": 2.6044444444444443e-05, + "loss": 0.282, + "step": 4648 + }, + { + "epoch": 
26.565714285714286, + "grad_norm": 47.079261779785156, + "learning_rate": 2.603809523809524e-05, + "loss": 0.3678, + "step": 4649 + }, + { + "epoch": 26.571428571428573, + "grad_norm": 43.06452560424805, + "learning_rate": 2.6031746031746035e-05, + "loss": 0.3325, + "step": 4650 + }, + { + "epoch": 26.577142857142857, + "grad_norm": 69.7562255859375, + "learning_rate": 2.6025396825396824e-05, + "loss": 0.3022, + "step": 4651 + }, + { + "epoch": 26.582857142857144, + "grad_norm": 60.522830963134766, + "learning_rate": 2.601904761904762e-05, + "loss": 0.325, + "step": 4652 + }, + { + "epoch": 26.588571428571427, + "grad_norm": 336.8451843261719, + "learning_rate": 2.6012698412698417e-05, + "loss": 0.3255, + "step": 4653 + }, + { + "epoch": 26.594285714285714, + "grad_norm": 34.48298263549805, + "learning_rate": 2.6006349206349206e-05, + "loss": 0.4453, + "step": 4654 + }, + { + "epoch": 26.6, + "grad_norm": 53.4842529296875, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.3246, + "step": 4655 + }, + { + "epoch": 26.605714285714285, + "grad_norm": 36.113494873046875, + "learning_rate": 2.599365079365079e-05, + "loss": 0.3546, + "step": 4656 + }, + { + "epoch": 26.611428571428572, + "grad_norm": 86.10785675048828, + "learning_rate": 2.5987301587301587e-05, + "loss": 0.2551, + "step": 4657 + }, + { + "epoch": 26.617142857142856, + "grad_norm": 98.80118560791016, + "learning_rate": 2.5980952380952383e-05, + "loss": 0.3381, + "step": 4658 + }, + { + "epoch": 26.622857142857143, + "grad_norm": 46.648921966552734, + "learning_rate": 2.5974603174603172e-05, + "loss": 0.28, + "step": 4659 + }, + { + "epoch": 26.62857142857143, + "grad_norm": 59.09379577636719, + "learning_rate": 2.596825396825397e-05, + "loss": 0.2914, + "step": 4660 + }, + { + "epoch": 26.634285714285713, + "grad_norm": 166.9357147216797, + "learning_rate": 2.5961904761904764e-05, + "loss": 0.356, + "step": 4661 + }, + { + "epoch": 26.64, + "grad_norm": 119.84436798095703, + "learning_rate": 
2.5955555555555554e-05, + "loss": 0.2871, + "step": 4662 + }, + { + "epoch": 26.645714285714284, + "grad_norm": 21.690603256225586, + "learning_rate": 2.594920634920635e-05, + "loss": 0.2786, + "step": 4663 + }, + { + "epoch": 26.65142857142857, + "grad_norm": 77.54032135009766, + "learning_rate": 2.5942857142857146e-05, + "loss": 0.3975, + "step": 4664 + }, + { + "epoch": 26.65714285714286, + "grad_norm": 54.16943359375, + "learning_rate": 2.593650793650794e-05, + "loss": 0.2498, + "step": 4665 + }, + { + "epoch": 26.662857142857142, + "grad_norm": 30.922557830810547, + "learning_rate": 2.593015873015873e-05, + "loss": 0.2723, + "step": 4666 + }, + { + "epoch": 26.66857142857143, + "grad_norm": 67.56193542480469, + "learning_rate": 2.5923809523809524e-05, + "loss": 0.3129, + "step": 4667 + }, + { + "epoch": 26.674285714285713, + "grad_norm": 38.84308624267578, + "learning_rate": 2.591746031746032e-05, + "loss": 0.4121, + "step": 4668 + }, + { + "epoch": 26.68, + "grad_norm": 21.277053833007812, + "learning_rate": 2.5911111111111112e-05, + "loss": 0.2938, + "step": 4669 + }, + { + "epoch": 26.685714285714287, + "grad_norm": 45.40497589111328, + "learning_rate": 2.5904761904761905e-05, + "loss": 0.3152, + "step": 4670 + }, + { + "epoch": 26.69142857142857, + "grad_norm": 42.738285064697266, + "learning_rate": 2.58984126984127e-05, + "loss": 0.2402, + "step": 4671 + }, + { + "epoch": 26.697142857142858, + "grad_norm": 26.55962562561035, + "learning_rate": 2.5892063492063497e-05, + "loss": 0.4168, + "step": 4672 + }, + { + "epoch": 26.70285714285714, + "grad_norm": 68.99372863769531, + "learning_rate": 2.5885714285714286e-05, + "loss": 0.1987, + "step": 4673 + }, + { + "epoch": 26.708571428571428, + "grad_norm": 30.963071823120117, + "learning_rate": 2.5879365079365082e-05, + "loss": 0.2378, + "step": 4674 + }, + { + "epoch": 26.714285714285715, + "grad_norm": 127.34288787841797, + "learning_rate": 2.5873015873015878e-05, + "loss": 0.2533, + "step": 4675 + }, + { + 
"epoch": 26.72, + "grad_norm": 41.56283950805664, + "learning_rate": 2.5866666666666667e-05, + "loss": 0.2984, + "step": 4676 + }, + { + "epoch": 26.725714285714286, + "grad_norm": 36.975067138671875, + "learning_rate": 2.5860317460317463e-05, + "loss": 0.3104, + "step": 4677 + }, + { + "epoch": 26.731428571428573, + "grad_norm": 29.948862075805664, + "learning_rate": 2.5853968253968253e-05, + "loss": 0.2908, + "step": 4678 + }, + { + "epoch": 26.737142857142857, + "grad_norm": 88.34810638427734, + "learning_rate": 2.584761904761905e-05, + "loss": 0.276, + "step": 4679 + }, + { + "epoch": 26.742857142857144, + "grad_norm": 72.71587371826172, + "learning_rate": 2.5841269841269845e-05, + "loss": 0.2923, + "step": 4680 + }, + { + "epoch": 26.748571428571427, + "grad_norm": 219.08258056640625, + "learning_rate": 2.5834920634920634e-05, + "loss": 0.3756, + "step": 4681 + }, + { + "epoch": 26.754285714285714, + "grad_norm": 60.98505783081055, + "learning_rate": 2.582857142857143e-05, + "loss": 0.2535, + "step": 4682 + }, + { + "epoch": 26.76, + "grad_norm": 25.74429702758789, + "learning_rate": 2.5822222222222226e-05, + "loss": 0.2586, + "step": 4683 + }, + { + "epoch": 26.765714285714285, + "grad_norm": 33.48048782348633, + "learning_rate": 2.5815873015873015e-05, + "loss": 0.3317, + "step": 4684 + }, + { + "epoch": 26.771428571428572, + "grad_norm": 25.574949264526367, + "learning_rate": 2.580952380952381e-05, + "loss": 0.447, + "step": 4685 + }, + { + "epoch": 26.777142857142856, + "grad_norm": 341.3732604980469, + "learning_rate": 2.5803174603174607e-05, + "loss": 0.2951, + "step": 4686 + }, + { + "epoch": 26.782857142857143, + "grad_norm": 131.79299926757812, + "learning_rate": 2.5796825396825397e-05, + "loss": 0.3094, + "step": 4687 + }, + { + "epoch": 26.78857142857143, + "grad_norm": 56.57413864135742, + "learning_rate": 2.5790476190476193e-05, + "loss": 0.2468, + "step": 4688 + }, + { + "epoch": 26.794285714285714, + "grad_norm": 47.77552795410156, + 
"learning_rate": 2.5784126984126982e-05, + "loss": 0.3021, + "step": 4689 + }, + { + "epoch": 26.8, + "grad_norm": 29.353164672851562, + "learning_rate": 2.5777777777777778e-05, + "loss": 0.2925, + "step": 4690 + }, + { + "epoch": 26.805714285714284, + "grad_norm": 54.12332534790039, + "learning_rate": 2.5771428571428574e-05, + "loss": 0.2763, + "step": 4691 + }, + { + "epoch": 26.81142857142857, + "grad_norm": 48.31441116333008, + "learning_rate": 2.5765079365079363e-05, + "loss": 0.3282, + "step": 4692 + }, + { + "epoch": 26.81714285714286, + "grad_norm": 55.70726776123047, + "learning_rate": 2.575873015873016e-05, + "loss": 0.3505, + "step": 4693 + }, + { + "epoch": 26.822857142857142, + "grad_norm": 18.00375747680664, + "learning_rate": 2.5752380952380955e-05, + "loss": 0.2845, + "step": 4694 + }, + { + "epoch": 26.82857142857143, + "grad_norm": 19.55474281311035, + "learning_rate": 2.5746031746031744e-05, + "loss": 0.3077, + "step": 4695 + }, + { + "epoch": 26.834285714285713, + "grad_norm": 57.00762176513672, + "learning_rate": 2.573968253968254e-05, + "loss": 0.3885, + "step": 4696 + }, + { + "epoch": 26.84, + "grad_norm": 437.8184814453125, + "learning_rate": 2.5733333333333337e-05, + "loss": 0.3292, + "step": 4697 + }, + { + "epoch": 26.845714285714287, + "grad_norm": 22.36211395263672, + "learning_rate": 2.5726984126984126e-05, + "loss": 0.2513, + "step": 4698 + }, + { + "epoch": 26.85142857142857, + "grad_norm": 71.45623016357422, + "learning_rate": 2.5720634920634922e-05, + "loss": 0.3406, + "step": 4699 + }, + { + "epoch": 26.857142857142858, + "grad_norm": 43.11255645751953, + "learning_rate": 2.5714285714285714e-05, + "loss": 0.2955, + "step": 4700 + }, + { + "epoch": 26.86285714285714, + "grad_norm": 30.780973434448242, + "learning_rate": 2.5707936507936507e-05, + "loss": 0.2453, + "step": 4701 + }, + { + "epoch": 26.86857142857143, + "grad_norm": 23.691740036010742, + "learning_rate": 2.5701587301587303e-05, + "loss": 0.2932, + "step": 4702 + }, + 
{ + "epoch": 26.874285714285715, + "grad_norm": 30.1962890625, + "learning_rate": 2.5695238095238096e-05, + "loss": 0.2652, + "step": 4703 + }, + { + "epoch": 26.88, + "grad_norm": 60.63984680175781, + "learning_rate": 2.5688888888888892e-05, + "loss": 0.2017, + "step": 4704 + }, + { + "epoch": 26.885714285714286, + "grad_norm": 27.217247009277344, + "learning_rate": 2.5682539682539684e-05, + "loss": 0.3772, + "step": 4705 + }, + { + "epoch": 26.89142857142857, + "grad_norm": 35.93013000488281, + "learning_rate": 2.5676190476190477e-05, + "loss": 0.2693, + "step": 4706 + }, + { + "epoch": 26.897142857142857, + "grad_norm": 230.4045867919922, + "learning_rate": 2.5669841269841273e-05, + "loss": 0.6108, + "step": 4707 + }, + { + "epoch": 26.902857142857144, + "grad_norm": 40.505592346191406, + "learning_rate": 2.5663492063492066e-05, + "loss": 0.3169, + "step": 4708 + }, + { + "epoch": 26.908571428571427, + "grad_norm": 20.808183670043945, + "learning_rate": 2.565714285714286e-05, + "loss": 0.3663, + "step": 4709 + }, + { + "epoch": 26.914285714285715, + "grad_norm": 71.28199768066406, + "learning_rate": 2.5650793650793654e-05, + "loss": 0.2616, + "step": 4710 + }, + { + "epoch": 26.92, + "grad_norm": 46.240116119384766, + "learning_rate": 2.5644444444444444e-05, + "loss": 0.1703, + "step": 4711 + }, + { + "epoch": 26.925714285714285, + "grad_norm": 58.91728591918945, + "learning_rate": 2.563809523809524e-05, + "loss": 0.2939, + "step": 4712 + }, + { + "epoch": 26.931428571428572, + "grad_norm": 24.967037200927734, + "learning_rate": 2.5631746031746036e-05, + "loss": 0.2953, + "step": 4713 + }, + { + "epoch": 26.937142857142856, + "grad_norm": 62.452796936035156, + "learning_rate": 2.5625396825396825e-05, + "loss": 0.2894, + "step": 4714 + }, + { + "epoch": 26.942857142857143, + "grad_norm": 41.568992614746094, + "learning_rate": 2.561904761904762e-05, + "loss": 0.2523, + "step": 4715 + }, + { + "epoch": 26.94857142857143, + "grad_norm": 29.147098541259766, + 
"learning_rate": 2.5612698412698417e-05, + "loss": 0.2588, + "step": 4716 + }, + { + "epoch": 26.954285714285714, + "grad_norm": 52.80727005004883, + "learning_rate": 2.5606349206349206e-05, + "loss": 0.1472, + "step": 4717 + }, + { + "epoch": 26.96, + "grad_norm": 28.822208404541016, + "learning_rate": 2.5600000000000002e-05, + "loss": 0.3417, + "step": 4718 + }, + { + "epoch": 26.965714285714284, + "grad_norm": 103.94974517822266, + "learning_rate": 2.5593650793650798e-05, + "loss": 0.2922, + "step": 4719 + }, + { + "epoch": 26.97142857142857, + "grad_norm": 45.41557693481445, + "learning_rate": 2.5587301587301588e-05, + "loss": 0.2904, + "step": 4720 + }, + { + "epoch": 26.97714285714286, + "grad_norm": 53.98274612426758, + "learning_rate": 2.5580952380952384e-05, + "loss": 0.309, + "step": 4721 + }, + { + "epoch": 26.982857142857142, + "grad_norm": 56.60956573486328, + "learning_rate": 2.5574603174603173e-05, + "loss": 0.3258, + "step": 4722 + }, + { + "epoch": 26.98857142857143, + "grad_norm": 34.62663269042969, + "learning_rate": 2.556825396825397e-05, + "loss": 0.2096, + "step": 4723 + }, + { + "epoch": 26.994285714285713, + "grad_norm": 62.96559143066406, + "learning_rate": 2.5561904761904765e-05, + "loss": 0.3225, + "step": 4724 + }, + { + "epoch": 27.0, + "grad_norm": 43.35622024536133, + "learning_rate": 2.5555555555555554e-05, + "loss": 0.26, + "step": 4725 + }, + { + "epoch": 27.0, + "eval_classes": 0, + "eval_loss": 0.617247998714447, + "eval_map": 0.9077, + "eval_map_50": 0.9546, + "eval_map_75": 0.9384, + "eval_map_large": 0.9083, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9077, + "eval_map_small": -1.0, + "eval_mar_1": 0.7781, + "eval_mar_10": 0.9587, + "eval_mar_100": 0.9663, + "eval_mar_100_per_class": 0.9663, + "eval_mar_large": 0.9663, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.9606, + "eval_samples_per_second": 21.059, + "eval_steps_per_second": 2.65, + "step": 4725 + }, + { + "epoch": 
27.005714285714287, + "grad_norm": 264.7518005371094, + "learning_rate": 2.554920634920635e-05, + "loss": 0.2647, + "step": 4726 + }, + { + "epoch": 27.01142857142857, + "grad_norm": 61.20986557006836, + "learning_rate": 2.5542857142857146e-05, + "loss": 0.2843, + "step": 4727 + }, + { + "epoch": 27.017142857142858, + "grad_norm": 46.27261734008789, + "learning_rate": 2.5536507936507935e-05, + "loss": 0.4267, + "step": 4728 + }, + { + "epoch": 27.02285714285714, + "grad_norm": 36.558563232421875, + "learning_rate": 2.553015873015873e-05, + "loss": 0.2965, + "step": 4729 + }, + { + "epoch": 27.02857142857143, + "grad_norm": 39.53204345703125, + "learning_rate": 2.5523809523809527e-05, + "loss": 0.3747, + "step": 4730 + }, + { + "epoch": 27.034285714285716, + "grad_norm": 16.22458839416504, + "learning_rate": 2.5517460317460317e-05, + "loss": 0.2887, + "step": 4731 + }, + { + "epoch": 27.04, + "grad_norm": 81.15853881835938, + "learning_rate": 2.5511111111111113e-05, + "loss": 0.3398, + "step": 4732 + }, + { + "epoch": 27.045714285714286, + "grad_norm": 110.32965087890625, + "learning_rate": 2.5504761904761902e-05, + "loss": 0.3053, + "step": 4733 + }, + { + "epoch": 27.05142857142857, + "grad_norm": 47.38296127319336, + "learning_rate": 2.5498412698412698e-05, + "loss": 0.2774, + "step": 4734 + }, + { + "epoch": 27.057142857142857, + "grad_norm": 1103.1126708984375, + "learning_rate": 2.5492063492063494e-05, + "loss": 0.3639, + "step": 4735 + }, + { + "epoch": 27.062857142857144, + "grad_norm": 128.23031616210938, + "learning_rate": 2.5485714285714287e-05, + "loss": 0.2693, + "step": 4736 + }, + { + "epoch": 27.068571428571428, + "grad_norm": 51.42526626586914, + "learning_rate": 2.547936507936508e-05, + "loss": 0.297, + "step": 4737 + }, + { + "epoch": 27.074285714285715, + "grad_norm": 43.38787841796875, + "learning_rate": 2.5473015873015875e-05, + "loss": 0.2194, + "step": 4738 + }, + { + "epoch": 27.08, + "grad_norm": 108.1216812133789, + "learning_rate": 
2.5466666666666668e-05, + "loss": 0.4258, + "step": 4739 + }, + { + "epoch": 27.085714285714285, + "grad_norm": 42.919464111328125, + "learning_rate": 2.546031746031746e-05, + "loss": 0.3384, + "step": 4740 + }, + { + "epoch": 27.091428571428573, + "grad_norm": 42.97194290161133, + "learning_rate": 2.5453968253968257e-05, + "loss": 0.2781, + "step": 4741 + }, + { + "epoch": 27.097142857142856, + "grad_norm": 50.67507553100586, + "learning_rate": 2.544761904761905e-05, + "loss": 0.3613, + "step": 4742 + }, + { + "epoch": 27.102857142857143, + "grad_norm": 1063.8197021484375, + "learning_rate": 2.5441269841269845e-05, + "loss": 0.4082, + "step": 4743 + }, + { + "epoch": 27.10857142857143, + "grad_norm": 39.627647399902344, + "learning_rate": 2.5434920634920634e-05, + "loss": 0.4711, + "step": 4744 + }, + { + "epoch": 27.114285714285714, + "grad_norm": 92.78875732421875, + "learning_rate": 2.542857142857143e-05, + "loss": 0.3292, + "step": 4745 + }, + { + "epoch": 27.12, + "grad_norm": 25.53900718688965, + "learning_rate": 2.5422222222222227e-05, + "loss": 0.2453, + "step": 4746 + }, + { + "epoch": 27.125714285714285, + "grad_norm": 221.59703063964844, + "learning_rate": 2.5415873015873016e-05, + "loss": 0.253, + "step": 4747 + }, + { + "epoch": 27.13142857142857, + "grad_norm": 83.64018249511719, + "learning_rate": 2.5409523809523812e-05, + "loss": 0.2473, + "step": 4748 + }, + { + "epoch": 27.13714285714286, + "grad_norm": 205.5132293701172, + "learning_rate": 2.5403174603174608e-05, + "loss": 0.2953, + "step": 4749 + }, + { + "epoch": 27.142857142857142, + "grad_norm": 50.870399475097656, + "learning_rate": 2.5396825396825397e-05, + "loss": 0.3832, + "step": 4750 + }, + { + "epoch": 27.14857142857143, + "grad_norm": 62.747032165527344, + "learning_rate": 2.5390476190476193e-05, + "loss": 0.2428, + "step": 4751 + }, + { + "epoch": 27.154285714285713, + "grad_norm": 212.9983367919922, + "learning_rate": 2.538412698412699e-05, + "loss": 0.2652, + "step": 4752 + }, + { 
+ "epoch": 27.16, + "grad_norm": 72.1238784790039, + "learning_rate": 2.537777777777778e-05, + "loss": 0.289, + "step": 4753 + }, + { + "epoch": 27.165714285714287, + "grad_norm": 52.40901184082031, + "learning_rate": 2.5371428571428574e-05, + "loss": 0.2059, + "step": 4754 + }, + { + "epoch": 27.17142857142857, + "grad_norm": 74.59566497802734, + "learning_rate": 2.5365079365079364e-05, + "loss": 0.4205, + "step": 4755 + }, + { + "epoch": 27.177142857142858, + "grad_norm": 584.8218383789062, + "learning_rate": 2.535873015873016e-05, + "loss": 0.332, + "step": 4756 + }, + { + "epoch": 27.18285714285714, + "grad_norm": 49.488468170166016, + "learning_rate": 2.5352380952380956e-05, + "loss": 0.2476, + "step": 4757 + }, + { + "epoch": 27.18857142857143, + "grad_norm": 34.9415397644043, + "learning_rate": 2.5346031746031745e-05, + "loss": 0.3947, + "step": 4758 + }, + { + "epoch": 27.194285714285716, + "grad_norm": 36.94355392456055, + "learning_rate": 2.533968253968254e-05, + "loss": 0.3157, + "step": 4759 + }, + { + "epoch": 27.2, + "grad_norm": 66.73297119140625, + "learning_rate": 2.5333333333333337e-05, + "loss": 0.2973, + "step": 4760 + }, + { + "epoch": 27.205714285714286, + "grad_norm": 38.81161117553711, + "learning_rate": 2.5326984126984126e-05, + "loss": 0.3355, + "step": 4761 + }, + { + "epoch": 27.21142857142857, + "grad_norm": 42.80949401855469, + "learning_rate": 2.5320634920634922e-05, + "loss": 0.2284, + "step": 4762 + }, + { + "epoch": 27.217142857142857, + "grad_norm": 69.96595764160156, + "learning_rate": 2.5314285714285718e-05, + "loss": 0.2513, + "step": 4763 + }, + { + "epoch": 27.222857142857144, + "grad_norm": 81.86923217773438, + "learning_rate": 2.5307936507936508e-05, + "loss": 0.2305, + "step": 4764 + }, + { + "epoch": 27.228571428571428, + "grad_norm": 26.642152786254883, + "learning_rate": 2.5301587301587304e-05, + "loss": 0.2809, + "step": 4765 + }, + { + "epoch": 27.234285714285715, + "grad_norm": 41.40145492553711, + "learning_rate": 
2.5295238095238093e-05, + "loss": 0.1789, + "step": 4766 + }, + { + "epoch": 27.24, + "grad_norm": 41.42937088012695, + "learning_rate": 2.528888888888889e-05, + "loss": 0.2544, + "step": 4767 + }, + { + "epoch": 27.245714285714286, + "grad_norm": 70.97283935546875, + "learning_rate": 2.5282539682539685e-05, + "loss": 0.3121, + "step": 4768 + }, + { + "epoch": 27.251428571428573, + "grad_norm": 116.86566925048828, + "learning_rate": 2.5276190476190474e-05, + "loss": 0.2777, + "step": 4769 + }, + { + "epoch": 27.257142857142856, + "grad_norm": 34.88475036621094, + "learning_rate": 2.526984126984127e-05, + "loss": 0.309, + "step": 4770 + }, + { + "epoch": 27.262857142857143, + "grad_norm": 146.3785400390625, + "learning_rate": 2.5263492063492066e-05, + "loss": 0.3011, + "step": 4771 + }, + { + "epoch": 27.268571428571427, + "grad_norm": 118.60247802734375, + "learning_rate": 2.5257142857142855e-05, + "loss": 0.2069, + "step": 4772 + }, + { + "epoch": 27.274285714285714, + "grad_norm": 22.586828231811523, + "learning_rate": 2.525079365079365e-05, + "loss": 0.3537, + "step": 4773 + }, + { + "epoch": 27.28, + "grad_norm": 50.62820816040039, + "learning_rate": 2.5244444444444447e-05, + "loss": 0.208, + "step": 4774 + }, + { + "epoch": 27.285714285714285, + "grad_norm": 56.44573974609375, + "learning_rate": 2.523809523809524e-05, + "loss": 0.3518, + "step": 4775 + }, + { + "epoch": 27.291428571428572, + "grad_norm": 371.9031982421875, + "learning_rate": 2.5231746031746033e-05, + "loss": 0.2639, + "step": 4776 + }, + { + "epoch": 27.29714285714286, + "grad_norm": 23.763046264648438, + "learning_rate": 2.5225396825396825e-05, + "loss": 0.312, + "step": 4777 + }, + { + "epoch": 27.302857142857142, + "grad_norm": 21.81557846069336, + "learning_rate": 2.521904761904762e-05, + "loss": 0.2612, + "step": 4778 + }, + { + "epoch": 27.30857142857143, + "grad_norm": 41.35017776489258, + "learning_rate": 2.5212698412698414e-05, + "loss": 0.2606, + "step": 4779 + }, + { + "epoch": 
27.314285714285713, + "grad_norm": 82.43899536132812, + "learning_rate": 2.5206349206349207e-05, + "loss": 0.3153, + "step": 4780 + }, + { + "epoch": 27.32, + "grad_norm": 61.49275207519531, + "learning_rate": 2.5200000000000003e-05, + "loss": 0.2535, + "step": 4781 + }, + { + "epoch": 27.325714285714287, + "grad_norm": 20.077241897583008, + "learning_rate": 2.51936507936508e-05, + "loss": 0.2586, + "step": 4782 + }, + { + "epoch": 27.33142857142857, + "grad_norm": 31.274147033691406, + "learning_rate": 2.5187301587301588e-05, + "loss": 0.3254, + "step": 4783 + }, + { + "epoch": 27.337142857142858, + "grad_norm": 414.2837829589844, + "learning_rate": 2.5180952380952384e-05, + "loss": 0.3036, + "step": 4784 + }, + { + "epoch": 27.34285714285714, + "grad_norm": 60.34440612792969, + "learning_rate": 2.517460317460318e-05, + "loss": 0.2722, + "step": 4785 + }, + { + "epoch": 27.34857142857143, + "grad_norm": 56.29039764404297, + "learning_rate": 2.516825396825397e-05, + "loss": 0.4124, + "step": 4786 + }, + { + "epoch": 27.354285714285716, + "grad_norm": 90.10543823242188, + "learning_rate": 2.5161904761904765e-05, + "loss": 0.2382, + "step": 4787 + }, + { + "epoch": 27.36, + "grad_norm": 49.912086486816406, + "learning_rate": 2.5155555555555555e-05, + "loss": 0.2819, + "step": 4788 + }, + { + "epoch": 27.365714285714287, + "grad_norm": 66.22119903564453, + "learning_rate": 2.514920634920635e-05, + "loss": 0.2414, + "step": 4789 + }, + { + "epoch": 27.37142857142857, + "grad_norm": 53.957550048828125, + "learning_rate": 2.5142857142857147e-05, + "loss": 0.3447, + "step": 4790 + }, + { + "epoch": 27.377142857142857, + "grad_norm": 51.856266021728516, + "learning_rate": 2.5136507936507936e-05, + "loss": 0.2213, + "step": 4791 + }, + { + "epoch": 27.382857142857144, + "grad_norm": 33.7740478515625, + "learning_rate": 2.5130158730158732e-05, + "loss": 0.2832, + "step": 4792 + }, + { + "epoch": 27.388571428571428, + "grad_norm": 36.47516632080078, + "learning_rate": 
2.5123809523809528e-05, + "loss": 0.288, + "step": 4793 + }, + { + "epoch": 27.394285714285715, + "grad_norm": 43.95291519165039, + "learning_rate": 2.5117460317460317e-05, + "loss": 0.2727, + "step": 4794 + }, + { + "epoch": 27.4, + "grad_norm": 54.80048370361328, + "learning_rate": 2.5111111111111113e-05, + "loss": 0.276, + "step": 4795 + }, + { + "epoch": 27.405714285714286, + "grad_norm": 231.54710388183594, + "learning_rate": 2.510476190476191e-05, + "loss": 0.2558, + "step": 4796 + }, + { + "epoch": 27.411428571428573, + "grad_norm": 56.0648307800293, + "learning_rate": 2.50984126984127e-05, + "loss": 0.2427, + "step": 4797 + }, + { + "epoch": 27.417142857142856, + "grad_norm": 75.80994415283203, + "learning_rate": 2.5092063492063494e-05, + "loss": 0.3139, + "step": 4798 + }, + { + "epoch": 27.422857142857143, + "grad_norm": 34.60862731933594, + "learning_rate": 2.5085714285714284e-05, + "loss": 0.2255, + "step": 4799 + }, + { + "epoch": 27.428571428571427, + "grad_norm": 30.5380859375, + "learning_rate": 2.507936507936508e-05, + "loss": 0.3061, + "step": 4800 + }, + { + "epoch": 27.434285714285714, + "grad_norm": 25.800336837768555, + "learning_rate": 2.5073015873015876e-05, + "loss": 0.2475, + "step": 4801 + }, + { + "epoch": 27.44, + "grad_norm": 31.043546676635742, + "learning_rate": 2.5066666666666665e-05, + "loss": 0.3676, + "step": 4802 + }, + { + "epoch": 27.445714285714285, + "grad_norm": 45.71298599243164, + "learning_rate": 2.506031746031746e-05, + "loss": 0.2559, + "step": 4803 + }, + { + "epoch": 27.451428571428572, + "grad_norm": 24.04212760925293, + "learning_rate": 2.5053968253968257e-05, + "loss": 0.3555, + "step": 4804 + }, + { + "epoch": 27.457142857142856, + "grad_norm": 49.58759689331055, + "learning_rate": 2.5047619047619046e-05, + "loss": 0.3161, + "step": 4805 + }, + { + "epoch": 27.462857142857143, + "grad_norm": 33.77734375, + "learning_rate": 2.5041269841269842e-05, + "loss": 0.2111, + "step": 4806 + }, + { + "epoch": 
27.46857142857143, + "grad_norm": 31.39830780029297, + "learning_rate": 2.503492063492064e-05, + "loss": 0.2874, + "step": 4807 + }, + { + "epoch": 27.474285714285713, + "grad_norm": 55.34870529174805, + "learning_rate": 2.5028571428571428e-05, + "loss": 0.2791, + "step": 4808 + }, + { + "epoch": 27.48, + "grad_norm": 239.04185485839844, + "learning_rate": 2.5022222222222224e-05, + "loss": 0.3815, + "step": 4809 + }, + { + "epoch": 27.485714285714284, + "grad_norm": 43.9395637512207, + "learning_rate": 2.5015873015873016e-05, + "loss": 0.2651, + "step": 4810 + }, + { + "epoch": 27.49142857142857, + "grad_norm": 74.31682586669922, + "learning_rate": 2.500952380952381e-05, + "loss": 0.3032, + "step": 4811 + }, + { + "epoch": 27.497142857142858, + "grad_norm": 49.19131851196289, + "learning_rate": 2.5003174603174605e-05, + "loss": 0.3221, + "step": 4812 + }, + { + "epoch": 27.502857142857142, + "grad_norm": 30.429489135742188, + "learning_rate": 2.4996825396825398e-05, + "loss": 0.2028, + "step": 4813 + }, + { + "epoch": 27.50857142857143, + "grad_norm": 235.2591094970703, + "learning_rate": 2.4990476190476194e-05, + "loss": 0.2108, + "step": 4814 + }, + { + "epoch": 27.514285714285712, + "grad_norm": 46.36361312866211, + "learning_rate": 2.4984126984126986e-05, + "loss": 0.4535, + "step": 4815 + }, + { + "epoch": 27.52, + "grad_norm": 46.341285705566406, + "learning_rate": 2.497777777777778e-05, + "loss": 0.2184, + "step": 4816 + }, + { + "epoch": 27.525714285714287, + "grad_norm": 31.94393539428711, + "learning_rate": 2.4971428571428575e-05, + "loss": 0.2327, + "step": 4817 + }, + { + "epoch": 27.53142857142857, + "grad_norm": 39.17048645019531, + "learning_rate": 2.4965079365079367e-05, + "loss": 0.3632, + "step": 4818 + }, + { + "epoch": 27.537142857142857, + "grad_norm": 22.573932647705078, + "learning_rate": 2.495873015873016e-05, + "loss": 0.2425, + "step": 4819 + }, + { + "epoch": 27.542857142857144, + "grad_norm": 172.38751220703125, + "learning_rate": 
2.4952380952380956e-05, + "loss": 0.2537, + "step": 4820 + }, + { + "epoch": 27.548571428571428, + "grad_norm": 55.52573013305664, + "learning_rate": 2.494603174603175e-05, + "loss": 0.3031, + "step": 4821 + }, + { + "epoch": 27.554285714285715, + "grad_norm": 48.40682601928711, + "learning_rate": 2.493968253968254e-05, + "loss": 0.2625, + "step": 4822 + }, + { + "epoch": 27.56, + "grad_norm": 30.03643035888672, + "learning_rate": 2.4933333333333334e-05, + "loss": 0.3392, + "step": 4823 + }, + { + "epoch": 27.565714285714286, + "grad_norm": 49.23202896118164, + "learning_rate": 2.492698412698413e-05, + "loss": 0.3327, + "step": 4824 + }, + { + "epoch": 27.571428571428573, + "grad_norm": 36.13849639892578, + "learning_rate": 2.4920634920634923e-05, + "loss": 0.2898, + "step": 4825 + }, + { + "epoch": 27.577142857142857, + "grad_norm": 46.84010314941406, + "learning_rate": 2.4914285714285715e-05, + "loss": 0.3819, + "step": 4826 + }, + { + "epoch": 27.582857142857144, + "grad_norm": 82.81731414794922, + "learning_rate": 2.4907936507936508e-05, + "loss": 0.2455, + "step": 4827 + }, + { + "epoch": 27.588571428571427, + "grad_norm": 50.32649612426758, + "learning_rate": 2.4901587301587304e-05, + "loss": 0.4102, + "step": 4828 + }, + { + "epoch": 27.594285714285714, + "grad_norm": 53.72205352783203, + "learning_rate": 2.4895238095238097e-05, + "loss": 0.2229, + "step": 4829 + }, + { + "epoch": 27.6, + "grad_norm": 23.845911026000977, + "learning_rate": 2.488888888888889e-05, + "loss": 0.2732, + "step": 4830 + }, + { + "epoch": 27.605714285714285, + "grad_norm": 40.13404846191406, + "learning_rate": 2.4882539682539685e-05, + "loss": 0.1945, + "step": 4831 + }, + { + "epoch": 27.611428571428572, + "grad_norm": 22.85449981689453, + "learning_rate": 2.4876190476190478e-05, + "loss": 0.2219, + "step": 4832 + }, + { + "epoch": 27.617142857142856, + "grad_norm": 20.945064544677734, + "learning_rate": 2.486984126984127e-05, + "loss": 0.2263, + "step": 4833 + }, + { + "epoch": 
27.622857142857143, + "grad_norm": 173.3744354248047, + "learning_rate": 2.4863492063492063e-05, + "loss": 0.248, + "step": 4834 + }, + { + "epoch": 27.62857142857143, + "grad_norm": 36.34626007080078, + "learning_rate": 2.485714285714286e-05, + "loss": 0.5044, + "step": 4835 + }, + { + "epoch": 27.634285714285713, + "grad_norm": 89.77456665039062, + "learning_rate": 2.4850793650793652e-05, + "loss": 0.2702, + "step": 4836 + }, + { + "epoch": 27.64, + "grad_norm": 72.74186706542969, + "learning_rate": 2.4844444444444444e-05, + "loss": 0.2642, + "step": 4837 + }, + { + "epoch": 27.645714285714284, + "grad_norm": 48.650089263916016, + "learning_rate": 2.4838095238095237e-05, + "loss": 0.253, + "step": 4838 + }, + { + "epoch": 27.65142857142857, + "grad_norm": 43.67725372314453, + "learning_rate": 2.4831746031746033e-05, + "loss": 0.2165, + "step": 4839 + }, + { + "epoch": 27.65714285714286, + "grad_norm": 20.58785629272461, + "learning_rate": 2.4825396825396826e-05, + "loss": 0.2718, + "step": 4840 + }, + { + "epoch": 27.662857142857142, + "grad_norm": 45.65858840942383, + "learning_rate": 2.481904761904762e-05, + "loss": 0.2948, + "step": 4841 + }, + { + "epoch": 27.66857142857143, + "grad_norm": 74.41572570800781, + "learning_rate": 2.4812698412698414e-05, + "loss": 0.3391, + "step": 4842 + }, + { + "epoch": 27.674285714285713, + "grad_norm": 38.63113021850586, + "learning_rate": 2.4806349206349207e-05, + "loss": 0.3389, + "step": 4843 + }, + { + "epoch": 27.68, + "grad_norm": 76.9261245727539, + "learning_rate": 2.48e-05, + "loss": 0.3061, + "step": 4844 + }, + { + "epoch": 27.685714285714287, + "grad_norm": 84.7331771850586, + "learning_rate": 2.4793650793650792e-05, + "loss": 0.2617, + "step": 4845 + }, + { + "epoch": 27.69142857142857, + "grad_norm": 17.409889221191406, + "learning_rate": 2.478730158730159e-05, + "loss": 0.2637, + "step": 4846 + }, + { + "epoch": 27.697142857142858, + "grad_norm": 88.08624267578125, + "learning_rate": 2.478095238095238e-05, + 
"loss": 0.3734, + "step": 4847 + }, + { + "epoch": 27.70285714285714, + "grad_norm": 56.591800689697266, + "learning_rate": 2.4774603174603177e-05, + "loss": 0.2685, + "step": 4848 + }, + { + "epoch": 27.708571428571428, + "grad_norm": 31.55167579650879, + "learning_rate": 2.476825396825397e-05, + "loss": 0.2221, + "step": 4849 + }, + { + "epoch": 27.714285714285715, + "grad_norm": 40.55548095703125, + "learning_rate": 2.4761904761904762e-05, + "loss": 0.2963, + "step": 4850 + }, + { + "epoch": 27.72, + "grad_norm": 99.02445220947266, + "learning_rate": 2.475555555555556e-05, + "loss": 0.2521, + "step": 4851 + }, + { + "epoch": 27.725714285714286, + "grad_norm": 246.45144653320312, + "learning_rate": 2.474920634920635e-05, + "loss": 0.2875, + "step": 4852 + }, + { + "epoch": 27.731428571428573, + "grad_norm": 39.322391510009766, + "learning_rate": 2.4742857142857147e-05, + "loss": 0.3158, + "step": 4853 + }, + { + "epoch": 27.737142857142857, + "grad_norm": 44.78469467163086, + "learning_rate": 2.473650793650794e-05, + "loss": 0.3035, + "step": 4854 + }, + { + "epoch": 27.742857142857144, + "grad_norm": 349.8539733886719, + "learning_rate": 2.4730158730158732e-05, + "loss": 0.3249, + "step": 4855 + }, + { + "epoch": 27.748571428571427, + "grad_norm": 51.817283630371094, + "learning_rate": 2.4723809523809525e-05, + "loss": 0.3091, + "step": 4856 + }, + { + "epoch": 27.754285714285714, + "grad_norm": 18.263761520385742, + "learning_rate": 2.471746031746032e-05, + "loss": 0.2652, + "step": 4857 + }, + { + "epoch": 27.76, + "grad_norm": 94.6199722290039, + "learning_rate": 2.4711111111111114e-05, + "loss": 0.2096, + "step": 4858 + }, + { + "epoch": 27.765714285714285, + "grad_norm": 621.2186889648438, + "learning_rate": 2.4704761904761906e-05, + "loss": 0.3711, + "step": 4859 + }, + { + "epoch": 27.771428571428572, + "grad_norm": 162.09405517578125, + "learning_rate": 2.46984126984127e-05, + "loss": 0.4606, + "step": 4860 + }, + { + "epoch": 27.777142857142856, + 
"grad_norm": 92.1196060180664, + "learning_rate": 2.4692063492063495e-05, + "loss": 0.2604, + "step": 4861 + }, + { + "epoch": 27.782857142857143, + "grad_norm": 52.80637741088867, + "learning_rate": 2.4685714285714288e-05, + "loss": 0.2786, + "step": 4862 + }, + { + "epoch": 27.78857142857143, + "grad_norm": 59.52845001220703, + "learning_rate": 2.467936507936508e-05, + "loss": 0.2649, + "step": 4863 + }, + { + "epoch": 27.794285714285714, + "grad_norm": 19.578981399536133, + "learning_rate": 2.4673015873015876e-05, + "loss": 0.2346, + "step": 4864 + }, + { + "epoch": 27.8, + "grad_norm": 35.54555892944336, + "learning_rate": 2.466666666666667e-05, + "loss": 0.4155, + "step": 4865 + }, + { + "epoch": 27.805714285714284, + "grad_norm": 93.74425506591797, + "learning_rate": 2.466031746031746e-05, + "loss": 0.3929, + "step": 4866 + }, + { + "epoch": 27.81142857142857, + "grad_norm": 34.482975006103516, + "learning_rate": 2.4653968253968254e-05, + "loss": 0.2819, + "step": 4867 + }, + { + "epoch": 27.81714285714286, + "grad_norm": 30.5424747467041, + "learning_rate": 2.464761904761905e-05, + "loss": 0.2795, + "step": 4868 + }, + { + "epoch": 27.822857142857142, + "grad_norm": 32.831085205078125, + "learning_rate": 2.4641269841269843e-05, + "loss": 0.2125, + "step": 4869 + }, + { + "epoch": 27.82857142857143, + "grad_norm": 95.80289459228516, + "learning_rate": 2.4634920634920635e-05, + "loss": 0.3558, + "step": 4870 + }, + { + "epoch": 27.834285714285713, + "grad_norm": 97.45088958740234, + "learning_rate": 2.4628571428571428e-05, + "loss": 0.327, + "step": 4871 + }, + { + "epoch": 27.84, + "grad_norm": 27.350614547729492, + "learning_rate": 2.4622222222222224e-05, + "loss": 0.3236, + "step": 4872 + }, + { + "epoch": 27.845714285714287, + "grad_norm": 52.39337158203125, + "learning_rate": 2.4615873015873017e-05, + "loss": 0.376, + "step": 4873 + }, + { + "epoch": 27.85142857142857, + "grad_norm": 27.236066818237305, + "learning_rate": 2.460952380952381e-05, + "loss": 
0.5718, + "step": 4874 + }, + { + "epoch": 27.857142857142858, + "grad_norm": 34.752994537353516, + "learning_rate": 2.4603174603174602e-05, + "loss": 0.2736, + "step": 4875 + }, + { + "epoch": 27.86285714285714, + "grad_norm": 23.44194793701172, + "learning_rate": 2.4596825396825398e-05, + "loss": 0.3231, + "step": 4876 + }, + { + "epoch": 27.86857142857143, + "grad_norm": 23.610010147094727, + "learning_rate": 2.459047619047619e-05, + "loss": 0.2362, + "step": 4877 + }, + { + "epoch": 27.874285714285715, + "grad_norm": 18.690441131591797, + "learning_rate": 2.4584126984126983e-05, + "loss": 0.2454, + "step": 4878 + }, + { + "epoch": 27.88, + "grad_norm": 14.173318862915039, + "learning_rate": 2.457777777777778e-05, + "loss": 0.2281, + "step": 4879 + }, + { + "epoch": 27.885714285714286, + "grad_norm": 55.35274887084961, + "learning_rate": 2.4571428571428572e-05, + "loss": 0.4128, + "step": 4880 + }, + { + "epoch": 27.89142857142857, + "grad_norm": 25.49809455871582, + "learning_rate": 2.4565079365079365e-05, + "loss": 0.2281, + "step": 4881 + }, + { + "epoch": 27.897142857142857, + "grad_norm": 32.44559097290039, + "learning_rate": 2.4558730158730157e-05, + "loss": 0.3497, + "step": 4882 + }, + { + "epoch": 27.902857142857144, + "grad_norm": 68.36529541015625, + "learning_rate": 2.4552380952380953e-05, + "loss": 0.3238, + "step": 4883 + }, + { + "epoch": 27.908571428571427, + "grad_norm": 58.09840774536133, + "learning_rate": 2.4546031746031746e-05, + "loss": 0.4048, + "step": 4884 + }, + { + "epoch": 27.914285714285715, + "grad_norm": 98.97122955322266, + "learning_rate": 2.4539682539682542e-05, + "loss": 0.279, + "step": 4885 + }, + { + "epoch": 27.92, + "grad_norm": 35.524574279785156, + "learning_rate": 2.4533333333333334e-05, + "loss": 0.2158, + "step": 4886 + }, + { + "epoch": 27.925714285714285, + "grad_norm": 86.26341247558594, + "learning_rate": 2.452698412698413e-05, + "loss": 0.3683, + "step": 4887 + }, + { + "epoch": 27.931428571428572, + "grad_norm": 
73.19541931152344, + "learning_rate": 2.4520634920634923e-05, + "loss": 0.4275, + "step": 4888 + }, + { + "epoch": 27.937142857142856, + "grad_norm": 113.84541320800781, + "learning_rate": 2.4514285714285716e-05, + "loss": 0.2033, + "step": 4889 + }, + { + "epoch": 27.942857142857143, + "grad_norm": 37.84712600708008, + "learning_rate": 2.4507936507936512e-05, + "loss": 0.2811, + "step": 4890 + }, + { + "epoch": 27.94857142857143, + "grad_norm": 35.4206428527832, + "learning_rate": 2.4501587301587304e-05, + "loss": 0.2498, + "step": 4891 + }, + { + "epoch": 27.954285714285714, + "grad_norm": 56.23537826538086, + "learning_rate": 2.4495238095238097e-05, + "loss": 0.2603, + "step": 4892 + }, + { + "epoch": 27.96, + "grad_norm": 92.81861114501953, + "learning_rate": 2.448888888888889e-05, + "loss": 0.304, + "step": 4893 + }, + { + "epoch": 27.965714285714284, + "grad_norm": 48.27447509765625, + "learning_rate": 2.4482539682539686e-05, + "loss": 0.284, + "step": 4894 + }, + { + "epoch": 27.97142857142857, + "grad_norm": 49.307498931884766, + "learning_rate": 2.447619047619048e-05, + "loss": 0.296, + "step": 4895 + }, + { + "epoch": 27.97714285714286, + "grad_norm": 52.156822204589844, + "learning_rate": 2.446984126984127e-05, + "loss": 0.283, + "step": 4896 + }, + { + "epoch": 27.982857142857142, + "grad_norm": 37.40717315673828, + "learning_rate": 2.4463492063492064e-05, + "loss": 0.3139, + "step": 4897 + }, + { + "epoch": 27.98857142857143, + "grad_norm": 29.659826278686523, + "learning_rate": 2.445714285714286e-05, + "loss": 0.2711, + "step": 4898 + }, + { + "epoch": 27.994285714285713, + "grad_norm": 84.78431701660156, + "learning_rate": 2.4450793650793652e-05, + "loss": 0.2837, + "step": 4899 + }, + { + "epoch": 28.0, + "grad_norm": 43.500694274902344, + "learning_rate": 2.4444444444444445e-05, + "loss": 0.2073, + "step": 4900 + }, + { + "epoch": 28.0, + "eval_classes": 0, + "eval_loss": 0.6610374450683594, + "eval_map": 0.9033, + "eval_map_50": 0.9511, + 
"eval_map_75": 0.9406, + "eval_map_large": 0.9038, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9033, + "eval_map_small": -1.0, + "eval_mar_1": 0.7727, + "eval_mar_10": 0.9635, + "eval_mar_100": 0.9676, + "eval_mar_100_per_class": 0.9676, + "eval_mar_large": 0.9676, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 14.5982, + "eval_samples_per_second": 20.139, + "eval_steps_per_second": 2.535, + "step": 4900 + }, + { + "epoch": 28.005714285714287, + "grad_norm": 25.908740997314453, + "learning_rate": 2.443809523809524e-05, + "loss": 0.2257, + "step": 4901 + }, + { + "epoch": 28.01142857142857, + "grad_norm": 45.944671630859375, + "learning_rate": 2.4431746031746034e-05, + "loss": 0.302, + "step": 4902 + }, + { + "epoch": 28.017142857142858, + "grad_norm": 33.82231903076172, + "learning_rate": 2.4425396825396826e-05, + "loss": 0.2747, + "step": 4903 + }, + { + "epoch": 28.02285714285714, + "grad_norm": 40.96854019165039, + "learning_rate": 2.441904761904762e-05, + "loss": 0.3091, + "step": 4904 + }, + { + "epoch": 28.02857142857143, + "grad_norm": 66.51483154296875, + "learning_rate": 2.4412698412698415e-05, + "loss": 0.3371, + "step": 4905 + }, + { + "epoch": 28.034285714285716, + "grad_norm": 73.85417175292969, + "learning_rate": 2.4406349206349208e-05, + "loss": 0.339, + "step": 4906 + }, + { + "epoch": 28.04, + "grad_norm": 92.99559783935547, + "learning_rate": 2.44e-05, + "loss": 0.4866, + "step": 4907 + }, + { + "epoch": 28.045714285714286, + "grad_norm": 90.50003814697266, + "learning_rate": 2.4393650793650793e-05, + "loss": 0.2672, + "step": 4908 + }, + { + "epoch": 28.05142857142857, + "grad_norm": 245.05653381347656, + "learning_rate": 2.438730158730159e-05, + "loss": 0.2963, + "step": 4909 + }, + { + "epoch": 28.057142857142857, + "grad_norm": 45.6023063659668, + "learning_rate": 2.438095238095238e-05, + "loss": 0.2084, + "step": 4910 + }, + { + "epoch": 28.062857142857144, + "grad_norm": 36.97957992553711, + "learning_rate": 
2.4374603174603174e-05, + "loss": 0.3108, + "step": 4911 + }, + { + "epoch": 28.068571428571428, + "grad_norm": 62.7725830078125, + "learning_rate": 2.436825396825397e-05, + "loss": 0.3325, + "step": 4912 + }, + { + "epoch": 28.074285714285715, + "grad_norm": 163.5262908935547, + "learning_rate": 2.4361904761904763e-05, + "loss": 0.185, + "step": 4913 + }, + { + "epoch": 28.08, + "grad_norm": 46.41025161743164, + "learning_rate": 2.4355555555555555e-05, + "loss": 0.3088, + "step": 4914 + }, + { + "epoch": 28.085714285714285, + "grad_norm": 131.6428680419922, + "learning_rate": 2.4349206349206348e-05, + "loss": 0.2327, + "step": 4915 + }, + { + "epoch": 28.091428571428573, + "grad_norm": 67.80531311035156, + "learning_rate": 2.4342857142857144e-05, + "loss": 0.4335, + "step": 4916 + }, + { + "epoch": 28.097142857142856, + "grad_norm": 174.15245056152344, + "learning_rate": 2.4336507936507937e-05, + "loss": 0.2031, + "step": 4917 + }, + { + "epoch": 28.102857142857143, + "grad_norm": 39.03656768798828, + "learning_rate": 2.433015873015873e-05, + "loss": 0.1855, + "step": 4918 + }, + { + "epoch": 28.10857142857143, + "grad_norm": 265.5483703613281, + "learning_rate": 2.4323809523809525e-05, + "loss": 0.2976, + "step": 4919 + }, + { + "epoch": 28.114285714285714, + "grad_norm": 35.574119567871094, + "learning_rate": 2.4317460317460318e-05, + "loss": 0.2286, + "step": 4920 + }, + { + "epoch": 28.12, + "grad_norm": 25.896102905273438, + "learning_rate": 2.431111111111111e-05, + "loss": 0.2065, + "step": 4921 + }, + { + "epoch": 28.125714285714285, + "grad_norm": 60.17288589477539, + "learning_rate": 2.4304761904761907e-05, + "loss": 0.233, + "step": 4922 + }, + { + "epoch": 28.13142857142857, + "grad_norm": 61.1347541809082, + "learning_rate": 2.42984126984127e-05, + "loss": 0.2688, + "step": 4923 + }, + { + "epoch": 28.13714285714286, + "grad_norm": 24.536046981811523, + "learning_rate": 2.4292063492063495e-05, + "loss": 0.2074, + "step": 4924 + }, + { + "epoch": 
28.142857142857142, + "grad_norm": 48.766456604003906, + "learning_rate": 2.4285714285714288e-05, + "loss": 0.1893, + "step": 4925 + }, + { + "epoch": 28.14857142857143, + "grad_norm": 77.26349639892578, + "learning_rate": 2.427936507936508e-05, + "loss": 0.2512, + "step": 4926 + }, + { + "epoch": 28.154285714285713, + "grad_norm": 37.16670608520508, + "learning_rate": 2.4273015873015877e-05, + "loss": 0.248, + "step": 4927 + }, + { + "epoch": 28.16, + "grad_norm": 34.46220016479492, + "learning_rate": 2.426666666666667e-05, + "loss": 0.2669, + "step": 4928 + }, + { + "epoch": 28.165714285714287, + "grad_norm": 367.3511962890625, + "learning_rate": 2.4260317460317462e-05, + "loss": 0.2103, + "step": 4929 + }, + { + "epoch": 28.17142857142857, + "grad_norm": 38.58537292480469, + "learning_rate": 2.4253968253968255e-05, + "loss": 0.2382, + "step": 4930 + }, + { + "epoch": 28.177142857142858, + "grad_norm": 58.70418167114258, + "learning_rate": 2.424761904761905e-05, + "loss": 0.329, + "step": 4931 + }, + { + "epoch": 28.18285714285714, + "grad_norm": 561.0505981445312, + "learning_rate": 2.4241269841269843e-05, + "loss": 0.3132, + "step": 4932 + }, + { + "epoch": 28.18857142857143, + "grad_norm": 12.026835441589355, + "learning_rate": 2.4234920634920636e-05, + "loss": 0.2756, + "step": 4933 + }, + { + "epoch": 28.194285714285716, + "grad_norm": 80.02447509765625, + "learning_rate": 2.4228571428571432e-05, + "loss": 0.2718, + "step": 4934 + }, + { + "epoch": 28.2, + "grad_norm": 71.53707885742188, + "learning_rate": 2.4222222222222224e-05, + "loss": 0.3472, + "step": 4935 + }, + { + "epoch": 28.205714285714286, + "grad_norm": 41.7224235534668, + "learning_rate": 2.4215873015873017e-05, + "loss": 0.3375, + "step": 4936 + }, + { + "epoch": 28.21142857142857, + "grad_norm": 33.52546310424805, + "learning_rate": 2.420952380952381e-05, + "loss": 0.227, + "step": 4937 + }, + { + "epoch": 28.217142857142857, + "grad_norm": 52.87554168701172, + "learning_rate": 
2.4203174603174606e-05, + "loss": 0.3054, + "step": 4938 + }, + { + "epoch": 28.222857142857144, + "grad_norm": 21.825029373168945, + "learning_rate": 2.41968253968254e-05, + "loss": 0.3198, + "step": 4939 + }, + { + "epoch": 28.228571428571428, + "grad_norm": 53.98521041870117, + "learning_rate": 2.419047619047619e-05, + "loss": 0.2784, + "step": 4940 + }, + { + "epoch": 28.234285714285715, + "grad_norm": 47.195152282714844, + "learning_rate": 2.4184126984126984e-05, + "loss": 0.2392, + "step": 4941 + }, + { + "epoch": 28.24, + "grad_norm": 46.09539031982422, + "learning_rate": 2.417777777777778e-05, + "loss": 0.4016, + "step": 4942 + }, + { + "epoch": 28.245714285714286, + "grad_norm": 32.95344543457031, + "learning_rate": 2.4171428571428572e-05, + "loss": 0.2238, + "step": 4943 + }, + { + "epoch": 28.251428571428573, + "grad_norm": 44.7630500793457, + "learning_rate": 2.4165079365079365e-05, + "loss": 0.2587, + "step": 4944 + }, + { + "epoch": 28.257142857142856, + "grad_norm": 28.06485939025879, + "learning_rate": 2.415873015873016e-05, + "loss": 0.2784, + "step": 4945 + }, + { + "epoch": 28.262857142857143, + "grad_norm": 24.325355529785156, + "learning_rate": 2.4152380952380954e-05, + "loss": 0.3475, + "step": 4946 + }, + { + "epoch": 28.268571428571427, + "grad_norm": 75.55956268310547, + "learning_rate": 2.4146031746031746e-05, + "loss": 0.3422, + "step": 4947 + }, + { + "epoch": 28.274285714285714, + "grad_norm": 50.63954162597656, + "learning_rate": 2.413968253968254e-05, + "loss": 0.3834, + "step": 4948 + }, + { + "epoch": 28.28, + "grad_norm": 62.6453857421875, + "learning_rate": 2.4133333333333335e-05, + "loss": 0.2165, + "step": 4949 + }, + { + "epoch": 28.285714285714285, + "grad_norm": 24.08328628540039, + "learning_rate": 2.4126984126984128e-05, + "loss": 0.2537, + "step": 4950 + }, + { + "epoch": 28.291428571428572, + "grad_norm": 69.02284240722656, + "learning_rate": 2.412063492063492e-05, + "loss": 0.3462, + "step": 4951 + }, + { + "epoch": 
28.29714285714286, + "grad_norm": 51.43291473388672, + "learning_rate": 2.4114285714285713e-05, + "loss": 0.2596, + "step": 4952 + }, + { + "epoch": 28.302857142857142, + "grad_norm": 30.58786964416504, + "learning_rate": 2.410793650793651e-05, + "loss": 0.3577, + "step": 4953 + }, + { + "epoch": 28.30857142857143, + "grad_norm": 278.1788330078125, + "learning_rate": 2.41015873015873e-05, + "loss": 0.249, + "step": 4954 + }, + { + "epoch": 28.314285714285713, + "grad_norm": 100.6025390625, + "learning_rate": 2.4095238095238094e-05, + "loss": 0.389, + "step": 4955 + }, + { + "epoch": 28.32, + "grad_norm": 37.11484146118164, + "learning_rate": 2.408888888888889e-05, + "loss": 0.2978, + "step": 4956 + }, + { + "epoch": 28.325714285714287, + "grad_norm": 30.713354110717773, + "learning_rate": 2.4082539682539683e-05, + "loss": 0.2905, + "step": 4957 + }, + { + "epoch": 28.33142857142857, + "grad_norm": 41.22504425048828, + "learning_rate": 2.407619047619048e-05, + "loss": 0.3737, + "step": 4958 + }, + { + "epoch": 28.337142857142858, + "grad_norm": 33.79350280761719, + "learning_rate": 2.406984126984127e-05, + "loss": 0.3313, + "step": 4959 + }, + { + "epoch": 28.34285714285714, + "grad_norm": 29.72348403930664, + "learning_rate": 2.4063492063492064e-05, + "loss": 0.3027, + "step": 4960 + }, + { + "epoch": 28.34857142857143, + "grad_norm": 81.20671081542969, + "learning_rate": 2.405714285714286e-05, + "loss": 0.2993, + "step": 4961 + }, + { + "epoch": 28.354285714285716, + "grad_norm": 53.03126525878906, + "learning_rate": 2.4050793650793653e-05, + "loss": 0.3504, + "step": 4962 + }, + { + "epoch": 28.36, + "grad_norm": 36.0410041809082, + "learning_rate": 2.4044444444444445e-05, + "loss": 0.231, + "step": 4963 + }, + { + "epoch": 28.365714285714287, + "grad_norm": 44.50819396972656, + "learning_rate": 2.403809523809524e-05, + "loss": 0.2163, + "step": 4964 + }, + { + "epoch": 28.37142857142857, + "grad_norm": 49.101707458496094, + "learning_rate": 
2.4031746031746034e-05, + "loss": 0.2293, + "step": 4965 + }, + { + "epoch": 28.377142857142857, + "grad_norm": 36.19911575317383, + "learning_rate": 2.4025396825396827e-05, + "loss": 0.2537, + "step": 4966 + }, + { + "epoch": 28.382857142857144, + "grad_norm": 87.96337890625, + "learning_rate": 2.4019047619047623e-05, + "loss": 0.2773, + "step": 4967 + }, + { + "epoch": 28.388571428571428, + "grad_norm": 32.72904586791992, + "learning_rate": 2.4012698412698415e-05, + "loss": 0.3971, + "step": 4968 + }, + { + "epoch": 28.394285714285715, + "grad_norm": 36.359352111816406, + "learning_rate": 2.4006349206349208e-05, + "loss": 0.2817, + "step": 4969 + }, + { + "epoch": 28.4, + "grad_norm": 36.13983917236328, + "learning_rate": 2.4e-05, + "loss": 0.235, + "step": 4970 + }, + { + "epoch": 28.405714285714286, + "grad_norm": 32.429683685302734, + "learning_rate": 2.3993650793650797e-05, + "loss": 0.2048, + "step": 4971 + }, + { + "epoch": 28.411428571428573, + "grad_norm": 285.6448974609375, + "learning_rate": 2.398730158730159e-05, + "loss": 0.275, + "step": 4972 + }, + { + "epoch": 28.417142857142856, + "grad_norm": 24.912200927734375, + "learning_rate": 2.3980952380952382e-05, + "loss": 0.3147, + "step": 4973 + }, + { + "epoch": 28.422857142857143, + "grad_norm": 42.56909942626953, + "learning_rate": 2.3974603174603175e-05, + "loss": 0.2346, + "step": 4974 + }, + { + "epoch": 28.428571428571427, + "grad_norm": 22.605735778808594, + "learning_rate": 2.396825396825397e-05, + "loss": 0.3097, + "step": 4975 + }, + { + "epoch": 28.434285714285714, + "grad_norm": 29.61728286743164, + "learning_rate": 2.3961904761904763e-05, + "loss": 0.2481, + "step": 4976 + }, + { + "epoch": 28.44, + "grad_norm": 81.6629638671875, + "learning_rate": 2.3955555555555556e-05, + "loss": 0.2466, + "step": 4977 + }, + { + "epoch": 28.445714285714285, + "grad_norm": 54.739471435546875, + "learning_rate": 2.394920634920635e-05, + "loss": 0.3339, + "step": 4978 + }, + { + "epoch": 
28.451428571428572, + "grad_norm": 41.38652420043945, + "learning_rate": 2.3942857142857144e-05, + "loss": 0.2028, + "step": 4979 + }, + { + "epoch": 28.457142857142856, + "grad_norm": 33.170894622802734, + "learning_rate": 2.3936507936507937e-05, + "loss": 0.2143, + "step": 4980 + }, + { + "epoch": 28.462857142857143, + "grad_norm": 39.60905456542969, + "learning_rate": 2.393015873015873e-05, + "loss": 0.2176, + "step": 4981 + }, + { + "epoch": 28.46857142857143, + "grad_norm": 36.18077850341797, + "learning_rate": 2.3923809523809526e-05, + "loss": 0.402, + "step": 4982 + }, + { + "epoch": 28.474285714285713, + "grad_norm": 29.43824005126953, + "learning_rate": 2.391746031746032e-05, + "loss": 0.2801, + "step": 4983 + }, + { + "epoch": 28.48, + "grad_norm": 109.75287628173828, + "learning_rate": 2.391111111111111e-05, + "loss": 0.2771, + "step": 4984 + }, + { + "epoch": 28.485714285714284, + "grad_norm": 51.55800247192383, + "learning_rate": 2.3904761904761904e-05, + "loss": 0.2601, + "step": 4985 + }, + { + "epoch": 28.49142857142857, + "grad_norm": 46.78274154663086, + "learning_rate": 2.38984126984127e-05, + "loss": 0.2106, + "step": 4986 + }, + { + "epoch": 28.497142857142858, + "grad_norm": 81.14583587646484, + "learning_rate": 2.3892063492063492e-05, + "loss": 0.2288, + "step": 4987 + }, + { + "epoch": 28.502857142857142, + "grad_norm": 421.8680114746094, + "learning_rate": 2.3885714285714285e-05, + "loss": 0.3071, + "step": 4988 + }, + { + "epoch": 28.50857142857143, + "grad_norm": 43.99879455566406, + "learning_rate": 2.3879365079365078e-05, + "loss": 0.2079, + "step": 4989 + }, + { + "epoch": 28.514285714285712, + "grad_norm": 50.826026916503906, + "learning_rate": 2.3873015873015874e-05, + "loss": 0.2747, + "step": 4990 + }, + { + "epoch": 28.52, + "grad_norm": 129.66259765625, + "learning_rate": 2.3866666666666666e-05, + "loss": 0.3396, + "step": 4991 + }, + { + "epoch": 28.525714285714287, + "grad_norm": 36.57542037963867, + "learning_rate": 
2.3860317460317462e-05, + "loss": 0.2043, + "step": 4992 + }, + { + "epoch": 28.53142857142857, + "grad_norm": 26.30794334411621, + "learning_rate": 2.3853968253968255e-05, + "loss": 0.1988, + "step": 4993 + }, + { + "epoch": 28.537142857142857, + "grad_norm": 851.5133056640625, + "learning_rate": 2.3847619047619048e-05, + "loss": 0.2133, + "step": 4994 + }, + { + "epoch": 28.542857142857144, + "grad_norm": 84.00727844238281, + "learning_rate": 2.3841269841269844e-05, + "loss": 0.3999, + "step": 4995 + }, + { + "epoch": 28.548571428571428, + "grad_norm": 49.521583557128906, + "learning_rate": 2.3834920634920636e-05, + "loss": 0.2687, + "step": 4996 + }, + { + "epoch": 28.554285714285715, + "grad_norm": 38.404232025146484, + "learning_rate": 2.3828571428571432e-05, + "loss": 0.3111, + "step": 4997 + }, + { + "epoch": 28.56, + "grad_norm": 81.65354919433594, + "learning_rate": 2.3822222222222225e-05, + "loss": 0.3128, + "step": 4998 + }, + { + "epoch": 28.565714285714286, + "grad_norm": 139.422607421875, + "learning_rate": 2.3815873015873018e-05, + "loss": 0.3024, + "step": 4999 + }, + { + "epoch": 28.571428571428573, + "grad_norm": 64.09517669677734, + "learning_rate": 2.380952380952381e-05, + "loss": 0.2531, + "step": 5000 + }, + { + "epoch": 28.577142857142857, + "grad_norm": 725.398681640625, + "learning_rate": 2.3803174603174606e-05, + "loss": 0.3155, + "step": 5001 + }, + { + "epoch": 28.582857142857144, + "grad_norm": 62.54762268066406, + "learning_rate": 2.37968253968254e-05, + "loss": 0.2865, + "step": 5002 + }, + { + "epoch": 28.588571428571427, + "grad_norm": 52.35954284667969, + "learning_rate": 2.379047619047619e-05, + "loss": 0.1661, + "step": 5003 + }, + { + "epoch": 28.594285714285714, + "grad_norm": 21.684301376342773, + "learning_rate": 2.3784126984126988e-05, + "loss": 0.1934, + "step": 5004 + }, + { + "epoch": 28.6, + "grad_norm": 68.8770751953125, + "learning_rate": 2.377777777777778e-05, + "loss": 0.2781, + "step": 5005 + }, + { + "epoch": 
28.605714285714285, + "grad_norm": 55.42689514160156, + "learning_rate": 2.3771428571428573e-05, + "loss": 0.1953, + "step": 5006 + }, + { + "epoch": 28.611428571428572, + "grad_norm": 60.84716796875, + "learning_rate": 2.3765079365079365e-05, + "loss": 0.2181, + "step": 5007 + }, + { + "epoch": 28.617142857142856, + "grad_norm": 39.095890045166016, + "learning_rate": 2.375873015873016e-05, + "loss": 0.2337, + "step": 5008 + }, + { + "epoch": 28.622857142857143, + "grad_norm": 43.01543426513672, + "learning_rate": 2.3752380952380954e-05, + "loss": 0.304, + "step": 5009 + }, + { + "epoch": 28.62857142857143, + "grad_norm": 45.74905776977539, + "learning_rate": 2.3746031746031747e-05, + "loss": 0.2976, + "step": 5010 + }, + { + "epoch": 28.634285714285713, + "grad_norm": 34.543521881103516, + "learning_rate": 2.373968253968254e-05, + "loss": 0.2022, + "step": 5011 + }, + { + "epoch": 28.64, + "grad_norm": 35.3597526550293, + "learning_rate": 2.3733333333333335e-05, + "loss": 0.4038, + "step": 5012 + }, + { + "epoch": 28.645714285714284, + "grad_norm": 133.12742614746094, + "learning_rate": 2.3726984126984128e-05, + "loss": 0.3324, + "step": 5013 + }, + { + "epoch": 28.65142857142857, + "grad_norm": 49.119483947753906, + "learning_rate": 2.372063492063492e-05, + "loss": 0.2856, + "step": 5014 + }, + { + "epoch": 28.65714285714286, + "grad_norm": 34.71833038330078, + "learning_rate": 2.3714285714285717e-05, + "loss": 0.1847, + "step": 5015 + }, + { + "epoch": 28.662857142857142, + "grad_norm": 32.97146987915039, + "learning_rate": 2.370793650793651e-05, + "loss": 0.235, + "step": 5016 + }, + { + "epoch": 28.66857142857143, + "grad_norm": 304.8446960449219, + "learning_rate": 2.3701587301587302e-05, + "loss": 0.2851, + "step": 5017 + }, + { + "epoch": 28.674285714285713, + "grad_norm": 37.80881881713867, + "learning_rate": 2.3695238095238095e-05, + "loss": 0.2828, + "step": 5018 + }, + { + "epoch": 28.68, + "grad_norm": 64.08672332763672, + "learning_rate": 
2.368888888888889e-05, + "loss": 0.2789, + "step": 5019 + }, + { + "epoch": 28.685714285714287, + "grad_norm": 47.080753326416016, + "learning_rate": 2.3682539682539683e-05, + "loss": 0.3367, + "step": 5020 + }, + { + "epoch": 28.69142857142857, + "grad_norm": 248.08181762695312, + "learning_rate": 2.3676190476190476e-05, + "loss": 0.369, + "step": 5021 + }, + { + "epoch": 28.697142857142858, + "grad_norm": 58.87960433959961, + "learning_rate": 2.366984126984127e-05, + "loss": 0.3662, + "step": 5022 + }, + { + "epoch": 28.70285714285714, + "grad_norm": 69.46220397949219, + "learning_rate": 2.3663492063492065e-05, + "loss": 0.2592, + "step": 5023 + }, + { + "epoch": 28.708571428571428, + "grad_norm": 30.98453140258789, + "learning_rate": 2.3657142857142857e-05, + "loss": 0.2933, + "step": 5024 + }, + { + "epoch": 28.714285714285715, + "grad_norm": 54.25831604003906, + "learning_rate": 2.365079365079365e-05, + "loss": 0.3409, + "step": 5025 + }, + { + "epoch": 28.72, + "grad_norm": 41.06281280517578, + "learning_rate": 2.3644444444444446e-05, + "loss": 0.2852, + "step": 5026 + }, + { + "epoch": 28.725714285714286, + "grad_norm": 17.57217788696289, + "learning_rate": 2.363809523809524e-05, + "loss": 0.2236, + "step": 5027 + }, + { + "epoch": 28.731428571428573, + "grad_norm": 57.584651947021484, + "learning_rate": 2.363174603174603e-05, + "loss": 0.2076, + "step": 5028 + }, + { + "epoch": 28.737142857142857, + "grad_norm": 36.87367248535156, + "learning_rate": 2.3625396825396827e-05, + "loss": 0.2198, + "step": 5029 + }, + { + "epoch": 28.742857142857144, + "grad_norm": 134.8672332763672, + "learning_rate": 2.361904761904762e-05, + "loss": 0.3409, + "step": 5030 + }, + { + "epoch": 28.748571428571427, + "grad_norm": 102.65425872802734, + "learning_rate": 2.3612698412698416e-05, + "loss": 0.2485, + "step": 5031 + }, + { + "epoch": 28.754285714285714, + "grad_norm": 43.4425163269043, + "learning_rate": 2.360634920634921e-05, + "loss": 0.2658, + "step": 5032 + }, + { + 
"epoch": 28.76, + "grad_norm": 72.87287902832031, + "learning_rate": 2.36e-05, + "loss": 0.307, + "step": 5033 + }, + { + "epoch": 28.765714285714285, + "grad_norm": 70.9742431640625, + "learning_rate": 2.3593650793650797e-05, + "loss": 0.2784, + "step": 5034 + }, + { + "epoch": 28.771428571428572, + "grad_norm": 21.395336151123047, + "learning_rate": 2.358730158730159e-05, + "loss": 0.2039, + "step": 5035 + }, + { + "epoch": 28.777142857142856, + "grad_norm": 38.85607147216797, + "learning_rate": 2.3580952380952382e-05, + "loss": 0.2805, + "step": 5036 + }, + { + "epoch": 28.782857142857143, + "grad_norm": 35.8581657409668, + "learning_rate": 2.357460317460318e-05, + "loss": 0.3868, + "step": 5037 + }, + { + "epoch": 28.78857142857143, + "grad_norm": 36.439613342285156, + "learning_rate": 2.356825396825397e-05, + "loss": 0.2175, + "step": 5038 + }, + { + "epoch": 28.794285714285714, + "grad_norm": 40.9402961730957, + "learning_rate": 2.3561904761904764e-05, + "loss": 0.2621, + "step": 5039 + }, + { + "epoch": 28.8, + "grad_norm": 31.58989715576172, + "learning_rate": 2.3555555555555556e-05, + "loss": 0.2656, + "step": 5040 + }, + { + "epoch": 28.805714285714284, + "grad_norm": 28.273897171020508, + "learning_rate": 2.3549206349206352e-05, + "loss": 0.3005, + "step": 5041 + }, + { + "epoch": 28.81142857142857, + "grad_norm": 31.848737716674805, + "learning_rate": 2.3542857142857145e-05, + "loss": 0.2608, + "step": 5042 + }, + { + "epoch": 28.81714285714286, + "grad_norm": 26.318984985351562, + "learning_rate": 2.3536507936507938e-05, + "loss": 0.2807, + "step": 5043 + }, + { + "epoch": 28.822857142857142, + "grad_norm": 27.732038497924805, + "learning_rate": 2.353015873015873e-05, + "loss": 0.3175, + "step": 5044 + }, + { + "epoch": 28.82857142857143, + "grad_norm": 159.66786193847656, + "learning_rate": 2.3523809523809526e-05, + "loss": 0.2746, + "step": 5045 + }, + { + "epoch": 28.834285714285713, + "grad_norm": 30.27684783935547, + "learning_rate": 
2.351746031746032e-05, + "loss": 0.31, + "step": 5046 + }, + { + "epoch": 28.84, + "grad_norm": 179.499755859375, + "learning_rate": 2.351111111111111e-05, + "loss": 0.2647, + "step": 5047 + }, + { + "epoch": 28.845714285714287, + "grad_norm": 37.019981384277344, + "learning_rate": 2.3504761904761908e-05, + "loss": 0.2639, + "step": 5048 + }, + { + "epoch": 28.85142857142857, + "grad_norm": 25.458894729614258, + "learning_rate": 2.34984126984127e-05, + "loss": 0.2163, + "step": 5049 + }, + { + "epoch": 28.857142857142858, + "grad_norm": 28.230152130126953, + "learning_rate": 2.3492063492063493e-05, + "loss": 0.2093, + "step": 5050 + }, + { + "epoch": 28.86285714285714, + "grad_norm": 60.67851257324219, + "learning_rate": 2.3485714285714285e-05, + "loss": 0.3026, + "step": 5051 + }, + { + "epoch": 28.86857142857143, + "grad_norm": 17.1420955657959, + "learning_rate": 2.347936507936508e-05, + "loss": 0.2783, + "step": 5052 + }, + { + "epoch": 28.874285714285715, + "grad_norm": 23.672748565673828, + "learning_rate": 2.3473015873015874e-05, + "loss": 0.2812, + "step": 5053 + }, + { + "epoch": 28.88, + "grad_norm": 41.617652893066406, + "learning_rate": 2.3466666666666667e-05, + "loss": 0.2867, + "step": 5054 + }, + { + "epoch": 28.885714285714286, + "grad_norm": 256.7421875, + "learning_rate": 2.346031746031746e-05, + "loss": 0.3205, + "step": 5055 + }, + { + "epoch": 28.89142857142857, + "grad_norm": 70.80400085449219, + "learning_rate": 2.3453968253968255e-05, + "loss": 0.3938, + "step": 5056 + }, + { + "epoch": 28.897142857142857, + "grad_norm": 35.83601379394531, + "learning_rate": 2.3447619047619048e-05, + "loss": 0.215, + "step": 5057 + }, + { + "epoch": 28.902857142857144, + "grad_norm": 64.71305084228516, + "learning_rate": 2.344126984126984e-05, + "loss": 0.2704, + "step": 5058 + }, + { + "epoch": 28.908571428571427, + "grad_norm": 2356.586181640625, + "learning_rate": 2.3434920634920637e-05, + "loss": 0.3223, + "step": 5059 + }, + { + "epoch": 
28.914285714285715, + "grad_norm": 122.84352111816406, + "learning_rate": 2.342857142857143e-05, + "loss": 0.2599, + "step": 5060 + }, + { + "epoch": 28.92, + "grad_norm": 47.86798858642578, + "learning_rate": 2.3422222222222222e-05, + "loss": 0.4071, + "step": 5061 + }, + { + "epoch": 28.925714285714285, + "grad_norm": 75.26738739013672, + "learning_rate": 2.3415873015873015e-05, + "loss": 0.2682, + "step": 5062 + }, + { + "epoch": 28.931428571428572, + "grad_norm": 46.6579704284668, + "learning_rate": 2.340952380952381e-05, + "loss": 0.3293, + "step": 5063 + }, + { + "epoch": 28.937142857142856, + "grad_norm": 56.65286636352539, + "learning_rate": 2.3403174603174603e-05, + "loss": 0.2709, + "step": 5064 + }, + { + "epoch": 28.942857142857143, + "grad_norm": 42.67585372924805, + "learning_rate": 2.3396825396825396e-05, + "loss": 0.2689, + "step": 5065 + }, + { + "epoch": 28.94857142857143, + "grad_norm": 74.5224838256836, + "learning_rate": 2.3390476190476192e-05, + "loss": 0.3419, + "step": 5066 + }, + { + "epoch": 28.954285714285714, + "grad_norm": 54.574241638183594, + "learning_rate": 2.3384126984126985e-05, + "loss": 0.2683, + "step": 5067 + }, + { + "epoch": 28.96, + "grad_norm": 56.5332145690918, + "learning_rate": 2.337777777777778e-05, + "loss": 0.3041, + "step": 5068 + }, + { + "epoch": 28.965714285714284, + "grad_norm": 26.72911262512207, + "learning_rate": 2.3371428571428573e-05, + "loss": 0.259, + "step": 5069 + }, + { + "epoch": 28.97142857142857, + "grad_norm": 55.12527084350586, + "learning_rate": 2.336507936507937e-05, + "loss": 0.2944, + "step": 5070 + }, + { + "epoch": 28.97714285714286, + "grad_norm": 1067.4193115234375, + "learning_rate": 2.3358730158730162e-05, + "loss": 0.2764, + "step": 5071 + }, + { + "epoch": 28.982857142857142, + "grad_norm": 32.64695739746094, + "learning_rate": 2.3352380952380955e-05, + "loss": 0.2097, + "step": 5072 + }, + { + "epoch": 28.98857142857143, + "grad_norm": 53.39537048339844, + "learning_rate": 
2.3346031746031747e-05, + "loss": 0.1862, + "step": 5073 + }, + { + "epoch": 28.994285714285713, + "grad_norm": 46.71490478515625, + "learning_rate": 2.3339682539682543e-05, + "loss": 0.2427, + "step": 5074 + }, + { + "epoch": 29.0, + "grad_norm": 23.4241886138916, + "learning_rate": 2.3333333333333336e-05, + "loss": 0.2403, + "step": 5075 + }, + { + "epoch": 29.0, + "eval_classes": 0, + "eval_loss": 0.6370882987976074, + "eval_map": 0.9197, + "eval_map_50": 0.96, + "eval_map_75": 0.9496, + "eval_map_large": 0.9198, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9197, + "eval_map_small": -1.0, + "eval_mar_1": 0.7803, + "eval_mar_10": 0.9714, + "eval_mar_100": 0.9737, + "eval_mar_100_per_class": 0.9737, + "eval_mar_large": 0.9737, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.2092, + "eval_samples_per_second": 22.257, + "eval_steps_per_second": 2.801, + "step": 5075 + }, + { + "epoch": 29.005714285714287, + "grad_norm": 62.46238327026367, + "learning_rate": 2.332698412698413e-05, + "loss": 0.2222, + "step": 5076 + }, + { + "epoch": 29.01142857142857, + "grad_norm": 28.127099990844727, + "learning_rate": 2.332063492063492e-05, + "loss": 0.2054, + "step": 5077 + }, + { + "epoch": 29.017142857142858, + "grad_norm": 21.029979705810547, + "learning_rate": 2.3314285714285717e-05, + "loss": 0.2697, + "step": 5078 + }, + { + "epoch": 29.02285714285714, + "grad_norm": 34.36430740356445, + "learning_rate": 2.330793650793651e-05, + "loss": 0.3093, + "step": 5079 + }, + { + "epoch": 29.02857142857143, + "grad_norm": 53.51395034790039, + "learning_rate": 2.3301587301587302e-05, + "loss": 0.2443, + "step": 5080 + }, + { + "epoch": 29.034285714285716, + "grad_norm": 153.634521484375, + "learning_rate": 2.3295238095238095e-05, + "loss": 0.2986, + "step": 5081 + }, + { + "epoch": 29.04, + "grad_norm": 27.302949905395508, + "learning_rate": 2.328888888888889e-05, + "loss": 0.178, + "step": 5082 + }, + { + "epoch": 29.045714285714286, + "grad_norm": 
99.04785919189453, + "learning_rate": 2.3282539682539684e-05, + "loss": 0.3069, + "step": 5083 + }, + { + "epoch": 29.05142857142857, + "grad_norm": 26.07029914855957, + "learning_rate": 2.3276190476190476e-05, + "loss": 0.1895, + "step": 5084 + }, + { + "epoch": 29.057142857142857, + "grad_norm": 41.6248664855957, + "learning_rate": 2.3269841269841272e-05, + "loss": 0.5042, + "step": 5085 + }, + { + "epoch": 29.062857142857144, + "grad_norm": 36.2380485534668, + "learning_rate": 2.3263492063492065e-05, + "loss": 0.1864, + "step": 5086 + }, + { + "epoch": 29.068571428571428, + "grad_norm": 164.3536376953125, + "learning_rate": 2.3257142857142858e-05, + "loss": 0.1958, + "step": 5087 + }, + { + "epoch": 29.074285714285715, + "grad_norm": 51.7596549987793, + "learning_rate": 2.325079365079365e-05, + "loss": 0.2627, + "step": 5088 + }, + { + "epoch": 29.08, + "grad_norm": 48.146697998046875, + "learning_rate": 2.3244444444444446e-05, + "loss": 0.2056, + "step": 5089 + }, + { + "epoch": 29.085714285714285, + "grad_norm": 104.46647644042969, + "learning_rate": 2.323809523809524e-05, + "loss": 0.2248, + "step": 5090 + }, + { + "epoch": 29.091428571428573, + "grad_norm": 54.64583206176758, + "learning_rate": 2.323174603174603e-05, + "loss": 0.2388, + "step": 5091 + }, + { + "epoch": 29.097142857142856, + "grad_norm": 41.416770935058594, + "learning_rate": 2.3225396825396824e-05, + "loss": 0.2371, + "step": 5092 + }, + { + "epoch": 29.102857142857143, + "grad_norm": 49.938232421875, + "learning_rate": 2.321904761904762e-05, + "loss": 0.2131, + "step": 5093 + }, + { + "epoch": 29.10857142857143, + "grad_norm": 49.01100158691406, + "learning_rate": 2.3212698412698413e-05, + "loss": 0.252, + "step": 5094 + }, + { + "epoch": 29.114285714285714, + "grad_norm": 50.47200012207031, + "learning_rate": 2.3206349206349205e-05, + "loss": 0.2117, + "step": 5095 + }, + { + "epoch": 29.12, + "grad_norm": 28.55891227722168, + "learning_rate": 2.32e-05, + "loss": 0.2452, + "step": 5096 + 
}, + { + "epoch": 29.125714285714285, + "grad_norm": 63.32829666137695, + "learning_rate": 2.3193650793650794e-05, + "loss": 0.2514, + "step": 5097 + }, + { + "epoch": 29.13142857142857, + "grad_norm": 48.11371994018555, + "learning_rate": 2.3187301587301587e-05, + "loss": 0.1662, + "step": 5098 + }, + { + "epoch": 29.13714285714286, + "grad_norm": 71.61554718017578, + "learning_rate": 2.318095238095238e-05, + "loss": 0.2542, + "step": 5099 + }, + { + "epoch": 29.142857142857142, + "grad_norm": 40.359100341796875, + "learning_rate": 2.3174603174603175e-05, + "loss": 0.2401, + "step": 5100 + }, + { + "epoch": 29.14857142857143, + "grad_norm": 65.17253112792969, + "learning_rate": 2.3168253968253968e-05, + "loss": 0.2548, + "step": 5101 + }, + { + "epoch": 29.154285714285713, + "grad_norm": 28.41598129272461, + "learning_rate": 2.3161904761904764e-05, + "loss": 0.3215, + "step": 5102 + }, + { + "epoch": 29.16, + "grad_norm": 52.9326286315918, + "learning_rate": 2.3155555555555557e-05, + "loss": 0.2229, + "step": 5103 + }, + { + "epoch": 29.165714285714287, + "grad_norm": 53.673404693603516, + "learning_rate": 2.314920634920635e-05, + "loss": 0.2258, + "step": 5104 + }, + { + "epoch": 29.17142857142857, + "grad_norm": 73.0558090209961, + "learning_rate": 2.3142857142857145e-05, + "loss": 0.2291, + "step": 5105 + }, + { + "epoch": 29.177142857142858, + "grad_norm": 91.16373443603516, + "learning_rate": 2.3136507936507938e-05, + "loss": 0.249, + "step": 5106 + }, + { + "epoch": 29.18285714285714, + "grad_norm": 50.14714813232422, + "learning_rate": 2.3130158730158734e-05, + "loss": 0.2104, + "step": 5107 + }, + { + "epoch": 29.18857142857143, + "grad_norm": 26.575700759887695, + "learning_rate": 2.3123809523809527e-05, + "loss": 0.1729, + "step": 5108 + }, + { + "epoch": 29.194285714285716, + "grad_norm": 20.4130916595459, + "learning_rate": 2.311746031746032e-05, + "loss": 0.2506, + "step": 5109 + }, + { + "epoch": 29.2, + "grad_norm": 50.21626663208008, + 
"learning_rate": 2.3111111111111112e-05, + "loss": 0.2938, + "step": 5110 + }, + { + "epoch": 29.205714285714286, + "grad_norm": 76.35237121582031, + "learning_rate": 2.3104761904761908e-05, + "loss": 0.1744, + "step": 5111 + }, + { + "epoch": 29.21142857142857, + "grad_norm": 56.98535919189453, + "learning_rate": 2.30984126984127e-05, + "loss": 0.308, + "step": 5112 + }, + { + "epoch": 29.217142857142857, + "grad_norm": 25.252614974975586, + "learning_rate": 2.3092063492063493e-05, + "loss": 0.2666, + "step": 5113 + }, + { + "epoch": 29.222857142857144, + "grad_norm": 92.06158447265625, + "learning_rate": 2.3085714285714286e-05, + "loss": 0.2411, + "step": 5114 + }, + { + "epoch": 29.228571428571428, + "grad_norm": 34.30237579345703, + "learning_rate": 2.3079365079365082e-05, + "loss": 0.2165, + "step": 5115 + }, + { + "epoch": 29.234285714285715, + "grad_norm": 46.90911865234375, + "learning_rate": 2.3073015873015875e-05, + "loss": 0.2352, + "step": 5116 + }, + { + "epoch": 29.24, + "grad_norm": 51.809452056884766, + "learning_rate": 2.3066666666666667e-05, + "loss": 0.1986, + "step": 5117 + }, + { + "epoch": 29.245714285714286, + "grad_norm": 65.37468719482422, + "learning_rate": 2.3060317460317463e-05, + "loss": 0.3478, + "step": 5118 + }, + { + "epoch": 29.251428571428573, + "grad_norm": 64.15426635742188, + "learning_rate": 2.3053968253968256e-05, + "loss": 0.1796, + "step": 5119 + }, + { + "epoch": 29.257142857142856, + "grad_norm": 28.12679100036621, + "learning_rate": 2.304761904761905e-05, + "loss": 0.2485, + "step": 5120 + }, + { + "epoch": 29.262857142857143, + "grad_norm": 66.80747985839844, + "learning_rate": 2.304126984126984e-05, + "loss": 0.4543, + "step": 5121 + }, + { + "epoch": 29.268571428571427, + "grad_norm": 271.1167297363281, + "learning_rate": 2.3034920634920637e-05, + "loss": 0.3339, + "step": 5122 + }, + { + "epoch": 29.274285714285714, + "grad_norm": 40.10055160522461, + "learning_rate": 2.302857142857143e-05, + "loss": 0.2452, + 
"step": 5123 + }, + { + "epoch": 29.28, + "grad_norm": 84.37879180908203, + "learning_rate": 2.3022222222222222e-05, + "loss": 0.3137, + "step": 5124 + }, + { + "epoch": 29.285714285714285, + "grad_norm": 73.4004898071289, + "learning_rate": 2.3015873015873015e-05, + "loss": 0.4139, + "step": 5125 + }, + { + "epoch": 29.291428571428572, + "grad_norm": 53.298362731933594, + "learning_rate": 2.300952380952381e-05, + "loss": 0.4182, + "step": 5126 + }, + { + "epoch": 29.29714285714286, + "grad_norm": 46.19538879394531, + "learning_rate": 2.3003174603174604e-05, + "loss": 0.2312, + "step": 5127 + }, + { + "epoch": 29.302857142857142, + "grad_norm": 27.84966278076172, + "learning_rate": 2.2996825396825396e-05, + "loss": 0.295, + "step": 5128 + }, + { + "epoch": 29.30857142857143, + "grad_norm": 29.424104690551758, + "learning_rate": 2.2990476190476192e-05, + "loss": 0.1956, + "step": 5129 + }, + { + "epoch": 29.314285714285713, + "grad_norm": 52.50281524658203, + "learning_rate": 2.2984126984126985e-05, + "loss": 0.2747, + "step": 5130 + }, + { + "epoch": 29.32, + "grad_norm": 22.580923080444336, + "learning_rate": 2.2977777777777778e-05, + "loss": 0.2007, + "step": 5131 + }, + { + "epoch": 29.325714285714287, + "grad_norm": 71.85227966308594, + "learning_rate": 2.297142857142857e-05, + "loss": 0.2285, + "step": 5132 + }, + { + "epoch": 29.33142857142857, + "grad_norm": 32.25730514526367, + "learning_rate": 2.2965079365079366e-05, + "loss": 0.2697, + "step": 5133 + }, + { + "epoch": 29.337142857142858, + "grad_norm": 73.0450210571289, + "learning_rate": 2.295873015873016e-05, + "loss": 0.2458, + "step": 5134 + }, + { + "epoch": 29.34285714285714, + "grad_norm": 29.31700325012207, + "learning_rate": 2.295238095238095e-05, + "loss": 0.2876, + "step": 5135 + }, + { + "epoch": 29.34857142857143, + "grad_norm": 47.174705505371094, + "learning_rate": 2.2946031746031744e-05, + "loss": 0.2977, + "step": 5136 + }, + { + "epoch": 29.354285714285716, + "grad_norm": 
46.58511734008789, + "learning_rate": 2.293968253968254e-05, + "loss": 0.211, + "step": 5137 + }, + { + "epoch": 29.36, + "grad_norm": 625.805908203125, + "learning_rate": 2.2933333333333333e-05, + "loss": 0.3245, + "step": 5138 + }, + { + "epoch": 29.365714285714287, + "grad_norm": 286.7671813964844, + "learning_rate": 2.292698412698413e-05, + "loss": 0.2697, + "step": 5139 + }, + { + "epoch": 29.37142857142857, + "grad_norm": 64.6842041015625, + "learning_rate": 2.292063492063492e-05, + "loss": 0.402, + "step": 5140 + }, + { + "epoch": 29.377142857142857, + "grad_norm": 17.133848190307617, + "learning_rate": 2.2914285714285718e-05, + "loss": 0.2594, + "step": 5141 + }, + { + "epoch": 29.382857142857144, + "grad_norm": 22.38144302368164, + "learning_rate": 2.290793650793651e-05, + "loss": 0.3667, + "step": 5142 + }, + { + "epoch": 29.388571428571428, + "grad_norm": 1273.5078125, + "learning_rate": 2.2901587301587303e-05, + "loss": 0.3604, + "step": 5143 + }, + { + "epoch": 29.394285714285715, + "grad_norm": 188.72727966308594, + "learning_rate": 2.28952380952381e-05, + "loss": 0.2807, + "step": 5144 + }, + { + "epoch": 29.4, + "grad_norm": 29.680089950561523, + "learning_rate": 2.288888888888889e-05, + "loss": 0.2622, + "step": 5145 + }, + { + "epoch": 29.405714285714286, + "grad_norm": 87.98640441894531, + "learning_rate": 2.2882539682539684e-05, + "loss": 0.3601, + "step": 5146 + }, + { + "epoch": 29.411428571428573, + "grad_norm": 108.64775085449219, + "learning_rate": 2.2876190476190477e-05, + "loss": 0.4437, + "step": 5147 + }, + { + "epoch": 29.417142857142856, + "grad_norm": 25.7600040435791, + "learning_rate": 2.2869841269841273e-05, + "loss": 0.241, + "step": 5148 + }, + { + "epoch": 29.422857142857143, + "grad_norm": 23.64268684387207, + "learning_rate": 2.2863492063492065e-05, + "loss": 0.2116, + "step": 5149 + }, + { + "epoch": 29.428571428571427, + "grad_norm": 426.9701232910156, + "learning_rate": 2.2857142857142858e-05, + "loss": 0.2912, + "step": 
5150 + }, + { + "epoch": 29.434285714285714, + "grad_norm": 51.756980895996094, + "learning_rate": 2.2850793650793654e-05, + "loss": 0.2115, + "step": 5151 + }, + { + "epoch": 29.44, + "grad_norm": 26.336313247680664, + "learning_rate": 2.2844444444444447e-05, + "loss": 0.1632, + "step": 5152 + }, + { + "epoch": 29.445714285714285, + "grad_norm": 91.51924896240234, + "learning_rate": 2.283809523809524e-05, + "loss": 0.2748, + "step": 5153 + }, + { + "epoch": 29.451428571428572, + "grad_norm": 60.591732025146484, + "learning_rate": 2.2831746031746032e-05, + "loss": 0.2213, + "step": 5154 + }, + { + "epoch": 29.457142857142856, + "grad_norm": 83.90245819091797, + "learning_rate": 2.2825396825396828e-05, + "loss": 0.2334, + "step": 5155 + }, + { + "epoch": 29.462857142857143, + "grad_norm": 51.06547927856445, + "learning_rate": 2.281904761904762e-05, + "loss": 0.2837, + "step": 5156 + }, + { + "epoch": 29.46857142857143, + "grad_norm": 66.79014587402344, + "learning_rate": 2.2812698412698413e-05, + "loss": 0.3387, + "step": 5157 + }, + { + "epoch": 29.474285714285713, + "grad_norm": 36.439510345458984, + "learning_rate": 2.2806349206349206e-05, + "loss": 0.2813, + "step": 5158 + }, + { + "epoch": 29.48, + "grad_norm": 51.553646087646484, + "learning_rate": 2.2800000000000002e-05, + "loss": 0.3421, + "step": 5159 + }, + { + "epoch": 29.485714285714284, + "grad_norm": 33.91830825805664, + "learning_rate": 2.2793650793650795e-05, + "loss": 0.2587, + "step": 5160 + }, + { + "epoch": 29.49142857142857, + "grad_norm": 47.36806106567383, + "learning_rate": 2.2787301587301587e-05, + "loss": 0.1622, + "step": 5161 + }, + { + "epoch": 29.497142857142858, + "grad_norm": 63.804405212402344, + "learning_rate": 2.2780952380952383e-05, + "loss": 0.2065, + "step": 5162 + }, + { + "epoch": 29.502857142857142, + "grad_norm": 147.74241638183594, + "learning_rate": 2.2774603174603176e-05, + "loss": 0.2365, + "step": 5163 + }, + { + "epoch": 29.50857142857143, + "grad_norm": 
33.29631423950195, + "learning_rate": 2.276825396825397e-05, + "loss": 0.2639, + "step": 5164 + }, + { + "epoch": 29.514285714285712, + "grad_norm": 43.6353759765625, + "learning_rate": 2.276190476190476e-05, + "loss": 0.2283, + "step": 5165 + }, + { + "epoch": 29.52, + "grad_norm": 96.89825439453125, + "learning_rate": 2.2755555555555557e-05, + "loss": 0.2651, + "step": 5166 + }, + { + "epoch": 29.525714285714287, + "grad_norm": 72.50611114501953, + "learning_rate": 2.274920634920635e-05, + "loss": 0.2528, + "step": 5167 + }, + { + "epoch": 29.53142857142857, + "grad_norm": 40.47755813598633, + "learning_rate": 2.2742857142857142e-05, + "loss": 0.2446, + "step": 5168 + }, + { + "epoch": 29.537142857142857, + "grad_norm": 35.787445068359375, + "learning_rate": 2.2736507936507935e-05, + "loss": 0.3317, + "step": 5169 + }, + { + "epoch": 29.542857142857144, + "grad_norm": 62.62869644165039, + "learning_rate": 2.273015873015873e-05, + "loss": 0.2892, + "step": 5170 + }, + { + "epoch": 29.548571428571428, + "grad_norm": 65.2412338256836, + "learning_rate": 2.2723809523809524e-05, + "loss": 0.3481, + "step": 5171 + }, + { + "epoch": 29.554285714285715, + "grad_norm": 37.17427062988281, + "learning_rate": 2.2717460317460316e-05, + "loss": 0.3309, + "step": 5172 + }, + { + "epoch": 29.56, + "grad_norm": 34.58698654174805, + "learning_rate": 2.2711111111111112e-05, + "loss": 0.2199, + "step": 5173 + }, + { + "epoch": 29.565714285714286, + "grad_norm": 46.027435302734375, + "learning_rate": 2.2704761904761905e-05, + "loss": 0.3143, + "step": 5174 + }, + { + "epoch": 29.571428571428573, + "grad_norm": 68.04759216308594, + "learning_rate": 2.2698412698412698e-05, + "loss": 0.2608, + "step": 5175 + }, + { + "epoch": 29.577142857142857, + "grad_norm": 82.01683807373047, + "learning_rate": 2.2692063492063494e-05, + "loss": 0.2682, + "step": 5176 + }, + { + "epoch": 29.582857142857144, + "grad_norm": 40.78395080566406, + "learning_rate": 2.2685714285714286e-05, + "loss": 0.2364, 
+ "step": 5177 + }, + { + "epoch": 29.588571428571427, + "grad_norm": 69.27226257324219, + "learning_rate": 2.2679365079365082e-05, + "loss": 0.2444, + "step": 5178 + }, + { + "epoch": 29.594285714285714, + "grad_norm": 59.43379211425781, + "learning_rate": 2.2673015873015875e-05, + "loss": 0.2642, + "step": 5179 + }, + { + "epoch": 29.6, + "grad_norm": 100.76307678222656, + "learning_rate": 2.2666666666666668e-05, + "loss": 0.4777, + "step": 5180 + }, + { + "epoch": 29.605714285714285, + "grad_norm": 19.478042602539062, + "learning_rate": 2.2660317460317464e-05, + "loss": 0.2047, + "step": 5181 + }, + { + "epoch": 29.611428571428572, + "grad_norm": 58.325008392333984, + "learning_rate": 2.2653968253968256e-05, + "loss": 0.1958, + "step": 5182 + }, + { + "epoch": 29.617142857142856, + "grad_norm": 45.153602600097656, + "learning_rate": 2.264761904761905e-05, + "loss": 0.2607, + "step": 5183 + }, + { + "epoch": 29.622857142857143, + "grad_norm": 75.98762512207031, + "learning_rate": 2.2641269841269845e-05, + "loss": 0.317, + "step": 5184 + }, + { + "epoch": 29.62857142857143, + "grad_norm": 34.11820602416992, + "learning_rate": 2.2634920634920638e-05, + "loss": 0.2393, + "step": 5185 + }, + { + "epoch": 29.634285714285713, + "grad_norm": 77.3637466430664, + "learning_rate": 2.262857142857143e-05, + "loss": 0.2647, + "step": 5186 + }, + { + "epoch": 29.64, + "grad_norm": 69.17243957519531, + "learning_rate": 2.2622222222222223e-05, + "loss": 0.2659, + "step": 5187 + }, + { + "epoch": 29.645714285714284, + "grad_norm": 54.292015075683594, + "learning_rate": 2.261587301587302e-05, + "loss": 0.2226, + "step": 5188 + }, + { + "epoch": 29.65142857142857, + "grad_norm": 46.84638214111328, + "learning_rate": 2.260952380952381e-05, + "loss": 0.2499, + "step": 5189 + }, + { + "epoch": 29.65714285714286, + "grad_norm": 44.82379150390625, + "learning_rate": 2.2603174603174604e-05, + "loss": 0.253, + "step": 5190 + }, + { + "epoch": 29.662857142857142, + "grad_norm": 
675.0682373046875, + "learning_rate": 2.2596825396825397e-05, + "loss": 0.2743, + "step": 5191 + }, + { + "epoch": 29.66857142857143, + "grad_norm": 65.07813262939453, + "learning_rate": 2.2590476190476193e-05, + "loss": 0.2476, + "step": 5192 + }, + { + "epoch": 29.674285714285713, + "grad_norm": 62.987884521484375, + "learning_rate": 2.2584126984126985e-05, + "loss": 0.2061, + "step": 5193 + }, + { + "epoch": 29.68, + "grad_norm": 41.408973693847656, + "learning_rate": 2.2577777777777778e-05, + "loss": 0.2269, + "step": 5194 + }, + { + "epoch": 29.685714285714287, + "grad_norm": 60.94583511352539, + "learning_rate": 2.257142857142857e-05, + "loss": 0.2377, + "step": 5195 + }, + { + "epoch": 29.69142857142857, + "grad_norm": 41.704437255859375, + "learning_rate": 2.2565079365079367e-05, + "loss": 0.2191, + "step": 5196 + }, + { + "epoch": 29.697142857142858, + "grad_norm": 26.715286254882812, + "learning_rate": 2.255873015873016e-05, + "loss": 0.3247, + "step": 5197 + }, + { + "epoch": 29.70285714285714, + "grad_norm": 44.722381591796875, + "learning_rate": 2.2552380952380952e-05, + "loss": 0.2171, + "step": 5198 + }, + { + "epoch": 29.708571428571428, + "grad_norm": 28.22305679321289, + "learning_rate": 2.2546031746031748e-05, + "loss": 0.2707, + "step": 5199 + }, + { + "epoch": 29.714285714285715, + "grad_norm": 39.18876266479492, + "learning_rate": 2.253968253968254e-05, + "loss": 0.329, + "step": 5200 + }, + { + "epoch": 29.72, + "grad_norm": 304.74371337890625, + "learning_rate": 2.2533333333333333e-05, + "loss": 0.2566, + "step": 5201 + }, + { + "epoch": 29.725714285714286, + "grad_norm": 40.92671203613281, + "learning_rate": 2.2526984126984126e-05, + "loss": 0.2433, + "step": 5202 + }, + { + "epoch": 29.731428571428573, + "grad_norm": 31.22627830505371, + "learning_rate": 2.2520634920634922e-05, + "loss": 0.2306, + "step": 5203 + }, + { + "epoch": 29.737142857142857, + "grad_norm": 22.06302833557129, + "learning_rate": 2.2514285714285715e-05, + "loss": 
0.3377, + "step": 5204 + }, + { + "epoch": 29.742857142857144, + "grad_norm": 84.18092346191406, + "learning_rate": 2.2507936507936507e-05, + "loss": 0.3718, + "step": 5205 + }, + { + "epoch": 29.748571428571427, + "grad_norm": 19.86476707458496, + "learning_rate": 2.25015873015873e-05, + "loss": 0.285, + "step": 5206 + }, + { + "epoch": 29.754285714285714, + "grad_norm": 32.38277053833008, + "learning_rate": 2.2495238095238096e-05, + "loss": 0.3177, + "step": 5207 + }, + { + "epoch": 29.76, + "grad_norm": 28.177043914794922, + "learning_rate": 2.248888888888889e-05, + "loss": 0.2743, + "step": 5208 + }, + { + "epoch": 29.765714285714285, + "grad_norm": 72.46790313720703, + "learning_rate": 2.248253968253968e-05, + "loss": 0.2154, + "step": 5209 + }, + { + "epoch": 29.771428571428572, + "grad_norm": 32.09682083129883, + "learning_rate": 2.2476190476190477e-05, + "loss": 0.1605, + "step": 5210 + }, + { + "epoch": 29.777142857142856, + "grad_norm": 32.612701416015625, + "learning_rate": 2.246984126984127e-05, + "loss": 0.1922, + "step": 5211 + }, + { + "epoch": 29.782857142857143, + "grad_norm": 23.944807052612305, + "learning_rate": 2.2463492063492066e-05, + "loss": 0.2776, + "step": 5212 + }, + { + "epoch": 29.78857142857143, + "grad_norm": 771.417236328125, + "learning_rate": 2.245714285714286e-05, + "loss": 0.3039, + "step": 5213 + }, + { + "epoch": 29.794285714285714, + "grad_norm": 67.97403717041016, + "learning_rate": 2.2450793650793655e-05, + "loss": 0.3022, + "step": 5214 + }, + { + "epoch": 29.8, + "grad_norm": 278.3428955078125, + "learning_rate": 2.2444444444444447e-05, + "loss": 0.2148, + "step": 5215 + }, + { + "epoch": 29.805714285714284, + "grad_norm": 131.3963623046875, + "learning_rate": 2.243809523809524e-05, + "loss": 0.2432, + "step": 5216 + }, + { + "epoch": 29.81142857142857, + "grad_norm": 36.8978271484375, + "learning_rate": 2.2431746031746032e-05, + "loss": 0.2134, + "step": 5217 + }, + { + "epoch": 29.81714285714286, + "grad_norm": 
21.065776824951172, + "learning_rate": 2.242539682539683e-05, + "loss": 0.2727, + "step": 5218 + }, + { + "epoch": 29.822857142857142, + "grad_norm": 24.778491973876953, + "learning_rate": 2.241904761904762e-05, + "loss": 0.2979, + "step": 5219 + }, + { + "epoch": 29.82857142857143, + "grad_norm": 30.23557472229004, + "learning_rate": 2.2412698412698414e-05, + "loss": 0.2456, + "step": 5220 + }, + { + "epoch": 29.834285714285713, + "grad_norm": 59.7723388671875, + "learning_rate": 2.240634920634921e-05, + "loss": 0.2358, + "step": 5221 + }, + { + "epoch": 29.84, + "grad_norm": 22.08249282836914, + "learning_rate": 2.2400000000000002e-05, + "loss": 0.1699, + "step": 5222 + }, + { + "epoch": 29.845714285714287, + "grad_norm": 156.52500915527344, + "learning_rate": 2.2393650793650795e-05, + "loss": 0.2667, + "step": 5223 + }, + { + "epoch": 29.85142857142857, + "grad_norm": 26.32693862915039, + "learning_rate": 2.2387301587301588e-05, + "loss": 0.4322, + "step": 5224 + }, + { + "epoch": 29.857142857142858, + "grad_norm": 56.269779205322266, + "learning_rate": 2.2380952380952384e-05, + "loss": 0.2021, + "step": 5225 + }, + { + "epoch": 29.86285714285714, + "grad_norm": 27.565061569213867, + "learning_rate": 2.2374603174603176e-05, + "loss": 0.3126, + "step": 5226 + }, + { + "epoch": 29.86857142857143, + "grad_norm": 303.22210693359375, + "learning_rate": 2.236825396825397e-05, + "loss": 0.2455, + "step": 5227 + }, + { + "epoch": 29.874285714285715, + "grad_norm": 112.21163177490234, + "learning_rate": 2.236190476190476e-05, + "loss": 0.3081, + "step": 5228 + }, + { + "epoch": 29.88, + "grad_norm": 50.15737533569336, + "learning_rate": 2.2355555555555558e-05, + "loss": 0.2953, + "step": 5229 + }, + { + "epoch": 29.885714285714286, + "grad_norm": 169.55072021484375, + "learning_rate": 2.234920634920635e-05, + "loss": 0.3248, + "step": 5230 + }, + { + "epoch": 29.89142857142857, + "grad_norm": 71.83511352539062, + "learning_rate": 2.2342857142857143e-05, + "loss": 0.2653, 
+ "step": 5231 + }, + { + "epoch": 29.897142857142857, + "grad_norm": 51.96732711791992, + "learning_rate": 2.233650793650794e-05, + "loss": 0.2534, + "step": 5232 + }, + { + "epoch": 29.902857142857144, + "grad_norm": 38.780067443847656, + "learning_rate": 2.233015873015873e-05, + "loss": 0.2196, + "step": 5233 + }, + { + "epoch": 29.908571428571427, + "grad_norm": 19.927183151245117, + "learning_rate": 2.2323809523809524e-05, + "loss": 0.2076, + "step": 5234 + }, + { + "epoch": 29.914285714285715, + "grad_norm": 28.219009399414062, + "learning_rate": 2.2317460317460317e-05, + "loss": 0.2898, + "step": 5235 + }, + { + "epoch": 29.92, + "grad_norm": 31.704973220825195, + "learning_rate": 2.2311111111111113e-05, + "loss": 0.2539, + "step": 5236 + }, + { + "epoch": 29.925714285714285, + "grad_norm": 70.12614440917969, + "learning_rate": 2.2304761904761905e-05, + "loss": 0.2214, + "step": 5237 + }, + { + "epoch": 29.931428571428572, + "grad_norm": 23.82673454284668, + "learning_rate": 2.2298412698412698e-05, + "loss": 0.4413, + "step": 5238 + }, + { + "epoch": 29.937142857142856, + "grad_norm": 33.32120895385742, + "learning_rate": 2.229206349206349e-05, + "loss": 0.2039, + "step": 5239 + }, + { + "epoch": 29.942857142857143, + "grad_norm": 72.25456237792969, + "learning_rate": 2.2285714285714287e-05, + "loss": 0.267, + "step": 5240 + }, + { + "epoch": 29.94857142857143, + "grad_norm": 46.91978454589844, + "learning_rate": 2.227936507936508e-05, + "loss": 0.2319, + "step": 5241 + }, + { + "epoch": 29.954285714285714, + "grad_norm": 26.755491256713867, + "learning_rate": 2.2273015873015872e-05, + "loss": 0.1912, + "step": 5242 + }, + { + "epoch": 29.96, + "grad_norm": 77.56159973144531, + "learning_rate": 2.2266666666666668e-05, + "loss": 0.192, + "step": 5243 + }, + { + "epoch": 29.965714285714284, + "grad_norm": 65.45929718017578, + "learning_rate": 2.226031746031746e-05, + "loss": 0.2842, + "step": 5244 + }, + { + "epoch": 29.97142857142857, + "grad_norm": 
46.151546478271484, + "learning_rate": 2.2253968253968253e-05, + "loss": 0.289, + "step": 5245 + }, + { + "epoch": 29.97714285714286, + "grad_norm": 61.48402786254883, + "learning_rate": 2.224761904761905e-05, + "loss": 0.2239, + "step": 5246 + }, + { + "epoch": 29.982857142857142, + "grad_norm": 42.47544479370117, + "learning_rate": 2.2241269841269842e-05, + "loss": 0.3292, + "step": 5247 + }, + { + "epoch": 29.98857142857143, + "grad_norm": 16.77855110168457, + "learning_rate": 2.2234920634920635e-05, + "loss": 0.2999, + "step": 5248 + }, + { + "epoch": 29.994285714285713, + "grad_norm": 27.23601531982422, + "learning_rate": 2.222857142857143e-05, + "loss": 0.2653, + "step": 5249 + }, + { + "epoch": 30.0, + "grad_norm": 66.7364730834961, + "learning_rate": 2.2222222222222223e-05, + "loss": 0.3852, + "step": 5250 + }, + { + "epoch": 30.0, + "eval_classes": 0, + "eval_loss": 0.6095895767211914, + "eval_map": 0.9311, + "eval_map_50": 0.9653, + "eval_map_75": 0.9597, + "eval_map_large": 0.9312, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9311, + "eval_map_small": -1.0, + "eval_mar_1": 0.7902, + "eval_mar_10": 0.9762, + "eval_mar_100": 0.9794, + "eval_mar_100_per_class": 0.9794, + "eval_mar_large": 0.9794, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.7648, + "eval_samples_per_second": 21.359, + "eval_steps_per_second": 2.688, + "step": 5250 + }, + { + "epoch": 30.005714285714287, + "grad_norm": 57.8994255065918, + "learning_rate": 2.221587301587302e-05, + "loss": 0.2644, + "step": 5251 + }, + { + "epoch": 30.01142857142857, + "grad_norm": 38.62663650512695, + "learning_rate": 2.2209523809523812e-05, + "loss": 0.3136, + "step": 5252 + }, + { + "epoch": 30.017142857142858, + "grad_norm": 44.23182678222656, + "learning_rate": 2.2203174603174605e-05, + "loss": 0.2108, + "step": 5253 + }, + { + "epoch": 30.02285714285714, + "grad_norm": 86.72383117675781, + "learning_rate": 2.21968253968254e-05, + "loss": 0.2607, + "step": 5254 + }, + 
{ + "epoch": 30.02857142857143, + "grad_norm": 26.44148826599121, + "learning_rate": 2.2190476190476193e-05, + "loss": 0.2828, + "step": 5255 + }, + { + "epoch": 30.034285714285716, + "grad_norm": 54.37052536010742, + "learning_rate": 2.2184126984126986e-05, + "loss": 0.3152, + "step": 5256 + }, + { + "epoch": 30.04, + "grad_norm": 970.4421997070312, + "learning_rate": 2.217777777777778e-05, + "loss": 0.2307, + "step": 5257 + }, + { + "epoch": 30.045714285714286, + "grad_norm": 74.14012908935547, + "learning_rate": 2.2171428571428575e-05, + "loss": 0.2416, + "step": 5258 + }, + { + "epoch": 30.05142857142857, + "grad_norm": 65.60440063476562, + "learning_rate": 2.2165079365079367e-05, + "loss": 0.2617, + "step": 5259 + }, + { + "epoch": 30.057142857142857, + "grad_norm": 32.856510162353516, + "learning_rate": 2.215873015873016e-05, + "loss": 0.3224, + "step": 5260 + }, + { + "epoch": 30.062857142857144, + "grad_norm": 36.577796936035156, + "learning_rate": 2.2152380952380952e-05, + "loss": 0.2311, + "step": 5261 + }, + { + "epoch": 30.068571428571428, + "grad_norm": 37.82815933227539, + "learning_rate": 2.214603174603175e-05, + "loss": 0.255, + "step": 5262 + }, + { + "epoch": 30.074285714285715, + "grad_norm": 319.6885070800781, + "learning_rate": 2.213968253968254e-05, + "loss": 0.3182, + "step": 5263 + }, + { + "epoch": 30.08, + "grad_norm": 53.454898834228516, + "learning_rate": 2.2133333333333334e-05, + "loss": 0.2069, + "step": 5264 + }, + { + "epoch": 30.085714285714285, + "grad_norm": 52.656856536865234, + "learning_rate": 2.212698412698413e-05, + "loss": 0.268, + "step": 5265 + }, + { + "epoch": 30.091428571428573, + "grad_norm": 59.526424407958984, + "learning_rate": 2.2120634920634922e-05, + "loss": 0.3191, + "step": 5266 + }, + { + "epoch": 30.097142857142856, + "grad_norm": 95.74357604980469, + "learning_rate": 2.2114285714285715e-05, + "loss": 0.2975, + "step": 5267 + }, + { + "epoch": 30.102857142857143, + "grad_norm": 60.69863510131836, + 
"learning_rate": 2.2107936507936508e-05, + "loss": 0.3128, + "step": 5268 + }, + { + "epoch": 30.10857142857143, + "grad_norm": 403.5923767089844, + "learning_rate": 2.2101587301587304e-05, + "loss": 0.3187, + "step": 5269 + }, + { + "epoch": 30.114285714285714, + "grad_norm": 37.859458923339844, + "learning_rate": 2.2095238095238096e-05, + "loss": 0.2386, + "step": 5270 + }, + { + "epoch": 30.12, + "grad_norm": 43.76021957397461, + "learning_rate": 2.208888888888889e-05, + "loss": 0.2183, + "step": 5271 + }, + { + "epoch": 30.125714285714285, + "grad_norm": 60.64640808105469, + "learning_rate": 2.208253968253968e-05, + "loss": 0.2306, + "step": 5272 + }, + { + "epoch": 30.13142857142857, + "grad_norm": 55.451297760009766, + "learning_rate": 2.2076190476190478e-05, + "loss": 0.2518, + "step": 5273 + }, + { + "epoch": 30.13714285714286, + "grad_norm": 51.61478805541992, + "learning_rate": 2.206984126984127e-05, + "loss": 0.1972, + "step": 5274 + }, + { + "epoch": 30.142857142857142, + "grad_norm": 47.890682220458984, + "learning_rate": 2.2063492063492063e-05, + "loss": 0.1798, + "step": 5275 + }, + { + "epoch": 30.14857142857143, + "grad_norm": 86.82106018066406, + "learning_rate": 2.205714285714286e-05, + "loss": 0.2597, + "step": 5276 + }, + { + "epoch": 30.154285714285713, + "grad_norm": 26.615827560424805, + "learning_rate": 2.205079365079365e-05, + "loss": 0.2647, + "step": 5277 + }, + { + "epoch": 30.16, + "grad_norm": 28.28487205505371, + "learning_rate": 2.2044444444444444e-05, + "loss": 0.2985, + "step": 5278 + }, + { + "epoch": 30.165714285714287, + "grad_norm": 46.122684478759766, + "learning_rate": 2.2038095238095237e-05, + "loss": 0.3281, + "step": 5279 + }, + { + "epoch": 30.17142857142857, + "grad_norm": 19.797990798950195, + "learning_rate": 2.2031746031746033e-05, + "loss": 0.2221, + "step": 5280 + }, + { + "epoch": 30.177142857142858, + "grad_norm": 32.67826843261719, + "learning_rate": 2.2025396825396825e-05, + "loss": 0.1973, + "step": 5281 + }, 
+ { + "epoch": 30.18285714285714, + "grad_norm": 42.12900924682617, + "learning_rate": 2.2019047619047618e-05, + "loss": 0.2547, + "step": 5282 + }, + { + "epoch": 30.18857142857143, + "grad_norm": 29.2160701751709, + "learning_rate": 2.2012698412698414e-05, + "loss": 0.2711, + "step": 5283 + }, + { + "epoch": 30.194285714285716, + "grad_norm": 43.39205551147461, + "learning_rate": 2.2006349206349207e-05, + "loss": 0.2193, + "step": 5284 + }, + { + "epoch": 30.2, + "grad_norm": 60.42385482788086, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.221, + "step": 5285 + }, + { + "epoch": 30.205714285714286, + "grad_norm": 31.63413429260254, + "learning_rate": 2.1993650793650795e-05, + "loss": 0.2456, + "step": 5286 + }, + { + "epoch": 30.21142857142857, + "grad_norm": 56.024932861328125, + "learning_rate": 2.1987301587301588e-05, + "loss": 0.2272, + "step": 5287 + }, + { + "epoch": 30.217142857142857, + "grad_norm": 40.43967819213867, + "learning_rate": 2.1980952380952384e-05, + "loss": 0.2552, + "step": 5288 + }, + { + "epoch": 30.222857142857144, + "grad_norm": 138.42617797851562, + "learning_rate": 2.1974603174603177e-05, + "loss": 0.3272, + "step": 5289 + }, + { + "epoch": 30.228571428571428, + "grad_norm": 52.62572479248047, + "learning_rate": 2.196825396825397e-05, + "loss": 0.1945, + "step": 5290 + }, + { + "epoch": 30.234285714285715, + "grad_norm": 25.761674880981445, + "learning_rate": 2.1961904761904765e-05, + "loss": 0.2854, + "step": 5291 + }, + { + "epoch": 30.24, + "grad_norm": 24.410228729248047, + "learning_rate": 2.1955555555555558e-05, + "loss": 0.1844, + "step": 5292 + }, + { + "epoch": 30.245714285714286, + "grad_norm": 24.37540626525879, + "learning_rate": 2.194920634920635e-05, + "loss": 0.3114, + "step": 5293 + }, + { + "epoch": 30.251428571428573, + "grad_norm": 32.545597076416016, + "learning_rate": 2.1942857142857143e-05, + "loss": 0.1947, + "step": 5294 + }, + { + "epoch": 30.257142857142856, + "grad_norm": 26.83466148376465, + 
"learning_rate": 2.193650793650794e-05, + "loss": 0.2757, + "step": 5295 + }, + { + "epoch": 30.262857142857143, + "grad_norm": 114.49763488769531, + "learning_rate": 2.1930158730158732e-05, + "loss": 0.2074, + "step": 5296 + }, + { + "epoch": 30.268571428571427, + "grad_norm": 80.31643676757812, + "learning_rate": 2.1923809523809525e-05, + "loss": 0.2124, + "step": 5297 + }, + { + "epoch": 30.274285714285714, + "grad_norm": 74.36065673828125, + "learning_rate": 2.1917460317460317e-05, + "loss": 0.2864, + "step": 5298 + }, + { + "epoch": 30.28, + "grad_norm": 36.70797348022461, + "learning_rate": 2.1911111111111113e-05, + "loss": 0.2341, + "step": 5299 + }, + { + "epoch": 30.285714285714285, + "grad_norm": 36.677345275878906, + "learning_rate": 2.1904761904761906e-05, + "loss": 0.2009, + "step": 5300 + }, + { + "epoch": 30.291428571428572, + "grad_norm": 33.610931396484375, + "learning_rate": 2.18984126984127e-05, + "loss": 0.3764, + "step": 5301 + }, + { + "epoch": 30.29714285714286, + "grad_norm": 102.84770202636719, + "learning_rate": 2.1892063492063495e-05, + "loss": 0.2748, + "step": 5302 + }, + { + "epoch": 30.302857142857142, + "grad_norm": 45.53736877441406, + "learning_rate": 2.1885714285714287e-05, + "loss": 0.2016, + "step": 5303 + }, + { + "epoch": 30.30857142857143, + "grad_norm": 43.9678840637207, + "learning_rate": 2.187936507936508e-05, + "loss": 0.1706, + "step": 5304 + }, + { + "epoch": 30.314285714285713, + "grad_norm": 230.81417846679688, + "learning_rate": 2.1873015873015872e-05, + "loss": 0.2759, + "step": 5305 + }, + { + "epoch": 30.32, + "grad_norm": 99.33362579345703, + "learning_rate": 2.186666666666667e-05, + "loss": 0.3033, + "step": 5306 + }, + { + "epoch": 30.325714285714287, + "grad_norm": 55.34583282470703, + "learning_rate": 2.186031746031746e-05, + "loss": 0.2376, + "step": 5307 + }, + { + "epoch": 30.33142857142857, + "grad_norm": 64.33992767333984, + "learning_rate": 2.1853968253968254e-05, + "loss": 0.3425, + "step": 5308 + }, + 
{ + "epoch": 30.337142857142858, + "grad_norm": 33.70119857788086, + "learning_rate": 2.1847619047619046e-05, + "loss": 0.2414, + "step": 5309 + }, + { + "epoch": 30.34285714285714, + "grad_norm": 171.85140991210938, + "learning_rate": 2.1841269841269842e-05, + "loss": 0.2787, + "step": 5310 + }, + { + "epoch": 30.34857142857143, + "grad_norm": 20.956310272216797, + "learning_rate": 2.1834920634920635e-05, + "loss": 0.2725, + "step": 5311 + }, + { + "epoch": 30.354285714285716, + "grad_norm": 62.92211151123047, + "learning_rate": 2.1828571428571428e-05, + "loss": 0.1804, + "step": 5312 + }, + { + "epoch": 30.36, + "grad_norm": 48.061279296875, + "learning_rate": 2.1822222222222224e-05, + "loss": 0.2567, + "step": 5313 + }, + { + "epoch": 30.365714285714287, + "grad_norm": 70.20124053955078, + "learning_rate": 2.1815873015873016e-05, + "loss": 0.334, + "step": 5314 + }, + { + "epoch": 30.37142857142857, + "grad_norm": 226.6661834716797, + "learning_rate": 2.180952380952381e-05, + "loss": 0.2575, + "step": 5315 + }, + { + "epoch": 30.377142857142857, + "grad_norm": 242.7920684814453, + "learning_rate": 2.18031746031746e-05, + "loss": 0.2951, + "step": 5316 + }, + { + "epoch": 30.382857142857144, + "grad_norm": 64.47325134277344, + "learning_rate": 2.1796825396825398e-05, + "loss": 0.2664, + "step": 5317 + }, + { + "epoch": 30.388571428571428, + "grad_norm": 28.074567794799805, + "learning_rate": 2.179047619047619e-05, + "loss": 0.2415, + "step": 5318 + }, + { + "epoch": 30.394285714285715, + "grad_norm": 78.06990814208984, + "learning_rate": 2.1784126984126983e-05, + "loss": 0.2959, + "step": 5319 + }, + { + "epoch": 30.4, + "grad_norm": 35.36109924316406, + "learning_rate": 2.177777777777778e-05, + "loss": 0.3656, + "step": 5320 + }, + { + "epoch": 30.405714285714286, + "grad_norm": 31.605953216552734, + "learning_rate": 2.177142857142857e-05, + "loss": 0.2689, + "step": 5321 + }, + { + "epoch": 30.411428571428573, + "grad_norm": 109.99222564697266, + 
"learning_rate": 2.1765079365079368e-05, + "loss": 0.1857, + "step": 5322 + }, + { + "epoch": 30.417142857142856, + "grad_norm": 44.36388397216797, + "learning_rate": 2.175873015873016e-05, + "loss": 0.1954, + "step": 5323 + }, + { + "epoch": 30.422857142857143, + "grad_norm": 27.54041862487793, + "learning_rate": 2.1752380952380956e-05, + "loss": 0.1823, + "step": 5324 + }, + { + "epoch": 30.428571428571427, + "grad_norm": 66.09359741210938, + "learning_rate": 2.174603174603175e-05, + "loss": 0.1908, + "step": 5325 + }, + { + "epoch": 30.434285714285714, + "grad_norm": 27.425779342651367, + "learning_rate": 2.173968253968254e-05, + "loss": 0.221, + "step": 5326 + }, + { + "epoch": 30.44, + "grad_norm": 52.88359069824219, + "learning_rate": 2.1733333333333334e-05, + "loss": 0.2874, + "step": 5327 + }, + { + "epoch": 30.445714285714285, + "grad_norm": 41.36067581176758, + "learning_rate": 2.172698412698413e-05, + "loss": 0.2248, + "step": 5328 + }, + { + "epoch": 30.451428571428572, + "grad_norm": 62.25459671020508, + "learning_rate": 2.1720634920634923e-05, + "loss": 0.2223, + "step": 5329 + }, + { + "epoch": 30.457142857142856, + "grad_norm": 39.943809509277344, + "learning_rate": 2.1714285714285715e-05, + "loss": 0.3783, + "step": 5330 + }, + { + "epoch": 30.462857142857143, + "grad_norm": 26.524595260620117, + "learning_rate": 2.1707936507936508e-05, + "loss": 0.2734, + "step": 5331 + }, + { + "epoch": 30.46857142857143, + "grad_norm": 32.22235107421875, + "learning_rate": 2.1701587301587304e-05, + "loss": 0.243, + "step": 5332 + }, + { + "epoch": 30.474285714285713, + "grad_norm": 36.29214096069336, + "learning_rate": 2.1695238095238097e-05, + "loss": 0.2394, + "step": 5333 + }, + { + "epoch": 30.48, + "grad_norm": 186.89295959472656, + "learning_rate": 2.168888888888889e-05, + "loss": 0.2326, + "step": 5334 + }, + { + "epoch": 30.485714285714284, + "grad_norm": 51.586448669433594, + "learning_rate": 2.1682539682539685e-05, + "loss": 0.2229, + "step": 5335 + }, 
+ { + "epoch": 30.49142857142857, + "grad_norm": 496.5753479003906, + "learning_rate": 2.1676190476190478e-05, + "loss": 0.2687, + "step": 5336 + }, + { + "epoch": 30.497142857142858, + "grad_norm": 41.751808166503906, + "learning_rate": 2.166984126984127e-05, + "loss": 0.3579, + "step": 5337 + }, + { + "epoch": 30.502857142857142, + "grad_norm": 45.87074661254883, + "learning_rate": 2.1663492063492063e-05, + "loss": 0.2715, + "step": 5338 + }, + { + "epoch": 30.50857142857143, + "grad_norm": 35.66616439819336, + "learning_rate": 2.165714285714286e-05, + "loss": 0.3016, + "step": 5339 + }, + { + "epoch": 30.514285714285712, + "grad_norm": 76.56392669677734, + "learning_rate": 2.1650793650793652e-05, + "loss": 0.2987, + "step": 5340 + }, + { + "epoch": 30.52, + "grad_norm": 25.273380279541016, + "learning_rate": 2.1644444444444445e-05, + "loss": 0.2906, + "step": 5341 + }, + { + "epoch": 30.525714285714287, + "grad_norm": 82.47328186035156, + "learning_rate": 2.1638095238095237e-05, + "loss": 0.2485, + "step": 5342 + }, + { + "epoch": 30.53142857142857, + "grad_norm": 17.97199821472168, + "learning_rate": 2.1631746031746033e-05, + "loss": 0.275, + "step": 5343 + }, + { + "epoch": 30.537142857142857, + "grad_norm": 78.71432495117188, + "learning_rate": 2.1625396825396826e-05, + "loss": 0.3009, + "step": 5344 + }, + { + "epoch": 30.542857142857144, + "grad_norm": 53.01747131347656, + "learning_rate": 2.161904761904762e-05, + "loss": 0.2443, + "step": 5345 + }, + { + "epoch": 30.548571428571428, + "grad_norm": 66.9712905883789, + "learning_rate": 2.1612698412698415e-05, + "loss": 0.2314, + "step": 5346 + }, + { + "epoch": 30.554285714285715, + "grad_norm": 32.60740280151367, + "learning_rate": 2.1606349206349207e-05, + "loss": 0.2436, + "step": 5347 + }, + { + "epoch": 30.56, + "grad_norm": 76.29956817626953, + "learning_rate": 2.16e-05, + "loss": 0.404, + "step": 5348 + }, + { + "epoch": 30.565714285714286, + "grad_norm": 84.5020523071289, + "learning_rate": 
2.1593650793650793e-05, + "loss": 0.2227, + "step": 5349 + }, + { + "epoch": 30.571428571428573, + "grad_norm": 47.953948974609375, + "learning_rate": 2.158730158730159e-05, + "loss": 0.3307, + "step": 5350 + }, + { + "epoch": 30.577142857142857, + "grad_norm": 107.53913116455078, + "learning_rate": 2.158095238095238e-05, + "loss": 0.2171, + "step": 5351 + }, + { + "epoch": 30.582857142857144, + "grad_norm": 572.5935668945312, + "learning_rate": 2.1574603174603174e-05, + "loss": 0.2976, + "step": 5352 + }, + { + "epoch": 30.588571428571427, + "grad_norm": 990.3221435546875, + "learning_rate": 2.1568253968253966e-05, + "loss": 0.2402, + "step": 5353 + }, + { + "epoch": 30.594285714285714, + "grad_norm": 74.72994232177734, + "learning_rate": 2.1561904761904762e-05, + "loss": 0.2424, + "step": 5354 + }, + { + "epoch": 30.6, + "grad_norm": 46.14615249633789, + "learning_rate": 2.1555555555555555e-05, + "loss": 0.2754, + "step": 5355 + }, + { + "epoch": 30.605714285714285, + "grad_norm": 46.194210052490234, + "learning_rate": 2.154920634920635e-05, + "loss": 0.2838, + "step": 5356 + }, + { + "epoch": 30.611428571428572, + "grad_norm": 49.12913131713867, + "learning_rate": 2.1542857142857144e-05, + "loss": 0.3064, + "step": 5357 + }, + { + "epoch": 30.617142857142856, + "grad_norm": 52.44061279296875, + "learning_rate": 2.1536507936507936e-05, + "loss": 0.2395, + "step": 5358 + }, + { + "epoch": 30.622857142857143, + "grad_norm": 69.73971557617188, + "learning_rate": 2.1530158730158732e-05, + "loss": 0.3199, + "step": 5359 + }, + { + "epoch": 30.62857142857143, + "grad_norm": 50.69144058227539, + "learning_rate": 2.1523809523809525e-05, + "loss": 0.1757, + "step": 5360 + }, + { + "epoch": 30.634285714285713, + "grad_norm": 64.29901885986328, + "learning_rate": 2.151746031746032e-05, + "loss": 0.2406, + "step": 5361 + }, + { + "epoch": 30.64, + "grad_norm": 34.74845504760742, + "learning_rate": 2.1511111111111114e-05, + "loss": 0.2962, + "step": 5362 + }, + { + "epoch": 
30.645714285714284, + "grad_norm": 62.44707107543945, + "learning_rate": 2.1504761904761906e-05, + "loss": 0.289, + "step": 5363 + }, + { + "epoch": 30.65142857142857, + "grad_norm": 81.79146575927734, + "learning_rate": 2.14984126984127e-05, + "loss": 0.2472, + "step": 5364 + }, + { + "epoch": 30.65714285714286, + "grad_norm": 53.31319808959961, + "learning_rate": 2.1492063492063495e-05, + "loss": 0.3685, + "step": 5365 + }, + { + "epoch": 30.662857142857142, + "grad_norm": 34.42056655883789, + "learning_rate": 2.1485714285714288e-05, + "loss": 0.1763, + "step": 5366 + }, + { + "epoch": 30.66857142857143, + "grad_norm": 34.83342361450195, + "learning_rate": 2.147936507936508e-05, + "loss": 0.3054, + "step": 5367 + }, + { + "epoch": 30.674285714285713, + "grad_norm": 72.38312530517578, + "learning_rate": 2.1473015873015876e-05, + "loss": 0.3662, + "step": 5368 + }, + { + "epoch": 30.68, + "grad_norm": 56.11985778808594, + "learning_rate": 2.146666666666667e-05, + "loss": 0.288, + "step": 5369 + }, + { + "epoch": 30.685714285714287, + "grad_norm": 38.910179138183594, + "learning_rate": 2.146031746031746e-05, + "loss": 0.2519, + "step": 5370 + }, + { + "epoch": 30.69142857142857, + "grad_norm": 19.08800506591797, + "learning_rate": 2.1453968253968254e-05, + "loss": 0.199, + "step": 5371 + }, + { + "epoch": 30.697142857142858, + "grad_norm": 35.26356506347656, + "learning_rate": 2.144761904761905e-05, + "loss": 0.2699, + "step": 5372 + }, + { + "epoch": 30.70285714285714, + "grad_norm": 26.942726135253906, + "learning_rate": 2.1441269841269843e-05, + "loss": 0.2765, + "step": 5373 + }, + { + "epoch": 30.708571428571428, + "grad_norm": 84.79847717285156, + "learning_rate": 2.1434920634920636e-05, + "loss": 0.3339, + "step": 5374 + }, + { + "epoch": 30.714285714285715, + "grad_norm": 582.523681640625, + "learning_rate": 2.1428571428571428e-05, + "loss": 0.3743, + "step": 5375 + }, + { + "epoch": 30.72, + "grad_norm": 23.32284927368164, + "learning_rate": 
2.1422222222222224e-05, + "loss": 0.2225, + "step": 5376 + }, + { + "epoch": 30.725714285714286, + "grad_norm": 154.95120239257812, + "learning_rate": 2.1415873015873017e-05, + "loss": 0.1966, + "step": 5377 + }, + { + "epoch": 30.731428571428573, + "grad_norm": 152.61624145507812, + "learning_rate": 2.140952380952381e-05, + "loss": 0.3489, + "step": 5378 + }, + { + "epoch": 30.737142857142857, + "grad_norm": 117.03369140625, + "learning_rate": 2.1403174603174605e-05, + "loss": 0.3226, + "step": 5379 + }, + { + "epoch": 30.742857142857144, + "grad_norm": 68.2706298828125, + "learning_rate": 2.1396825396825398e-05, + "loss": 0.4486, + "step": 5380 + }, + { + "epoch": 30.748571428571427, + "grad_norm": 37.4405517578125, + "learning_rate": 2.139047619047619e-05, + "loss": 0.299, + "step": 5381 + }, + { + "epoch": 30.754285714285714, + "grad_norm": 52.34811019897461, + "learning_rate": 2.1384126984126983e-05, + "loss": 0.2035, + "step": 5382 + }, + { + "epoch": 30.76, + "grad_norm": 52.2584114074707, + "learning_rate": 2.137777777777778e-05, + "loss": 0.2441, + "step": 5383 + }, + { + "epoch": 30.765714285714285, + "grad_norm": 1242.759521484375, + "learning_rate": 2.1371428571428572e-05, + "loss": 0.3314, + "step": 5384 + }, + { + "epoch": 30.771428571428572, + "grad_norm": 22.513540267944336, + "learning_rate": 2.1365079365079365e-05, + "loss": 0.214, + "step": 5385 + }, + { + "epoch": 30.777142857142856, + "grad_norm": 159.3976287841797, + "learning_rate": 2.1358730158730157e-05, + "loss": 0.4593, + "step": 5386 + }, + { + "epoch": 30.782857142857143, + "grad_norm": 22.903493881225586, + "learning_rate": 2.1352380952380953e-05, + "loss": 0.3298, + "step": 5387 + }, + { + "epoch": 30.78857142857143, + "grad_norm": 38.81813049316406, + "learning_rate": 2.1346031746031746e-05, + "loss": 0.2337, + "step": 5388 + }, + { + "epoch": 30.794285714285714, + "grad_norm": 27.080564498901367, + "learning_rate": 2.133968253968254e-05, + "loss": 0.2422, + "step": 5389 + }, + { + 
"epoch": 30.8, + "grad_norm": 436.7923889160156, + "learning_rate": 2.1333333333333335e-05, + "loss": 0.2084, + "step": 5390 + }, + { + "epoch": 30.805714285714284, + "grad_norm": 27.97318458557129, + "learning_rate": 2.1326984126984127e-05, + "loss": 0.2332, + "step": 5391 + }, + { + "epoch": 30.81142857142857, + "grad_norm": 37.840797424316406, + "learning_rate": 2.132063492063492e-05, + "loss": 0.2998, + "step": 5392 + }, + { + "epoch": 30.81714285714286, + "grad_norm": 31.519920349121094, + "learning_rate": 2.1314285714285716e-05, + "loss": 0.3113, + "step": 5393 + }, + { + "epoch": 30.822857142857142, + "grad_norm": 53.09763717651367, + "learning_rate": 2.130793650793651e-05, + "loss": 0.1737, + "step": 5394 + }, + { + "epoch": 30.82857142857143, + "grad_norm": 53.32387161254883, + "learning_rate": 2.1301587301587305e-05, + "loss": 0.2794, + "step": 5395 + }, + { + "epoch": 30.834285714285713, + "grad_norm": 90.9300765991211, + "learning_rate": 2.1295238095238097e-05, + "loss": 0.2058, + "step": 5396 + }, + { + "epoch": 30.84, + "grad_norm": 23.7263240814209, + "learning_rate": 2.128888888888889e-05, + "loss": 0.2754, + "step": 5397 + }, + { + "epoch": 30.845714285714287, + "grad_norm": 79.28535461425781, + "learning_rate": 2.1282539682539686e-05, + "loss": 0.3019, + "step": 5398 + }, + { + "epoch": 30.85142857142857, + "grad_norm": 65.68453979492188, + "learning_rate": 2.127619047619048e-05, + "loss": 0.2335, + "step": 5399 + }, + { + "epoch": 30.857142857142858, + "grad_norm": 30.954830169677734, + "learning_rate": 2.126984126984127e-05, + "loss": 0.194, + "step": 5400 + }, + { + "epoch": 30.86285714285714, + "grad_norm": 912.3456420898438, + "learning_rate": 2.1263492063492064e-05, + "loss": 0.2008, + "step": 5401 + }, + { + "epoch": 30.86857142857143, + "grad_norm": 71.56365966796875, + "learning_rate": 2.125714285714286e-05, + "loss": 0.2306, + "step": 5402 + }, + { + "epoch": 30.874285714285715, + "grad_norm": 51.95694351196289, + "learning_rate": 
2.1250793650793652e-05, + "loss": 0.256, + "step": 5403 + }, + { + "epoch": 30.88, + "grad_norm": 49.92461013793945, + "learning_rate": 2.1244444444444445e-05, + "loss": 0.2412, + "step": 5404 + }, + { + "epoch": 30.885714285714286, + "grad_norm": 40.5355224609375, + "learning_rate": 2.123809523809524e-05, + "loss": 0.1947, + "step": 5405 + }, + { + "epoch": 30.89142857142857, + "grad_norm": 384.1972351074219, + "learning_rate": 2.1231746031746034e-05, + "loss": 0.2032, + "step": 5406 + }, + { + "epoch": 30.897142857142857, + "grad_norm": 32.00604248046875, + "learning_rate": 2.1225396825396826e-05, + "loss": 0.4292, + "step": 5407 + }, + { + "epoch": 30.902857142857144, + "grad_norm": 54.61763000488281, + "learning_rate": 2.121904761904762e-05, + "loss": 0.2052, + "step": 5408 + }, + { + "epoch": 30.908571428571427, + "grad_norm": 17.433794021606445, + "learning_rate": 2.1212698412698415e-05, + "loss": 0.1749, + "step": 5409 + }, + { + "epoch": 30.914285714285715, + "grad_norm": 73.77420043945312, + "learning_rate": 2.1206349206349208e-05, + "loss": 0.3023, + "step": 5410 + }, + { + "epoch": 30.92, + "grad_norm": 39.46044158935547, + "learning_rate": 2.12e-05, + "loss": 0.2494, + "step": 5411 + }, + { + "epoch": 30.925714285714285, + "grad_norm": 28.747493743896484, + "learning_rate": 2.1193650793650793e-05, + "loss": 0.481, + "step": 5412 + }, + { + "epoch": 30.931428571428572, + "grad_norm": 50.620052337646484, + "learning_rate": 2.118730158730159e-05, + "loss": 0.256, + "step": 5413 + }, + { + "epoch": 30.937142857142856, + "grad_norm": 43.31208801269531, + "learning_rate": 2.118095238095238e-05, + "loss": 0.2663, + "step": 5414 + }, + { + "epoch": 30.942857142857143, + "grad_norm": 66.55274963378906, + "learning_rate": 2.1174603174603174e-05, + "loss": 0.2472, + "step": 5415 + }, + { + "epoch": 30.94857142857143, + "grad_norm": 114.87254333496094, + "learning_rate": 2.116825396825397e-05, + "loss": 0.2805, + "step": 5416 + }, + { + "epoch": 30.954285714285714, 
+ "grad_norm": 20.99826431274414, + "learning_rate": 2.1161904761904763e-05, + "loss": 0.2322, + "step": 5417 + }, + { + "epoch": 30.96, + "grad_norm": 53.93523406982422, + "learning_rate": 2.1155555555555556e-05, + "loss": 0.5788, + "step": 5418 + }, + { + "epoch": 30.965714285714284, + "grad_norm": 54.85189437866211, + "learning_rate": 2.1149206349206348e-05, + "loss": 0.2267, + "step": 5419 + }, + { + "epoch": 30.97142857142857, + "grad_norm": 44.18019104003906, + "learning_rate": 2.1142857142857144e-05, + "loss": 0.1996, + "step": 5420 + }, + { + "epoch": 30.97714285714286, + "grad_norm": 37.03151321411133, + "learning_rate": 2.1136507936507937e-05, + "loss": 0.2557, + "step": 5421 + }, + { + "epoch": 30.982857142857142, + "grad_norm": 40.45878219604492, + "learning_rate": 2.113015873015873e-05, + "loss": 0.1859, + "step": 5422 + }, + { + "epoch": 30.98857142857143, + "grad_norm": 32.16279602050781, + "learning_rate": 2.1123809523809522e-05, + "loss": 0.2043, + "step": 5423 + }, + { + "epoch": 30.994285714285713, + "grad_norm": 76.29581451416016, + "learning_rate": 2.1117460317460318e-05, + "loss": 0.276, + "step": 5424 + }, + { + "epoch": 31.0, + "grad_norm": 701.8846435546875, + "learning_rate": 2.111111111111111e-05, + "loss": 0.2463, + "step": 5425 + }, + { + "epoch": 31.0, + "eval_classes": 0, + "eval_loss": 0.6001912951469421, + "eval_map": 0.9271, + "eval_map_50": 0.9659, + "eval_map_75": 0.9567, + "eval_map_large": 0.9272, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9271, + "eval_map_small": -1.0, + "eval_mar_1": 0.787, + "eval_mar_10": 0.9737, + "eval_mar_100": 0.9746, + "eval_mar_100_per_class": 0.9746, + "eval_mar_large": 0.9746, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.2904, + "eval_samples_per_second": 22.121, + "eval_steps_per_second": 2.784, + "step": 5425 + }, + { + "epoch": 31.005714285714287, + "grad_norm": 67.76434326171875, + "learning_rate": 2.1104761904761903e-05, + "loss": 0.1804, + "step": 5426 + 
}, + { + "epoch": 31.01142857142857, + "grad_norm": 57.018394470214844, + "learning_rate": 2.10984126984127e-05, + "loss": 0.2118, + "step": 5427 + }, + { + "epoch": 31.017142857142858, + "grad_norm": 42.061222076416016, + "learning_rate": 2.1092063492063492e-05, + "loss": 0.3424, + "step": 5428 + }, + { + "epoch": 31.02285714285714, + "grad_norm": 89.73456573486328, + "learning_rate": 2.1085714285714288e-05, + "loss": 0.2357, + "step": 5429 + }, + { + "epoch": 31.02857142857143, + "grad_norm": 139.79827880859375, + "learning_rate": 2.107936507936508e-05, + "loss": 0.2857, + "step": 5430 + }, + { + "epoch": 31.034285714285716, + "grad_norm": 28.9287166595459, + "learning_rate": 2.1073015873015873e-05, + "loss": 0.2584, + "step": 5431 + }, + { + "epoch": 31.04, + "grad_norm": 30.034549713134766, + "learning_rate": 2.106666666666667e-05, + "loss": 0.2977, + "step": 5432 + }, + { + "epoch": 31.045714285714286, + "grad_norm": 70.49272155761719, + "learning_rate": 2.1060317460317462e-05, + "loss": 0.2071, + "step": 5433 + }, + { + "epoch": 31.05142857142857, + "grad_norm": 47.150718688964844, + "learning_rate": 2.1053968253968255e-05, + "loss": 0.189, + "step": 5434 + }, + { + "epoch": 31.057142857142857, + "grad_norm": 139.5208740234375, + "learning_rate": 2.104761904761905e-05, + "loss": 0.194, + "step": 5435 + }, + { + "epoch": 31.062857142857144, + "grad_norm": 93.4288330078125, + "learning_rate": 2.1041269841269843e-05, + "loss": 0.2558, + "step": 5436 + }, + { + "epoch": 31.068571428571428, + "grad_norm": 27.41012954711914, + "learning_rate": 2.1034920634920636e-05, + "loss": 0.2515, + "step": 5437 + }, + { + "epoch": 31.074285714285715, + "grad_norm": 134.5114288330078, + "learning_rate": 2.1028571428571432e-05, + "loss": 0.2749, + "step": 5438 + }, + { + "epoch": 31.08, + "grad_norm": 25.675273895263672, + "learning_rate": 2.1022222222222225e-05, + "loss": 0.2624, + "step": 5439 + }, + { + "epoch": 31.085714285714285, + "grad_norm": 31.290029525756836, + 
"learning_rate": 2.1015873015873017e-05, + "loss": 0.1919, + "step": 5440 + }, + { + "epoch": 31.091428571428573, + "grad_norm": 170.2156982421875, + "learning_rate": 2.100952380952381e-05, + "loss": 0.235, + "step": 5441 + }, + { + "epoch": 31.097142857142856, + "grad_norm": 62.920921325683594, + "learning_rate": 2.1003174603174606e-05, + "loss": 0.1886, + "step": 5442 + }, + { + "epoch": 31.102857142857143, + "grad_norm": 906.18896484375, + "learning_rate": 2.09968253968254e-05, + "loss": 0.305, + "step": 5443 + }, + { + "epoch": 31.10857142857143, + "grad_norm": 41.33077621459961, + "learning_rate": 2.099047619047619e-05, + "loss": 0.2124, + "step": 5444 + }, + { + "epoch": 31.114285714285714, + "grad_norm": 26.16013526916504, + "learning_rate": 2.0984126984126984e-05, + "loss": 0.2462, + "step": 5445 + }, + { + "epoch": 31.12, + "grad_norm": 36.835792541503906, + "learning_rate": 2.097777777777778e-05, + "loss": 0.2799, + "step": 5446 + }, + { + "epoch": 31.125714285714285, + "grad_norm": 68.2093734741211, + "learning_rate": 2.0971428571428572e-05, + "loss": 0.2181, + "step": 5447 + }, + { + "epoch": 31.13142857142857, + "grad_norm": 52.82464599609375, + "learning_rate": 2.0965079365079365e-05, + "loss": 0.2331, + "step": 5448 + }, + { + "epoch": 31.13714285714286, + "grad_norm": 45.499717712402344, + "learning_rate": 2.095873015873016e-05, + "loss": 0.2341, + "step": 5449 + }, + { + "epoch": 31.142857142857142, + "grad_norm": 76.03508758544922, + "learning_rate": 2.0952380952380954e-05, + "loss": 0.1941, + "step": 5450 + }, + { + "epoch": 31.14857142857143, + "grad_norm": 22.56733512878418, + "learning_rate": 2.0946031746031746e-05, + "loss": 0.2002, + "step": 5451 + }, + { + "epoch": 31.154285714285713, + "grad_norm": 46.011348724365234, + "learning_rate": 2.093968253968254e-05, + "loss": 0.3206, + "step": 5452 + }, + { + "epoch": 31.16, + "grad_norm": 35.01982879638672, + "learning_rate": 2.0933333333333335e-05, + "loss": 0.2467, + "step": 5453 + }, + { + 
"epoch": 31.165714285714287, + "grad_norm": 64.70989227294922, + "learning_rate": 2.0926984126984128e-05, + "loss": 0.2938, + "step": 5454 + }, + { + "epoch": 31.17142857142857, + "grad_norm": 21.85089874267578, + "learning_rate": 2.092063492063492e-05, + "loss": 0.3465, + "step": 5455 + }, + { + "epoch": 31.177142857142858, + "grad_norm": 48.40916442871094, + "learning_rate": 2.0914285714285713e-05, + "loss": 0.1811, + "step": 5456 + }, + { + "epoch": 31.18285714285714, + "grad_norm": 76.00787353515625, + "learning_rate": 2.090793650793651e-05, + "loss": 0.4322, + "step": 5457 + }, + { + "epoch": 31.18857142857143, + "grad_norm": 20.779376983642578, + "learning_rate": 2.09015873015873e-05, + "loss": 0.2481, + "step": 5458 + }, + { + "epoch": 31.194285714285716, + "grad_norm": 47.01481246948242, + "learning_rate": 2.0895238095238094e-05, + "loss": 0.2247, + "step": 5459 + }, + { + "epoch": 31.2, + "grad_norm": 63.114315032958984, + "learning_rate": 2.088888888888889e-05, + "loss": 0.2689, + "step": 5460 + }, + { + "epoch": 31.205714285714286, + "grad_norm": 42.617279052734375, + "learning_rate": 2.0882539682539683e-05, + "loss": 0.2182, + "step": 5461 + }, + { + "epoch": 31.21142857142857, + "grad_norm": 24.569684982299805, + "learning_rate": 2.0876190476190476e-05, + "loss": 0.2207, + "step": 5462 + }, + { + "epoch": 31.217142857142857, + "grad_norm": 37.63087844848633, + "learning_rate": 2.0869841269841268e-05, + "loss": 0.2266, + "step": 5463 + }, + { + "epoch": 31.222857142857144, + "grad_norm": 19.446998596191406, + "learning_rate": 2.0863492063492064e-05, + "loss": 0.236, + "step": 5464 + }, + { + "epoch": 31.228571428571428, + "grad_norm": 50.6137580871582, + "learning_rate": 2.0857142857142857e-05, + "loss": 0.1773, + "step": 5465 + }, + { + "epoch": 31.234285714285715, + "grad_norm": 67.84950256347656, + "learning_rate": 2.0850793650793653e-05, + "loss": 0.1816, + "step": 5466 + }, + { + "epoch": 31.24, + "grad_norm": 76.56767272949219, + "learning_rate": 
2.0844444444444446e-05, + "loss": 0.2586, + "step": 5467 + }, + { + "epoch": 31.245714285714286, + "grad_norm": 730.4011840820312, + "learning_rate": 2.083809523809524e-05, + "loss": 0.3457, + "step": 5468 + }, + { + "epoch": 31.251428571428573, + "grad_norm": 74.66912078857422, + "learning_rate": 2.0831746031746034e-05, + "loss": 0.1926, + "step": 5469 + }, + { + "epoch": 31.257142857142856, + "grad_norm": 63.90117263793945, + "learning_rate": 2.0825396825396827e-05, + "loss": 0.2367, + "step": 5470 + }, + { + "epoch": 31.262857142857143, + "grad_norm": 73.22410583496094, + "learning_rate": 2.0819047619047623e-05, + "loss": 0.2367, + "step": 5471 + }, + { + "epoch": 31.268571428571427, + "grad_norm": 36.858543395996094, + "learning_rate": 2.0812698412698415e-05, + "loss": 0.3009, + "step": 5472 + }, + { + "epoch": 31.274285714285714, + "grad_norm": 21.480995178222656, + "learning_rate": 2.0806349206349208e-05, + "loss": 0.1774, + "step": 5473 + }, + { + "epoch": 31.28, + "grad_norm": 35.80170822143555, + "learning_rate": 2.08e-05, + "loss": 0.234, + "step": 5474 + }, + { + "epoch": 31.285714285714285, + "grad_norm": 49.55852508544922, + "learning_rate": 2.0793650793650797e-05, + "loss": 0.1872, + "step": 5475 + }, + { + "epoch": 31.291428571428572, + "grad_norm": 32.34794616699219, + "learning_rate": 2.078730158730159e-05, + "loss": 0.2328, + "step": 5476 + }, + { + "epoch": 31.29714285714286, + "grad_norm": 53.036277770996094, + "learning_rate": 2.0780952380952382e-05, + "loss": 0.3155, + "step": 5477 + }, + { + "epoch": 31.302857142857142, + "grad_norm": 34.03774642944336, + "learning_rate": 2.0774603174603175e-05, + "loss": 0.2306, + "step": 5478 + }, + { + "epoch": 31.30857142857143, + "grad_norm": 124.47933197021484, + "learning_rate": 2.076825396825397e-05, + "loss": 0.2288, + "step": 5479 + }, + { + "epoch": 31.314285714285713, + "grad_norm": 31.235647201538086, + "learning_rate": 2.0761904761904763e-05, + "loss": 0.2811, + "step": 5480 + }, + { + "epoch": 
31.32, + "grad_norm": 112.3315200805664, + "learning_rate": 2.0755555555555556e-05, + "loss": 0.302, + "step": 5481 + }, + { + "epoch": 31.325714285714287, + "grad_norm": 32.7776985168457, + "learning_rate": 2.0749206349206352e-05, + "loss": 0.2671, + "step": 5482 + }, + { + "epoch": 31.33142857142857, + "grad_norm": 60.15741729736328, + "learning_rate": 2.0742857142857145e-05, + "loss": 0.3821, + "step": 5483 + }, + { + "epoch": 31.337142857142858, + "grad_norm": 28.403162002563477, + "learning_rate": 2.0736507936507937e-05, + "loss": 0.1984, + "step": 5484 + }, + { + "epoch": 31.34285714285714, + "grad_norm": 51.33551788330078, + "learning_rate": 2.073015873015873e-05, + "loss": 0.2028, + "step": 5485 + }, + { + "epoch": 31.34857142857143, + "grad_norm": 31.286685943603516, + "learning_rate": 2.0723809523809526e-05, + "loss": 0.306, + "step": 5486 + }, + { + "epoch": 31.354285714285716, + "grad_norm": 43.21095275878906, + "learning_rate": 2.071746031746032e-05, + "loss": 0.3492, + "step": 5487 + }, + { + "epoch": 31.36, + "grad_norm": 21.936094284057617, + "learning_rate": 2.071111111111111e-05, + "loss": 0.1757, + "step": 5488 + }, + { + "epoch": 31.365714285714287, + "grad_norm": 115.03231811523438, + "learning_rate": 2.0704761904761904e-05, + "loss": 0.2715, + "step": 5489 + }, + { + "epoch": 31.37142857142857, + "grad_norm": 78.47013854980469, + "learning_rate": 2.06984126984127e-05, + "loss": 0.2645, + "step": 5490 + }, + { + "epoch": 31.377142857142857, + "grad_norm": 45.49565505981445, + "learning_rate": 2.0692063492063493e-05, + "loss": 0.2694, + "step": 5491 + }, + { + "epoch": 31.382857142857144, + "grad_norm": 24.608911514282227, + "learning_rate": 2.0685714285714285e-05, + "loss": 0.176, + "step": 5492 + }, + { + "epoch": 31.388571428571428, + "grad_norm": 88.68647766113281, + "learning_rate": 2.067936507936508e-05, + "loss": 0.2936, + "step": 5493 + }, + { + "epoch": 31.394285714285715, + "grad_norm": 54.628055572509766, + "learning_rate": 
2.0673015873015874e-05, + "loss": 0.2538, + "step": 5494 + }, + { + "epoch": 31.4, + "grad_norm": 79.88322448730469, + "learning_rate": 2.0666666666666666e-05, + "loss": 0.2181, + "step": 5495 + }, + { + "epoch": 31.405714285714286, + "grad_norm": 459.6210632324219, + "learning_rate": 2.066031746031746e-05, + "loss": 0.2895, + "step": 5496 + }, + { + "epoch": 31.411428571428573, + "grad_norm": 72.49649810791016, + "learning_rate": 2.0653968253968255e-05, + "loss": 0.2486, + "step": 5497 + }, + { + "epoch": 31.417142857142856, + "grad_norm": 39.870601654052734, + "learning_rate": 2.0647619047619048e-05, + "loss": 0.2942, + "step": 5498 + }, + { + "epoch": 31.422857142857143, + "grad_norm": 67.51768493652344, + "learning_rate": 2.064126984126984e-05, + "loss": 0.3125, + "step": 5499 + }, + { + "epoch": 31.428571428571427, + "grad_norm": 1334.420654296875, + "learning_rate": 2.0634920634920636e-05, + "loss": 0.2806, + "step": 5500 + }, + { + "epoch": 31.434285714285714, + "grad_norm": 164.1793212890625, + "learning_rate": 2.062857142857143e-05, + "loss": 0.2918, + "step": 5501 + }, + { + "epoch": 31.44, + "grad_norm": 63.5439453125, + "learning_rate": 2.062222222222222e-05, + "loss": 0.2616, + "step": 5502 + }, + { + "epoch": 31.445714285714285, + "grad_norm": 69.50310516357422, + "learning_rate": 2.0615873015873018e-05, + "loss": 0.2505, + "step": 5503 + }, + { + "epoch": 31.451428571428572, + "grad_norm": 38.087135314941406, + "learning_rate": 2.060952380952381e-05, + "loss": 0.2052, + "step": 5504 + }, + { + "epoch": 31.457142857142856, + "grad_norm": 44.78586196899414, + "learning_rate": 2.0603174603174606e-05, + "loss": 0.2981, + "step": 5505 + }, + { + "epoch": 31.462857142857143, + "grad_norm": 60.432674407958984, + "learning_rate": 2.05968253968254e-05, + "loss": 0.2139, + "step": 5506 + }, + { + "epoch": 31.46857142857143, + "grad_norm": 43.80990982055664, + "learning_rate": 2.059047619047619e-05, + "loss": 0.3612, + "step": 5507 + }, + { + "epoch": 
31.474285714285713, + "grad_norm": 157.6404266357422, + "learning_rate": 2.0584126984126988e-05, + "loss": 0.319, + "step": 5508 + }, + { + "epoch": 31.48, + "grad_norm": 76.8299789428711, + "learning_rate": 2.057777777777778e-05, + "loss": 0.2222, + "step": 5509 + }, + { + "epoch": 31.485714285714284, + "grad_norm": 67.4052505493164, + "learning_rate": 2.0571428571428573e-05, + "loss": 0.256, + "step": 5510 + }, + { + "epoch": 31.49142857142857, + "grad_norm": 35.062255859375, + "learning_rate": 2.0565079365079366e-05, + "loss": 0.2586, + "step": 5511 + }, + { + "epoch": 31.497142857142858, + "grad_norm": 46.1373405456543, + "learning_rate": 2.055873015873016e-05, + "loss": 0.3328, + "step": 5512 + }, + { + "epoch": 31.502857142857142, + "grad_norm": 305.5950927734375, + "learning_rate": 2.0552380952380954e-05, + "loss": 0.1931, + "step": 5513 + }, + { + "epoch": 31.50857142857143, + "grad_norm": 36.37525939941406, + "learning_rate": 2.0546031746031747e-05, + "loss": 0.2586, + "step": 5514 + }, + { + "epoch": 31.514285714285712, + "grad_norm": 557.3544311523438, + "learning_rate": 2.053968253968254e-05, + "loss": 0.2959, + "step": 5515 + }, + { + "epoch": 31.52, + "grad_norm": 63.56522750854492, + "learning_rate": 2.0533333333333336e-05, + "loss": 0.2672, + "step": 5516 + }, + { + "epoch": 31.525714285714287, + "grad_norm": 258.99517822265625, + "learning_rate": 2.0526984126984128e-05, + "loss": 0.3091, + "step": 5517 + }, + { + "epoch": 31.53142857142857, + "grad_norm": 24.586519241333008, + "learning_rate": 2.052063492063492e-05, + "loss": 0.3207, + "step": 5518 + }, + { + "epoch": 31.537142857142857, + "grad_norm": 20.953536987304688, + "learning_rate": 2.0514285714285717e-05, + "loss": 0.2527, + "step": 5519 + }, + { + "epoch": 31.542857142857144, + "grad_norm": 117.11972045898438, + "learning_rate": 2.050793650793651e-05, + "loss": 0.2423, + "step": 5520 + }, + { + "epoch": 31.548571428571428, + "grad_norm": 115.53871154785156, + "learning_rate": 
2.0501587301587302e-05, + "loss": 0.2458, + "step": 5521 + }, + { + "epoch": 31.554285714285715, + "grad_norm": 56.487728118896484, + "learning_rate": 2.0495238095238095e-05, + "loss": 0.287, + "step": 5522 + }, + { + "epoch": 31.56, + "grad_norm": 77.6148681640625, + "learning_rate": 2.048888888888889e-05, + "loss": 0.26, + "step": 5523 + }, + { + "epoch": 31.565714285714286, + "grad_norm": 51.763145446777344, + "learning_rate": 2.0482539682539683e-05, + "loss": 0.2177, + "step": 5524 + }, + { + "epoch": 31.571428571428573, + "grad_norm": 45.99419021606445, + "learning_rate": 2.0476190476190476e-05, + "loss": 0.1893, + "step": 5525 + }, + { + "epoch": 31.577142857142857, + "grad_norm": 46.7635498046875, + "learning_rate": 2.046984126984127e-05, + "loss": 0.243, + "step": 5526 + }, + { + "epoch": 31.582857142857144, + "grad_norm": 48.2321662902832, + "learning_rate": 2.0463492063492065e-05, + "loss": 0.2543, + "step": 5527 + }, + { + "epoch": 31.588571428571427, + "grad_norm": 69.36721801757812, + "learning_rate": 2.0457142857142857e-05, + "loss": 0.3989, + "step": 5528 + }, + { + "epoch": 31.594285714285714, + "grad_norm": 119.64047241210938, + "learning_rate": 2.045079365079365e-05, + "loss": 0.3188, + "step": 5529 + }, + { + "epoch": 31.6, + "grad_norm": 46.181678771972656, + "learning_rate": 2.0444444444444446e-05, + "loss": 0.2096, + "step": 5530 + }, + { + "epoch": 31.605714285714285, + "grad_norm": 57.24357223510742, + "learning_rate": 2.043809523809524e-05, + "loss": 0.3174, + "step": 5531 + }, + { + "epoch": 31.611428571428572, + "grad_norm": 67.29043579101562, + "learning_rate": 2.043174603174603e-05, + "loss": 0.2383, + "step": 5532 + }, + { + "epoch": 31.617142857142856, + "grad_norm": 28.11064910888672, + "learning_rate": 2.0425396825396824e-05, + "loss": 0.2024, + "step": 5533 + }, + { + "epoch": 31.622857142857143, + "grad_norm": 74.12226104736328, + "learning_rate": 2.041904761904762e-05, + "loss": 0.2789, + "step": 5534 + }, + { + "epoch": 
31.62857142857143, + "grad_norm": 32.433372497558594, + "learning_rate": 2.0412698412698413e-05, + "loss": 0.2769, + "step": 5535 + }, + { + "epoch": 31.634285714285713, + "grad_norm": 30.32268524169922, + "learning_rate": 2.0406349206349205e-05, + "loss": 0.2244, + "step": 5536 + }, + { + "epoch": 31.64, + "grad_norm": 48.050132751464844, + "learning_rate": 2.04e-05, + "loss": 0.5059, + "step": 5537 + }, + { + "epoch": 31.645714285714284, + "grad_norm": 57.8617057800293, + "learning_rate": 2.0393650793650794e-05, + "loss": 0.219, + "step": 5538 + }, + { + "epoch": 31.65142857142857, + "grad_norm": 38.06205749511719, + "learning_rate": 2.038730158730159e-05, + "loss": 0.3258, + "step": 5539 + }, + { + "epoch": 31.65714285714286, + "grad_norm": 1111.97412109375, + "learning_rate": 2.0380952380952382e-05, + "loss": 0.2625, + "step": 5540 + }, + { + "epoch": 31.662857142857142, + "grad_norm": 51.53617858886719, + "learning_rate": 2.0374603174603175e-05, + "loss": 0.1704, + "step": 5541 + }, + { + "epoch": 31.66857142857143, + "grad_norm": 39.850181579589844, + "learning_rate": 2.036825396825397e-05, + "loss": 0.2694, + "step": 5542 + }, + { + "epoch": 31.674285714285713, + "grad_norm": 65.64685821533203, + "learning_rate": 2.0361904761904764e-05, + "loss": 0.418, + "step": 5543 + }, + { + "epoch": 31.68, + "grad_norm": 61.119632720947266, + "learning_rate": 2.0355555555555556e-05, + "loss": 0.2006, + "step": 5544 + }, + { + "epoch": 31.685714285714287, + "grad_norm": 55.178192138671875, + "learning_rate": 2.0349206349206352e-05, + "loss": 0.2113, + "step": 5545 + }, + { + "epoch": 31.69142857142857, + "grad_norm": 25.63457489013672, + "learning_rate": 2.0342857142857145e-05, + "loss": 0.258, + "step": 5546 + }, + { + "epoch": 31.697142857142858, + "grad_norm": 41.02190399169922, + "learning_rate": 2.0336507936507938e-05, + "loss": 0.2765, + "step": 5547 + }, + { + "epoch": 31.70285714285714, + "grad_norm": 56.24254608154297, + "learning_rate": 2.033015873015873e-05, + 
"loss": 0.2344, + "step": 5548 + }, + { + "epoch": 31.708571428571428, + "grad_norm": 395.86474609375, + "learning_rate": 2.0323809523809526e-05, + "loss": 0.2746, + "step": 5549 + }, + { + "epoch": 31.714285714285715, + "grad_norm": 48.37761688232422, + "learning_rate": 2.031746031746032e-05, + "loss": 0.1888, + "step": 5550 + }, + { + "epoch": 31.72, + "grad_norm": 46.34769821166992, + "learning_rate": 2.031111111111111e-05, + "loss": 0.2266, + "step": 5551 + }, + { + "epoch": 31.725714285714286, + "grad_norm": 79.68155670166016, + "learning_rate": 2.0304761904761908e-05, + "loss": 0.2262, + "step": 5552 + }, + { + "epoch": 31.731428571428573, + "grad_norm": 74.87509155273438, + "learning_rate": 2.02984126984127e-05, + "loss": 0.2685, + "step": 5553 + }, + { + "epoch": 31.737142857142857, + "grad_norm": 178.58267211914062, + "learning_rate": 2.0292063492063493e-05, + "loss": 0.2651, + "step": 5554 + }, + { + "epoch": 31.742857142857144, + "grad_norm": 31.781280517578125, + "learning_rate": 2.0285714285714286e-05, + "loss": 0.2259, + "step": 5555 + }, + { + "epoch": 31.748571428571427, + "grad_norm": 187.2349853515625, + "learning_rate": 2.027936507936508e-05, + "loss": 0.2315, + "step": 5556 + }, + { + "epoch": 31.754285714285714, + "grad_norm": 68.71722412109375, + "learning_rate": 2.0273015873015874e-05, + "loss": 0.2797, + "step": 5557 + }, + { + "epoch": 31.76, + "grad_norm": 41.61360168457031, + "learning_rate": 2.0266666666666667e-05, + "loss": 0.2068, + "step": 5558 + }, + { + "epoch": 31.765714285714285, + "grad_norm": 30.7895450592041, + "learning_rate": 2.026031746031746e-05, + "loss": 0.2622, + "step": 5559 + }, + { + "epoch": 31.771428571428572, + "grad_norm": 66.16661071777344, + "learning_rate": 2.0253968253968256e-05, + "loss": 0.246, + "step": 5560 + }, + { + "epoch": 31.777142857142856, + "grad_norm": 277.6895751953125, + "learning_rate": 2.0247619047619048e-05, + "loss": 0.3371, + "step": 5561 + }, + { + "epoch": 31.782857142857143, + 
"grad_norm": 241.08880615234375, + "learning_rate": 2.024126984126984e-05, + "loss": 0.2674, + "step": 5562 + }, + { + "epoch": 31.78857142857143, + "grad_norm": 38.30510711669922, + "learning_rate": 2.0234920634920637e-05, + "loss": 0.2794, + "step": 5563 + }, + { + "epoch": 31.794285714285714, + "grad_norm": 50.54317092895508, + "learning_rate": 2.022857142857143e-05, + "loss": 0.3048, + "step": 5564 + }, + { + "epoch": 31.8, + "grad_norm": 316.1440734863281, + "learning_rate": 2.0222222222222222e-05, + "loss": 0.2916, + "step": 5565 + }, + { + "epoch": 31.805714285714284, + "grad_norm": 44.01576614379883, + "learning_rate": 2.0215873015873015e-05, + "loss": 0.265, + "step": 5566 + }, + { + "epoch": 31.81142857142857, + "grad_norm": 61.58749008178711, + "learning_rate": 2.020952380952381e-05, + "loss": 0.3227, + "step": 5567 + }, + { + "epoch": 31.81714285714286, + "grad_norm": 58.81139373779297, + "learning_rate": 2.0203174603174603e-05, + "loss": 0.2526, + "step": 5568 + }, + { + "epoch": 31.822857142857142, + "grad_norm": 62.55400085449219, + "learning_rate": 2.0196825396825396e-05, + "loss": 0.1877, + "step": 5569 + }, + { + "epoch": 31.82857142857143, + "grad_norm": 60.63581466674805, + "learning_rate": 2.019047619047619e-05, + "loss": 0.2509, + "step": 5570 + }, + { + "epoch": 31.834285714285713, + "grad_norm": 31.16853141784668, + "learning_rate": 2.0184126984126985e-05, + "loss": 0.2073, + "step": 5571 + }, + { + "epoch": 31.84, + "grad_norm": 86.914794921875, + "learning_rate": 2.0177777777777777e-05, + "loss": 0.2536, + "step": 5572 + }, + { + "epoch": 31.845714285714287, + "grad_norm": 77.10182189941406, + "learning_rate": 2.0171428571428573e-05, + "loss": 0.2441, + "step": 5573 + }, + { + "epoch": 31.85142857142857, + "grad_norm": 438.7278137207031, + "learning_rate": 2.0165079365079366e-05, + "loss": 0.2637, + "step": 5574 + }, + { + "epoch": 31.857142857142858, + "grad_norm": 634.8936157226562, + "learning_rate": 2.015873015873016e-05, + "loss": 
0.2978, + "step": 5575 + }, + { + "epoch": 31.86285714285714, + "grad_norm": 65.57212829589844, + "learning_rate": 2.0152380952380955e-05, + "loss": 0.3746, + "step": 5576 + }, + { + "epoch": 31.86857142857143, + "grad_norm": 52.18415451049805, + "learning_rate": 2.0146031746031747e-05, + "loss": 0.355, + "step": 5577 + }, + { + "epoch": 31.874285714285715, + "grad_norm": 277.6800231933594, + "learning_rate": 2.0139682539682543e-05, + "loss": 0.3008, + "step": 5578 + }, + { + "epoch": 31.88, + "grad_norm": 50.11739730834961, + "learning_rate": 2.0133333333333336e-05, + "loss": 0.2894, + "step": 5579 + }, + { + "epoch": 31.885714285714286, + "grad_norm": 56.192787170410156, + "learning_rate": 2.012698412698413e-05, + "loss": 0.2317, + "step": 5580 + }, + { + "epoch": 31.89142857142857, + "grad_norm": 827.2831420898438, + "learning_rate": 2.012063492063492e-05, + "loss": 0.2363, + "step": 5581 + }, + { + "epoch": 31.897142857142857, + "grad_norm": 69.73470306396484, + "learning_rate": 2.0114285714285717e-05, + "loss": 0.203, + "step": 5582 + }, + { + "epoch": 31.902857142857144, + "grad_norm": 30.762157440185547, + "learning_rate": 2.010793650793651e-05, + "loss": 0.2364, + "step": 5583 + }, + { + "epoch": 31.908571428571427, + "grad_norm": 27.67940330505371, + "learning_rate": 2.0101587301587303e-05, + "loss": 0.2964, + "step": 5584 + }, + { + "epoch": 31.914285714285715, + "grad_norm": 53.260562896728516, + "learning_rate": 2.00952380952381e-05, + "loss": 0.2025, + "step": 5585 + }, + { + "epoch": 31.92, + "grad_norm": 39.59288024902344, + "learning_rate": 2.008888888888889e-05, + "loss": 0.2818, + "step": 5586 + }, + { + "epoch": 31.925714285714285, + "grad_norm": 67.39969635009766, + "learning_rate": 2.0082539682539684e-05, + "loss": 0.2169, + "step": 5587 + }, + { + "epoch": 31.931428571428572, + "grad_norm": 43.52405548095703, + "learning_rate": 2.0076190476190476e-05, + "loss": 0.2053, + "step": 5588 + }, + { + "epoch": 31.937142857142856, + "grad_norm": 
58.90415573120117, + "learning_rate": 2.0069841269841272e-05, + "loss": 0.2644, + "step": 5589 + }, + { + "epoch": 31.942857142857143, + "grad_norm": 29.253192901611328, + "learning_rate": 2.0063492063492065e-05, + "loss": 0.2197, + "step": 5590 + }, + { + "epoch": 31.94857142857143, + "grad_norm": 33.914119720458984, + "learning_rate": 2.0057142857142858e-05, + "loss": 0.1722, + "step": 5591 + }, + { + "epoch": 31.954285714285714, + "grad_norm": 680.5887451171875, + "learning_rate": 2.005079365079365e-05, + "loss": 0.2555, + "step": 5592 + }, + { + "epoch": 31.96, + "grad_norm": 91.59898376464844, + "learning_rate": 2.0044444444444446e-05, + "loss": 0.323, + "step": 5593 + }, + { + "epoch": 31.965714285714284, + "grad_norm": 49.2437744140625, + "learning_rate": 2.003809523809524e-05, + "loss": 0.2468, + "step": 5594 + }, + { + "epoch": 31.97142857142857, + "grad_norm": 51.85927200317383, + "learning_rate": 2.003174603174603e-05, + "loss": 0.2949, + "step": 5595 + }, + { + "epoch": 31.97714285714286, + "grad_norm": 84.97169494628906, + "learning_rate": 2.0025396825396828e-05, + "loss": 0.1939, + "step": 5596 + }, + { + "epoch": 31.982857142857142, + "grad_norm": 31.042736053466797, + "learning_rate": 2.001904761904762e-05, + "loss": 0.2351, + "step": 5597 + }, + { + "epoch": 31.98857142857143, + "grad_norm": 573.0760498046875, + "learning_rate": 2.0012698412698413e-05, + "loss": 0.3312, + "step": 5598 + }, + { + "epoch": 31.994285714285713, + "grad_norm": 41.7373046875, + "learning_rate": 2.0006349206349206e-05, + "loss": 0.2306, + "step": 5599 + }, + { + "epoch": 32.0, + "grad_norm": 31.659475326538086, + "learning_rate": 2e-05, + "loss": 0.2117, + "step": 5600 + }, + { + "epoch": 32.0, + "eval_classes": 0, + "eval_loss": 0.5764447450637817, + "eval_map": 0.9374, + "eval_map_50": 0.9692, + "eval_map_75": 0.963, + "eval_map_large": 0.9374, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9374, + "eval_map_small": -1.0, + "eval_mar_1": 0.7838, + "eval_mar_10": 
0.9768, + "eval_mar_100": 0.9781, + "eval_mar_100_per_class": 0.9781, + "eval_mar_large": 0.9781, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.1244, + "eval_samples_per_second": 22.401, + "eval_steps_per_second": 2.819, + "step": 5600 + }, + { + "epoch": 32.005714285714284, + "grad_norm": 68.49040985107422, + "learning_rate": 1.9993650793650794e-05, + "loss": 0.2377, + "step": 5601 + }, + { + "epoch": 32.011428571428574, + "grad_norm": 75.28813171386719, + "learning_rate": 1.9987301587301587e-05, + "loss": 0.2451, + "step": 5602 + }, + { + "epoch": 32.01714285714286, + "grad_norm": 29.55270767211914, + "learning_rate": 1.998095238095238e-05, + "loss": 0.1495, + "step": 5603 + }, + { + "epoch": 32.02285714285714, + "grad_norm": 1036.5198974609375, + "learning_rate": 1.9974603174603176e-05, + "loss": 0.2689, + "step": 5604 + }, + { + "epoch": 32.02857142857143, + "grad_norm": 25.972496032714844, + "learning_rate": 1.9968253968253968e-05, + "loss": 0.1889, + "step": 5605 + }, + { + "epoch": 32.034285714285716, + "grad_norm": 56.76987075805664, + "learning_rate": 1.996190476190476e-05, + "loss": 0.3136, + "step": 5606 + }, + { + "epoch": 32.04, + "grad_norm": 48.14201354980469, + "learning_rate": 1.9955555555555557e-05, + "loss": 0.1673, + "step": 5607 + }, + { + "epoch": 32.04571428571428, + "grad_norm": 55.37479782104492, + "learning_rate": 1.994920634920635e-05, + "loss": 0.3009, + "step": 5608 + }, + { + "epoch": 32.05142857142857, + "grad_norm": 27.12615394592285, + "learning_rate": 1.9942857142857142e-05, + "loss": 0.2047, + "step": 5609 + }, + { + "epoch": 32.05714285714286, + "grad_norm": 57.392921447753906, + "learning_rate": 1.9936507936507938e-05, + "loss": 0.2449, + "step": 5610 + }, + { + "epoch": 32.06285714285714, + "grad_norm": 44.158294677734375, + "learning_rate": 1.993015873015873e-05, + "loss": 0.2323, + "step": 5611 + }, + { + "epoch": 32.06857142857143, + "grad_norm": 53.65294647216797, + "learning_rate": 
1.9923809523809527e-05, + "loss": 0.1964, + "step": 5612 + }, + { + "epoch": 32.074285714285715, + "grad_norm": 98.22899627685547, + "learning_rate": 1.991746031746032e-05, + "loss": 0.2387, + "step": 5613 + }, + { + "epoch": 32.08, + "grad_norm": 46.33811950683594, + "learning_rate": 1.9911111111111112e-05, + "loss": 0.2555, + "step": 5614 + }, + { + "epoch": 32.08571428571429, + "grad_norm": 21.99703025817871, + "learning_rate": 1.9904761904761908e-05, + "loss": 0.1987, + "step": 5615 + }, + { + "epoch": 32.09142857142857, + "grad_norm": 34.59161376953125, + "learning_rate": 1.98984126984127e-05, + "loss": 0.2103, + "step": 5616 + }, + { + "epoch": 32.097142857142856, + "grad_norm": 28.423341751098633, + "learning_rate": 1.9892063492063493e-05, + "loss": 0.2242, + "step": 5617 + }, + { + "epoch": 32.10285714285714, + "grad_norm": 48.24335479736328, + "learning_rate": 1.9885714285714286e-05, + "loss": 0.3536, + "step": 5618 + }, + { + "epoch": 32.10857142857143, + "grad_norm": 75.23125457763672, + "learning_rate": 1.9879365079365082e-05, + "loss": 0.2559, + "step": 5619 + }, + { + "epoch": 32.114285714285714, + "grad_norm": 28.813011169433594, + "learning_rate": 1.9873015873015875e-05, + "loss": 0.2605, + "step": 5620 + }, + { + "epoch": 32.12, + "grad_norm": 101.69721221923828, + "learning_rate": 1.9866666666666667e-05, + "loss": 0.2976, + "step": 5621 + }, + { + "epoch": 32.12571428571429, + "grad_norm": 46.14924240112305, + "learning_rate": 1.9860317460317463e-05, + "loss": 0.1942, + "step": 5622 + }, + { + "epoch": 32.13142857142857, + "grad_norm": 49.38679122924805, + "learning_rate": 1.9853968253968256e-05, + "loss": 0.1939, + "step": 5623 + }, + { + "epoch": 32.137142857142855, + "grad_norm": 30.698293685913086, + "learning_rate": 1.984761904761905e-05, + "loss": 0.2122, + "step": 5624 + }, + { + "epoch": 32.142857142857146, + "grad_norm": 68.74352264404297, + "learning_rate": 1.984126984126984e-05, + "loss": 0.273, + "step": 5625 + }, + { + "epoch": 
32.14857142857143, + "grad_norm": 53.25042724609375, + "learning_rate": 1.9834920634920637e-05, + "loss": 0.2859, + "step": 5626 + }, + { + "epoch": 32.15428571428571, + "grad_norm": 83.85069274902344, + "learning_rate": 1.982857142857143e-05, + "loss": 0.1411, + "step": 5627 + }, + { + "epoch": 32.16, + "grad_norm": 327.1810607910156, + "learning_rate": 1.9822222222222223e-05, + "loss": 0.3035, + "step": 5628 + }, + { + "epoch": 32.16571428571429, + "grad_norm": 26.484102249145508, + "learning_rate": 1.9815873015873015e-05, + "loss": 0.2007, + "step": 5629 + }, + { + "epoch": 32.17142857142857, + "grad_norm": 44.97897720336914, + "learning_rate": 1.980952380952381e-05, + "loss": 0.1995, + "step": 5630 + }, + { + "epoch": 32.177142857142854, + "grad_norm": 52.42127990722656, + "learning_rate": 1.9803174603174604e-05, + "loss": 0.1818, + "step": 5631 + }, + { + "epoch": 32.182857142857145, + "grad_norm": 25.286893844604492, + "learning_rate": 1.9796825396825396e-05, + "loss": 0.2031, + "step": 5632 + }, + { + "epoch": 32.18857142857143, + "grad_norm": 83.523193359375, + "learning_rate": 1.9790476190476193e-05, + "loss": 0.4617, + "step": 5633 + }, + { + "epoch": 32.19428571428571, + "grad_norm": 32.6356315612793, + "learning_rate": 1.9784126984126985e-05, + "loss": 0.223, + "step": 5634 + }, + { + "epoch": 32.2, + "grad_norm": 362.17803955078125, + "learning_rate": 1.9777777777777778e-05, + "loss": 0.2614, + "step": 5635 + }, + { + "epoch": 32.205714285714286, + "grad_norm": 43.75965881347656, + "learning_rate": 1.977142857142857e-05, + "loss": 0.2011, + "step": 5636 + }, + { + "epoch": 32.21142857142857, + "grad_norm": 25.195335388183594, + "learning_rate": 1.9765079365079366e-05, + "loss": 0.1964, + "step": 5637 + }, + { + "epoch": 32.21714285714286, + "grad_norm": 49.41599655151367, + "learning_rate": 1.975873015873016e-05, + "loss": 0.2598, + "step": 5638 + }, + { + "epoch": 32.222857142857144, + "grad_norm": 28.786399841308594, + "learning_rate": 
1.9752380952380952e-05, + "loss": 0.1453, + "step": 5639 + }, + { + "epoch": 32.22857142857143, + "grad_norm": 37.16865158081055, + "learning_rate": 1.9746031746031744e-05, + "loss": 0.3094, + "step": 5640 + }, + { + "epoch": 32.23428571428571, + "grad_norm": 40.28287887573242, + "learning_rate": 1.973968253968254e-05, + "loss": 0.3459, + "step": 5641 + }, + { + "epoch": 32.24, + "grad_norm": 36.927947998046875, + "learning_rate": 1.9733333333333333e-05, + "loss": 0.4682, + "step": 5642 + }, + { + "epoch": 32.245714285714286, + "grad_norm": 35.456565856933594, + "learning_rate": 1.9726984126984126e-05, + "loss": 0.2156, + "step": 5643 + }, + { + "epoch": 32.25142857142857, + "grad_norm": 12.010313034057617, + "learning_rate": 1.972063492063492e-05, + "loss": 0.1473, + "step": 5644 + }, + { + "epoch": 32.25714285714286, + "grad_norm": 32.582576751708984, + "learning_rate": 1.9714285714285714e-05, + "loss": 0.381, + "step": 5645 + }, + { + "epoch": 32.26285714285714, + "grad_norm": 53.8582878112793, + "learning_rate": 1.9707936507936507e-05, + "loss": 0.2633, + "step": 5646 + }, + { + "epoch": 32.26857142857143, + "grad_norm": 720.4382934570312, + "learning_rate": 1.9701587301587303e-05, + "loss": 0.2267, + "step": 5647 + }, + { + "epoch": 32.27428571428572, + "grad_norm": 41.28740310668945, + "learning_rate": 1.9695238095238096e-05, + "loss": 0.2127, + "step": 5648 + }, + { + "epoch": 32.28, + "grad_norm": 50.774024963378906, + "learning_rate": 1.968888888888889e-05, + "loss": 0.2454, + "step": 5649 + }, + { + "epoch": 32.285714285714285, + "grad_norm": 47.7146110534668, + "learning_rate": 1.9682539682539684e-05, + "loss": 0.3452, + "step": 5650 + }, + { + "epoch": 32.29142857142857, + "grad_norm": 29.196165084838867, + "learning_rate": 1.9676190476190477e-05, + "loss": 0.2583, + "step": 5651 + }, + { + "epoch": 32.29714285714286, + "grad_norm": 36.90263748168945, + "learning_rate": 1.9669841269841273e-05, + "loss": 0.2178, + "step": 5652 + }, + { + "epoch": 
32.30285714285714, + "grad_norm": 34.539119720458984, + "learning_rate": 1.9663492063492066e-05, + "loss": 0.2255, + "step": 5653 + }, + { + "epoch": 32.308571428571426, + "grad_norm": 21.559595108032227, + "learning_rate": 1.9657142857142858e-05, + "loss": 0.2509, + "step": 5654 + }, + { + "epoch": 32.31428571428572, + "grad_norm": 32.04145812988281, + "learning_rate": 1.9650793650793654e-05, + "loss": 0.1896, + "step": 5655 + }, + { + "epoch": 32.32, + "grad_norm": 20.757484436035156, + "learning_rate": 1.9644444444444447e-05, + "loss": 0.1825, + "step": 5656 + }, + { + "epoch": 32.325714285714284, + "grad_norm": 69.93598175048828, + "learning_rate": 1.963809523809524e-05, + "loss": 0.3693, + "step": 5657 + }, + { + "epoch": 32.331428571428575, + "grad_norm": 87.04507446289062, + "learning_rate": 1.9631746031746032e-05, + "loss": 0.2081, + "step": 5658 + }, + { + "epoch": 32.33714285714286, + "grad_norm": 68.42386627197266, + "learning_rate": 1.9625396825396828e-05, + "loss": 0.2115, + "step": 5659 + }, + { + "epoch": 32.34285714285714, + "grad_norm": 55.57241439819336, + "learning_rate": 1.961904761904762e-05, + "loss": 0.2773, + "step": 5660 + }, + { + "epoch": 32.348571428571425, + "grad_norm": 1198.2554931640625, + "learning_rate": 1.9612698412698413e-05, + "loss": 0.3929, + "step": 5661 + }, + { + "epoch": 32.354285714285716, + "grad_norm": 70.51028442382812, + "learning_rate": 1.9606349206349206e-05, + "loss": 0.2643, + "step": 5662 + }, + { + "epoch": 32.36, + "grad_norm": 99.01145935058594, + "learning_rate": 1.9600000000000002e-05, + "loss": 0.2505, + "step": 5663 + }, + { + "epoch": 32.36571428571428, + "grad_norm": 37.562618255615234, + "learning_rate": 1.9593650793650795e-05, + "loss": 0.303, + "step": 5664 + }, + { + "epoch": 32.371428571428574, + "grad_norm": 42.09748077392578, + "learning_rate": 1.9587301587301587e-05, + "loss": 0.3365, + "step": 5665 + }, + { + "epoch": 32.37714285714286, + "grad_norm": 97.21544647216797, + "learning_rate": 
1.9580952380952383e-05, + "loss": 0.2125, + "step": 5666 + }, + { + "epoch": 32.38285714285714, + "grad_norm": 37.335872650146484, + "learning_rate": 1.9574603174603176e-05, + "loss": 0.3086, + "step": 5667 + }, + { + "epoch": 32.38857142857143, + "grad_norm": 46.7644157409668, + "learning_rate": 1.956825396825397e-05, + "loss": 0.249, + "step": 5668 + }, + { + "epoch": 32.394285714285715, + "grad_norm": 67.24867248535156, + "learning_rate": 1.956190476190476e-05, + "loss": 0.2295, + "step": 5669 + }, + { + "epoch": 32.4, + "grad_norm": 19.075559616088867, + "learning_rate": 1.9555555555555557e-05, + "loss": 0.2432, + "step": 5670 + }, + { + "epoch": 32.40571428571428, + "grad_norm": 1084.0755615234375, + "learning_rate": 1.954920634920635e-05, + "loss": 0.2487, + "step": 5671 + }, + { + "epoch": 32.41142857142857, + "grad_norm": 415.1152648925781, + "learning_rate": 1.9542857142857143e-05, + "loss": 0.2982, + "step": 5672 + }, + { + "epoch": 32.417142857142856, + "grad_norm": 50.63746643066406, + "learning_rate": 1.9536507936507935e-05, + "loss": 0.2723, + "step": 5673 + }, + { + "epoch": 32.42285714285714, + "grad_norm": 498.7071228027344, + "learning_rate": 1.953015873015873e-05, + "loss": 0.1889, + "step": 5674 + }, + { + "epoch": 32.42857142857143, + "grad_norm": 1690.307861328125, + "learning_rate": 1.9523809523809524e-05, + "loss": 0.2719, + "step": 5675 + }, + { + "epoch": 32.434285714285714, + "grad_norm": 77.0636215209961, + "learning_rate": 1.9517460317460317e-05, + "loss": 0.3183, + "step": 5676 + }, + { + "epoch": 32.44, + "grad_norm": 82.31941986083984, + "learning_rate": 1.9511111111111113e-05, + "loss": 0.2919, + "step": 5677 + }, + { + "epoch": 32.44571428571429, + "grad_norm": 40.9356575012207, + "learning_rate": 1.9504761904761905e-05, + "loss": 0.2544, + "step": 5678 + }, + { + "epoch": 32.45142857142857, + "grad_norm": 94.53091430664062, + "learning_rate": 1.9498412698412698e-05, + "loss": 0.3237, + "step": 5679 + }, + { + "epoch": 
32.457142857142856, + "grad_norm": 1199.8826904296875, + "learning_rate": 1.949206349206349e-05, + "loss": 0.3413, + "step": 5680 + }, + { + "epoch": 32.462857142857146, + "grad_norm": 36.30007553100586, + "learning_rate": 1.9485714285714286e-05, + "loss": 0.27, + "step": 5681 + }, + { + "epoch": 32.46857142857143, + "grad_norm": 93.01171112060547, + "learning_rate": 1.947936507936508e-05, + "loss": 0.2413, + "step": 5682 + }, + { + "epoch": 32.47428571428571, + "grad_norm": 22.21965217590332, + "learning_rate": 1.9473015873015875e-05, + "loss": 0.3064, + "step": 5683 + }, + { + "epoch": 32.48, + "grad_norm": 44.686790466308594, + "learning_rate": 1.9466666666666668e-05, + "loss": 0.2629, + "step": 5684 + }, + { + "epoch": 32.48571428571429, + "grad_norm": 48.551109313964844, + "learning_rate": 1.946031746031746e-05, + "loss": 0.2389, + "step": 5685 + }, + { + "epoch": 32.49142857142857, + "grad_norm": 80.80036926269531, + "learning_rate": 1.9453968253968256e-05, + "loss": 0.3031, + "step": 5686 + }, + { + "epoch": 32.497142857142855, + "grad_norm": 18.785585403442383, + "learning_rate": 1.944761904761905e-05, + "loss": 0.2197, + "step": 5687 + }, + { + "epoch": 32.502857142857145, + "grad_norm": 28.99616241455078, + "learning_rate": 1.9441269841269845e-05, + "loss": 0.2276, + "step": 5688 + }, + { + "epoch": 32.50857142857143, + "grad_norm": 41.807281494140625, + "learning_rate": 1.9434920634920638e-05, + "loss": 0.1598, + "step": 5689 + }, + { + "epoch": 32.51428571428571, + "grad_norm": 19.651777267456055, + "learning_rate": 1.942857142857143e-05, + "loss": 0.2166, + "step": 5690 + }, + { + "epoch": 32.52, + "grad_norm": 102.34699249267578, + "learning_rate": 1.9422222222222223e-05, + "loss": 0.183, + "step": 5691 + }, + { + "epoch": 32.52571428571429, + "grad_norm": 17.926746368408203, + "learning_rate": 1.941587301587302e-05, + "loss": 0.1883, + "step": 5692 + }, + { + "epoch": 32.53142857142857, + "grad_norm": 45.363197326660156, + "learning_rate": 
1.940952380952381e-05, + "loss": 0.2435, + "step": 5693 + }, + { + "epoch": 32.537142857142854, + "grad_norm": 80.7637939453125, + "learning_rate": 1.9403174603174604e-05, + "loss": 0.217, + "step": 5694 + }, + { + "epoch": 32.542857142857144, + "grad_norm": 268.4298095703125, + "learning_rate": 1.9396825396825397e-05, + "loss": 0.2071, + "step": 5695 + }, + { + "epoch": 32.54857142857143, + "grad_norm": 306.17010498046875, + "learning_rate": 1.9390476190476193e-05, + "loss": 0.3023, + "step": 5696 + }, + { + "epoch": 32.55428571428571, + "grad_norm": 35.1732177734375, + "learning_rate": 1.9384126984126986e-05, + "loss": 0.1646, + "step": 5697 + }, + { + "epoch": 32.56, + "grad_norm": 46.12312698364258, + "learning_rate": 1.9377777777777778e-05, + "loss": 0.2559, + "step": 5698 + }, + { + "epoch": 32.565714285714286, + "grad_norm": 213.47662353515625, + "learning_rate": 1.9371428571428574e-05, + "loss": 0.23, + "step": 5699 + }, + { + "epoch": 32.57142857142857, + "grad_norm": 365.5625, + "learning_rate": 1.9365079365079367e-05, + "loss": 0.2685, + "step": 5700 + }, + { + "epoch": 32.57714285714286, + "grad_norm": 49.27860641479492, + "learning_rate": 1.935873015873016e-05, + "loss": 0.2247, + "step": 5701 + }, + { + "epoch": 32.582857142857144, + "grad_norm": 62.4014892578125, + "learning_rate": 1.9352380952380952e-05, + "loss": 0.2656, + "step": 5702 + }, + { + "epoch": 32.58857142857143, + "grad_norm": 56.03572463989258, + "learning_rate": 1.9346031746031748e-05, + "loss": 0.1949, + "step": 5703 + }, + { + "epoch": 32.59428571428572, + "grad_norm": 37.72709274291992, + "learning_rate": 1.933968253968254e-05, + "loss": 0.2318, + "step": 5704 + }, + { + "epoch": 32.6, + "grad_norm": 55.7191162109375, + "learning_rate": 1.9333333333333333e-05, + "loss": 0.2583, + "step": 5705 + }, + { + "epoch": 32.605714285714285, + "grad_norm": 35.487213134765625, + "learning_rate": 1.9326984126984126e-05, + "loss": 0.2064, + "step": 5706 + }, + { + "epoch": 32.61142857142857, + 
"grad_norm": 49.100337982177734, + "learning_rate": 1.9320634920634922e-05, + "loss": 0.2311, + "step": 5707 + }, + { + "epoch": 32.61714285714286, + "grad_norm": 63.767578125, + "learning_rate": 1.9314285714285715e-05, + "loss": 0.3147, + "step": 5708 + }, + { + "epoch": 32.62285714285714, + "grad_norm": 25.621097564697266, + "learning_rate": 1.9307936507936507e-05, + "loss": 0.1902, + "step": 5709 + }, + { + "epoch": 32.628571428571426, + "grad_norm": 52.18640899658203, + "learning_rate": 1.9301587301587303e-05, + "loss": 0.2201, + "step": 5710 + }, + { + "epoch": 32.63428571428572, + "grad_norm": 74.32487487792969, + "learning_rate": 1.9295238095238096e-05, + "loss": 0.2006, + "step": 5711 + }, + { + "epoch": 32.64, + "grad_norm": 42.27890396118164, + "learning_rate": 1.928888888888889e-05, + "loss": 0.305, + "step": 5712 + }, + { + "epoch": 32.645714285714284, + "grad_norm": 44.3375358581543, + "learning_rate": 1.928253968253968e-05, + "loss": 0.1867, + "step": 5713 + }, + { + "epoch": 32.651428571428575, + "grad_norm": 26.97228240966797, + "learning_rate": 1.9276190476190477e-05, + "loss": 0.2139, + "step": 5714 + }, + { + "epoch": 32.65714285714286, + "grad_norm": 30.574569702148438, + "learning_rate": 1.926984126984127e-05, + "loss": 0.1768, + "step": 5715 + }, + { + "epoch": 32.66285714285714, + "grad_norm": 31.63504409790039, + "learning_rate": 1.9263492063492063e-05, + "loss": 0.2208, + "step": 5716 + }, + { + "epoch": 32.668571428571425, + "grad_norm": 37.90189743041992, + "learning_rate": 1.9257142857142855e-05, + "loss": 0.2087, + "step": 5717 + }, + { + "epoch": 32.674285714285716, + "grad_norm": 78.9525375366211, + "learning_rate": 1.925079365079365e-05, + "loss": 0.2433, + "step": 5718 + }, + { + "epoch": 32.68, + "grad_norm": 564.4703979492188, + "learning_rate": 1.9244444444444444e-05, + "loss": 0.2879, + "step": 5719 + }, + { + "epoch": 32.68571428571428, + "grad_norm": 39.07098388671875, + "learning_rate": 1.923809523809524e-05, + "loss": 
0.3621, + "step": 5720 + }, + { + "epoch": 32.691428571428574, + "grad_norm": 30.343294143676758, + "learning_rate": 1.9231746031746033e-05, + "loss": 0.2439, + "step": 5721 + }, + { + "epoch": 32.69714285714286, + "grad_norm": 94.84039306640625, + "learning_rate": 1.922539682539683e-05, + "loss": 0.2671, + "step": 5722 + }, + { + "epoch": 32.70285714285714, + "grad_norm": 945.572998046875, + "learning_rate": 1.921904761904762e-05, + "loss": 0.2558, + "step": 5723 + }, + { + "epoch": 32.70857142857143, + "grad_norm": 60.11537551879883, + "learning_rate": 1.9212698412698414e-05, + "loss": 0.3213, + "step": 5724 + }, + { + "epoch": 32.714285714285715, + "grad_norm": 68.07587432861328, + "learning_rate": 1.920634920634921e-05, + "loss": 0.1808, + "step": 5725 + }, + { + "epoch": 32.72, + "grad_norm": 155.82656860351562, + "learning_rate": 1.9200000000000003e-05, + "loss": 0.3225, + "step": 5726 + }, + { + "epoch": 32.72571428571428, + "grad_norm": 24.076765060424805, + "learning_rate": 1.9193650793650795e-05, + "loss": 0.2316, + "step": 5727 + }, + { + "epoch": 32.73142857142857, + "grad_norm": 36.95673370361328, + "learning_rate": 1.9187301587301588e-05, + "loss": 0.2036, + "step": 5728 + }, + { + "epoch": 32.73714285714286, + "grad_norm": 18.792818069458008, + "learning_rate": 1.9180952380952384e-05, + "loss": 0.2172, + "step": 5729 + }, + { + "epoch": 32.74285714285714, + "grad_norm": 55.471466064453125, + "learning_rate": 1.9174603174603176e-05, + "loss": 0.2202, + "step": 5730 + }, + { + "epoch": 32.74857142857143, + "grad_norm": 81.36102294921875, + "learning_rate": 1.916825396825397e-05, + "loss": 0.2538, + "step": 5731 + }, + { + "epoch": 32.754285714285714, + "grad_norm": 75.76774597167969, + "learning_rate": 1.9161904761904762e-05, + "loss": 0.2875, + "step": 5732 + }, + { + "epoch": 32.76, + "grad_norm": 59.79932403564453, + "learning_rate": 1.9155555555555558e-05, + "loss": 0.3493, + "step": 5733 + }, + { + "epoch": 32.76571428571429, + "grad_norm": 
55.21664810180664, + "learning_rate": 1.914920634920635e-05, + "loss": 0.1894, + "step": 5734 + }, + { + "epoch": 32.77142857142857, + "grad_norm": 24.527069091796875, + "learning_rate": 1.9142857142857143e-05, + "loss": 0.2233, + "step": 5735 + }, + { + "epoch": 32.777142857142856, + "grad_norm": 36.4738655090332, + "learning_rate": 1.913650793650794e-05, + "loss": 0.1894, + "step": 5736 + }, + { + "epoch": 32.78285714285714, + "grad_norm": 85.22257232666016, + "learning_rate": 1.913015873015873e-05, + "loss": 0.2481, + "step": 5737 + }, + { + "epoch": 32.78857142857143, + "grad_norm": 61.35136413574219, + "learning_rate": 1.9123809523809524e-05, + "loss": 0.2234, + "step": 5738 + }, + { + "epoch": 32.794285714285714, + "grad_norm": 47.99897384643555, + "learning_rate": 1.9117460317460317e-05, + "loss": 0.2531, + "step": 5739 + }, + { + "epoch": 32.8, + "grad_norm": 30.050312042236328, + "learning_rate": 1.9111111111111113e-05, + "loss": 0.2414, + "step": 5740 + }, + { + "epoch": 32.80571428571429, + "grad_norm": 62.89798355102539, + "learning_rate": 1.9104761904761906e-05, + "loss": 0.2529, + "step": 5741 + }, + { + "epoch": 32.81142857142857, + "grad_norm": 35.83088302612305, + "learning_rate": 1.9098412698412698e-05, + "loss": 0.2205, + "step": 5742 + }, + { + "epoch": 32.817142857142855, + "grad_norm": 49.34012222290039, + "learning_rate": 1.909206349206349e-05, + "loss": 0.2036, + "step": 5743 + }, + { + "epoch": 32.822857142857146, + "grad_norm": 104.82341003417969, + "learning_rate": 1.9085714285714287e-05, + "loss": 0.2899, + "step": 5744 + }, + { + "epoch": 32.82857142857143, + "grad_norm": 26.515548706054688, + "learning_rate": 1.907936507936508e-05, + "loss": 0.2559, + "step": 5745 + }, + { + "epoch": 32.83428571428571, + "grad_norm": 27.458879470825195, + "learning_rate": 1.9073015873015872e-05, + "loss": 0.207, + "step": 5746 + }, + { + "epoch": 32.84, + "grad_norm": 70.85945892333984, + "learning_rate": 1.9066666666666668e-05, + "loss": 0.2689, + 
"step": 5747 + }, + { + "epoch": 32.84571428571429, + "grad_norm": 93.12171936035156, + "learning_rate": 1.906031746031746e-05, + "loss": 0.278, + "step": 5748 + }, + { + "epoch": 32.85142857142857, + "grad_norm": 65.2746353149414, + "learning_rate": 1.9053968253968253e-05, + "loss": 0.263, + "step": 5749 + }, + { + "epoch": 32.857142857142854, + "grad_norm": 60.12397766113281, + "learning_rate": 1.9047619047619046e-05, + "loss": 0.3596, + "step": 5750 + }, + { + "epoch": 32.862857142857145, + "grad_norm": 74.2511978149414, + "learning_rate": 1.9041269841269842e-05, + "loss": 0.1573, + "step": 5751 + }, + { + "epoch": 32.86857142857143, + "grad_norm": 42.45563888549805, + "learning_rate": 1.9034920634920635e-05, + "loss": 0.1389, + "step": 5752 + }, + { + "epoch": 32.87428571428571, + "grad_norm": 69.6099624633789, + "learning_rate": 1.9028571428571427e-05, + "loss": 0.2109, + "step": 5753 + }, + { + "epoch": 32.88, + "grad_norm": 95.45622253417969, + "learning_rate": 1.9022222222222223e-05, + "loss": 0.1731, + "step": 5754 + }, + { + "epoch": 32.885714285714286, + "grad_norm": 42.497886657714844, + "learning_rate": 1.9015873015873016e-05, + "loss": 0.2298, + "step": 5755 + }, + { + "epoch": 32.89142857142857, + "grad_norm": 56.04786682128906, + "learning_rate": 1.9009523809523812e-05, + "loss": 0.2151, + "step": 5756 + }, + { + "epoch": 32.89714285714286, + "grad_norm": 25.797842025756836, + "learning_rate": 1.9003174603174605e-05, + "loss": 0.2009, + "step": 5757 + }, + { + "epoch": 32.902857142857144, + "grad_norm": 30.58416748046875, + "learning_rate": 1.8996825396825397e-05, + "loss": 0.2266, + "step": 5758 + }, + { + "epoch": 32.90857142857143, + "grad_norm": 62.21387481689453, + "learning_rate": 1.8990476190476193e-05, + "loss": 0.2812, + "step": 5759 + }, + { + "epoch": 32.91428571428571, + "grad_norm": 67.15927124023438, + "learning_rate": 1.8984126984126986e-05, + "loss": 0.536, + "step": 5760 + }, + { + "epoch": 32.92, + "grad_norm": 33.9781379699707, + 
"learning_rate": 1.897777777777778e-05, + "loss": 0.2723, + "step": 5761 + }, + { + "epoch": 32.925714285714285, + "grad_norm": 29.440732955932617, + "learning_rate": 1.8971428571428575e-05, + "loss": 0.1815, + "step": 5762 + }, + { + "epoch": 32.93142857142857, + "grad_norm": 21.851028442382812, + "learning_rate": 1.8965079365079367e-05, + "loss": 0.26, + "step": 5763 + }, + { + "epoch": 32.93714285714286, + "grad_norm": 36.22255325317383, + "learning_rate": 1.895873015873016e-05, + "loss": 0.2044, + "step": 5764 + }, + { + "epoch": 32.94285714285714, + "grad_norm": 35.30541229248047, + "learning_rate": 1.8952380952380953e-05, + "loss": 0.2584, + "step": 5765 + }, + { + "epoch": 32.94857142857143, + "grad_norm": 164.14581298828125, + "learning_rate": 1.894603174603175e-05, + "loss": 0.2357, + "step": 5766 + }, + { + "epoch": 32.95428571428572, + "grad_norm": 66.16490173339844, + "learning_rate": 1.893968253968254e-05, + "loss": 0.2905, + "step": 5767 + }, + { + "epoch": 32.96, + "grad_norm": 122.70243072509766, + "learning_rate": 1.8933333333333334e-05, + "loss": 0.2421, + "step": 5768 + }, + { + "epoch": 32.965714285714284, + "grad_norm": 33.81968307495117, + "learning_rate": 1.892698412698413e-05, + "loss": 0.2856, + "step": 5769 + }, + { + "epoch": 32.97142857142857, + "grad_norm": 370.8056945800781, + "learning_rate": 1.8920634920634923e-05, + "loss": 0.2989, + "step": 5770 + }, + { + "epoch": 32.97714285714286, + "grad_norm": 56.27879333496094, + "learning_rate": 1.8914285714285715e-05, + "loss": 0.2699, + "step": 5771 + }, + { + "epoch": 32.98285714285714, + "grad_norm": 28.76259422302246, + "learning_rate": 1.8907936507936508e-05, + "loss": 0.3926, + "step": 5772 + }, + { + "epoch": 32.988571428571426, + "grad_norm": 53.48399353027344, + "learning_rate": 1.8901587301587304e-05, + "loss": 0.2002, + "step": 5773 + }, + { + "epoch": 32.994285714285716, + "grad_norm": 38.394779205322266, + "learning_rate": 1.8895238095238096e-05, + "loss": 0.2, + "step": 5774 + 
}, + { + "epoch": 33.0, + "grad_norm": 173.07803344726562, + "learning_rate": 1.888888888888889e-05, + "loss": 0.2864, + "step": 5775 + }, + { + "epoch": 33.0, + "eval_classes": 0, + "eval_loss": 0.5889254212379456, + "eval_map": 0.9286, + "eval_map_50": 0.9623, + "eval_map_75": 0.9582, + "eval_map_large": 0.9287, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9286, + "eval_map_small": -1.0, + "eval_mar_1": 0.7829, + "eval_mar_10": 0.9733, + "eval_mar_100": 0.9759, + "eval_mar_100_per_class": 0.9759, + "eval_mar_large": 0.9759, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.1098, + "eval_samples_per_second": 22.426, + "eval_steps_per_second": 2.822, + "step": 5775 + }, + { + "epoch": 33.005714285714284, + "grad_norm": 61.6531867980957, + "learning_rate": 1.8882539682539682e-05, + "loss": 0.1863, + "step": 5776 + }, + { + "epoch": 33.011428571428574, + "grad_norm": 85.87842559814453, + "learning_rate": 1.8876190476190478e-05, + "loss": 0.1609, + "step": 5777 + }, + { + "epoch": 33.01714285714286, + "grad_norm": 66.04412078857422, + "learning_rate": 1.886984126984127e-05, + "loss": 0.4112, + "step": 5778 + }, + { + "epoch": 33.02285714285714, + "grad_norm": 59.694461822509766, + "learning_rate": 1.8863492063492063e-05, + "loss": 0.1821, + "step": 5779 + }, + { + "epoch": 33.02857142857143, + "grad_norm": 48.2636833190918, + "learning_rate": 1.885714285714286e-05, + "loss": 0.3267, + "step": 5780 + }, + { + "epoch": 33.034285714285716, + "grad_norm": 328.946044921875, + "learning_rate": 1.8850793650793652e-05, + "loss": 0.2647, + "step": 5781 + }, + { + "epoch": 33.04, + "grad_norm": 28.746435165405273, + "learning_rate": 1.8844444444444444e-05, + "loss": 0.2212, + "step": 5782 + }, + { + "epoch": 33.04571428571428, + "grad_norm": 28.282203674316406, + "learning_rate": 1.8838095238095237e-05, + "loss": 0.1856, + "step": 5783 + }, + { + "epoch": 33.05142857142857, + "grad_norm": 31.544275283813477, + "learning_rate": 
1.8831746031746033e-05, + "loss": 0.2117, + "step": 5784 + }, + { + "epoch": 33.05714285714286, + "grad_norm": 40.49601364135742, + "learning_rate": 1.8825396825396826e-05, + "loss": 0.1419, + "step": 5785 + }, + { + "epoch": 33.06285714285714, + "grad_norm": 46.23110580444336, + "learning_rate": 1.8819047619047618e-05, + "loss": 0.2121, + "step": 5786 + }, + { + "epoch": 33.06857142857143, + "grad_norm": 44.122901916503906, + "learning_rate": 1.881269841269841e-05, + "loss": 0.2214, + "step": 5787 + }, + { + "epoch": 33.074285714285715, + "grad_norm": 23.44615936279297, + "learning_rate": 1.8806349206349207e-05, + "loss": 0.2412, + "step": 5788 + }, + { + "epoch": 33.08, + "grad_norm": 50.15841293334961, + "learning_rate": 1.88e-05, + "loss": 0.2051, + "step": 5789 + }, + { + "epoch": 33.08571428571429, + "grad_norm": 322.5673522949219, + "learning_rate": 1.8793650793650792e-05, + "loss": 0.2591, + "step": 5790 + }, + { + "epoch": 33.09142857142857, + "grad_norm": 218.38636779785156, + "learning_rate": 1.8787301587301588e-05, + "loss": 0.2429, + "step": 5791 + }, + { + "epoch": 33.097142857142856, + "grad_norm": 41.37594223022461, + "learning_rate": 1.878095238095238e-05, + "loss": 0.2061, + "step": 5792 + }, + { + "epoch": 33.10285714285714, + "grad_norm": 39.84482192993164, + "learning_rate": 1.8774603174603177e-05, + "loss": 0.1754, + "step": 5793 + }, + { + "epoch": 33.10857142857143, + "grad_norm": 48.588802337646484, + "learning_rate": 1.876825396825397e-05, + "loss": 0.2415, + "step": 5794 + }, + { + "epoch": 33.114285714285714, + "grad_norm": 45.063846588134766, + "learning_rate": 1.8761904761904766e-05, + "loss": 0.275, + "step": 5795 + }, + { + "epoch": 33.12, + "grad_norm": 55.40730667114258, + "learning_rate": 1.8755555555555558e-05, + "loss": 0.206, + "step": 5796 + }, + { + "epoch": 33.12571428571429, + "grad_norm": 32.80923080444336, + "learning_rate": 1.874920634920635e-05, + "loss": 0.1894, + "step": 5797 + }, + { + "epoch": 33.13142857142857, + 
"grad_norm": 28.337350845336914, + "learning_rate": 1.8742857142857143e-05, + "loss": 0.2787, + "step": 5798 + }, + { + "epoch": 33.137142857142855, + "grad_norm": 34.23060607910156, + "learning_rate": 1.873650793650794e-05, + "loss": 0.2507, + "step": 5799 + }, + { + "epoch": 33.142857142857146, + "grad_norm": 824.5645141601562, + "learning_rate": 1.8730158730158732e-05, + "loss": 0.3496, + "step": 5800 + }, + { + "epoch": 33.14857142857143, + "grad_norm": 46.26865005493164, + "learning_rate": 1.8723809523809525e-05, + "loss": 0.2216, + "step": 5801 + }, + { + "epoch": 33.15428571428571, + "grad_norm": 90.3018569946289, + "learning_rate": 1.871746031746032e-05, + "loss": 0.3366, + "step": 5802 + }, + { + "epoch": 33.16, + "grad_norm": 41.25313186645508, + "learning_rate": 1.8711111111111113e-05, + "loss": 0.1777, + "step": 5803 + }, + { + "epoch": 33.16571428571429, + "grad_norm": 37.36777877807617, + "learning_rate": 1.8704761904761906e-05, + "loss": 0.3449, + "step": 5804 + }, + { + "epoch": 33.17142857142857, + "grad_norm": 30.33721351623535, + "learning_rate": 1.86984126984127e-05, + "loss": 0.2448, + "step": 5805 + }, + { + "epoch": 33.177142857142854, + "grad_norm": 41.4080696105957, + "learning_rate": 1.8692063492063495e-05, + "loss": 0.1894, + "step": 5806 + }, + { + "epoch": 33.182857142857145, + "grad_norm": 23.389171600341797, + "learning_rate": 1.8685714285714287e-05, + "loss": 0.1878, + "step": 5807 + }, + { + "epoch": 33.18857142857143, + "grad_norm": 33.94114685058594, + "learning_rate": 1.867936507936508e-05, + "loss": 0.2903, + "step": 5808 + }, + { + "epoch": 33.19428571428571, + "grad_norm": 38.92851638793945, + "learning_rate": 1.8673015873015873e-05, + "loss": 0.2322, + "step": 5809 + }, + { + "epoch": 33.2, + "grad_norm": 26.486780166625977, + "learning_rate": 1.866666666666667e-05, + "loss": 0.2645, + "step": 5810 + }, + { + "epoch": 33.205714285714286, + "grad_norm": 79.34441375732422, + "learning_rate": 1.866031746031746e-05, + "loss": 
0.2896, + "step": 5811 + }, + { + "epoch": 33.21142857142857, + "grad_norm": 43.19795227050781, + "learning_rate": 1.8653968253968254e-05, + "loss": 0.3704, + "step": 5812 + }, + { + "epoch": 33.21714285714286, + "grad_norm": 56.158416748046875, + "learning_rate": 1.864761904761905e-05, + "loss": 0.2275, + "step": 5813 + }, + { + "epoch": 33.222857142857144, + "grad_norm": 44.3139762878418, + "learning_rate": 1.8641269841269843e-05, + "loss": 0.3289, + "step": 5814 + }, + { + "epoch": 33.22857142857143, + "grad_norm": 30.506383895874023, + "learning_rate": 1.8634920634920635e-05, + "loss": 0.2183, + "step": 5815 + }, + { + "epoch": 33.23428571428571, + "grad_norm": 61.62275695800781, + "learning_rate": 1.8628571428571428e-05, + "loss": 0.3011, + "step": 5816 + }, + { + "epoch": 33.24, + "grad_norm": 31.950824737548828, + "learning_rate": 1.8622222222222224e-05, + "loss": 0.2199, + "step": 5817 + }, + { + "epoch": 33.245714285714286, + "grad_norm": 39.46795654296875, + "learning_rate": 1.8615873015873017e-05, + "loss": 0.1838, + "step": 5818 + }, + { + "epoch": 33.25142857142857, + "grad_norm": 201.7964324951172, + "learning_rate": 1.860952380952381e-05, + "loss": 0.2733, + "step": 5819 + }, + { + "epoch": 33.25714285714286, + "grad_norm": 35.36155700683594, + "learning_rate": 1.8603174603174602e-05, + "loss": 0.1887, + "step": 5820 + }, + { + "epoch": 33.26285714285714, + "grad_norm": 45.871803283691406, + "learning_rate": 1.8596825396825398e-05, + "loss": 0.1908, + "step": 5821 + }, + { + "epoch": 33.26857142857143, + "grad_norm": 32.87693786621094, + "learning_rate": 1.859047619047619e-05, + "loss": 0.2116, + "step": 5822 + }, + { + "epoch": 33.27428571428572, + "grad_norm": 37.83621597290039, + "learning_rate": 1.8584126984126983e-05, + "loss": 0.2422, + "step": 5823 + }, + { + "epoch": 33.28, + "grad_norm": 173.2474822998047, + "learning_rate": 1.8577777777777776e-05, + "loss": 0.2098, + "step": 5824 + }, + { + "epoch": 33.285714285714285, + "grad_norm": 
69.15506744384766, + "learning_rate": 1.8571428571428572e-05, + "loss": 0.278, + "step": 5825 + }, + { + "epoch": 33.29142857142857, + "grad_norm": 36.939918518066406, + "learning_rate": 1.8565079365079364e-05, + "loss": 0.214, + "step": 5826 + }, + { + "epoch": 33.29714285714286, + "grad_norm": 54.35163116455078, + "learning_rate": 1.855873015873016e-05, + "loss": 0.3314, + "step": 5827 + }, + { + "epoch": 33.30285714285714, + "grad_norm": 55.574005126953125, + "learning_rate": 1.8552380952380953e-05, + "loss": 0.2441, + "step": 5828 + }, + { + "epoch": 33.308571428571426, + "grad_norm": 27.20143699645996, + "learning_rate": 1.8546031746031746e-05, + "loss": 0.3751, + "step": 5829 + }, + { + "epoch": 33.31428571428572, + "grad_norm": 126.8397445678711, + "learning_rate": 1.853968253968254e-05, + "loss": 0.3334, + "step": 5830 + }, + { + "epoch": 33.32, + "grad_norm": 38.47798156738281, + "learning_rate": 1.8533333333333334e-05, + "loss": 0.1912, + "step": 5831 + }, + { + "epoch": 33.325714285714284, + "grad_norm": 69.825927734375, + "learning_rate": 1.852698412698413e-05, + "loss": 0.3188, + "step": 5832 + }, + { + "epoch": 33.331428571428575, + "grad_norm": 42.98884582519531, + "learning_rate": 1.8520634920634923e-05, + "loss": 0.3282, + "step": 5833 + }, + { + "epoch": 33.33714285714286, + "grad_norm": 59.24766159057617, + "learning_rate": 1.8514285714285716e-05, + "loss": 0.2309, + "step": 5834 + }, + { + "epoch": 33.34285714285714, + "grad_norm": 69.93727111816406, + "learning_rate": 1.8507936507936508e-05, + "loss": 0.216, + "step": 5835 + }, + { + "epoch": 33.348571428571425, + "grad_norm": 316.0108337402344, + "learning_rate": 1.8501587301587304e-05, + "loss": 0.331, + "step": 5836 + }, + { + "epoch": 33.354285714285716, + "grad_norm": 78.41392517089844, + "learning_rate": 1.8495238095238097e-05, + "loss": 0.3179, + "step": 5837 + }, + { + "epoch": 33.36, + "grad_norm": 33.232337951660156, + "learning_rate": 1.848888888888889e-05, + "loss": 0.2548, + 
"step": 5838 + }, + { + "epoch": 33.36571428571428, + "grad_norm": 41.12696838378906, + "learning_rate": 1.8482539682539686e-05, + "loss": 0.2283, + "step": 5839 + }, + { + "epoch": 33.371428571428574, + "grad_norm": 30.386310577392578, + "learning_rate": 1.8476190476190478e-05, + "loss": 0.2291, + "step": 5840 + }, + { + "epoch": 33.37714285714286, + "grad_norm": 19.18267250061035, + "learning_rate": 1.846984126984127e-05, + "loss": 0.2395, + "step": 5841 + }, + { + "epoch": 33.38285714285714, + "grad_norm": 43.62106704711914, + "learning_rate": 1.8463492063492063e-05, + "loss": 0.1517, + "step": 5842 + }, + { + "epoch": 33.38857142857143, + "grad_norm": 37.835575103759766, + "learning_rate": 1.845714285714286e-05, + "loss": 0.1752, + "step": 5843 + }, + { + "epoch": 33.394285714285715, + "grad_norm": 36.11626434326172, + "learning_rate": 1.8450793650793652e-05, + "loss": 0.1825, + "step": 5844 + }, + { + "epoch": 33.4, + "grad_norm": 65.71627044677734, + "learning_rate": 1.8444444444444445e-05, + "loss": 0.2111, + "step": 5845 + }, + { + "epoch": 33.40571428571428, + "grad_norm": 52.04588317871094, + "learning_rate": 1.8438095238095237e-05, + "loss": 0.2537, + "step": 5846 + }, + { + "epoch": 33.41142857142857, + "grad_norm": 35.33464813232422, + "learning_rate": 1.8431746031746033e-05, + "loss": 0.1835, + "step": 5847 + }, + { + "epoch": 33.417142857142856, + "grad_norm": 45.317501068115234, + "learning_rate": 1.8425396825396826e-05, + "loss": 0.2132, + "step": 5848 + }, + { + "epoch": 33.42285714285714, + "grad_norm": 46.01725769042969, + "learning_rate": 1.841904761904762e-05, + "loss": 0.2026, + "step": 5849 + }, + { + "epoch": 33.42857142857143, + "grad_norm": 46.28281021118164, + "learning_rate": 1.8412698412698415e-05, + "loss": 0.2128, + "step": 5850 + }, + { + "epoch": 33.434285714285714, + "grad_norm": 30.394298553466797, + "learning_rate": 1.8406349206349207e-05, + "loss": 0.2085, + "step": 5851 + }, + { + "epoch": 33.44, + "grad_norm": 
51.70058822631836, + "learning_rate": 1.84e-05, + "loss": 0.2591, + "step": 5852 + }, + { + "epoch": 33.44571428571429, + "grad_norm": 48.758636474609375, + "learning_rate": 1.8393650793650793e-05, + "loss": 0.2412, + "step": 5853 + }, + { + "epoch": 33.45142857142857, + "grad_norm": 21.338415145874023, + "learning_rate": 1.838730158730159e-05, + "loss": 0.2175, + "step": 5854 + }, + { + "epoch": 33.457142857142856, + "grad_norm": 40.013221740722656, + "learning_rate": 1.838095238095238e-05, + "loss": 0.2884, + "step": 5855 + }, + { + "epoch": 33.462857142857146, + "grad_norm": 28.742542266845703, + "learning_rate": 1.8374603174603174e-05, + "loss": 0.2486, + "step": 5856 + }, + { + "epoch": 33.46857142857143, + "grad_norm": 31.17852783203125, + "learning_rate": 1.8368253968253967e-05, + "loss": 0.2581, + "step": 5857 + }, + { + "epoch": 33.47428571428571, + "grad_norm": 24.765287399291992, + "learning_rate": 1.8361904761904763e-05, + "loss": 0.2704, + "step": 5858 + }, + { + "epoch": 33.48, + "grad_norm": 85.52215576171875, + "learning_rate": 1.8355555555555555e-05, + "loss": 0.1736, + "step": 5859 + }, + { + "epoch": 33.48571428571429, + "grad_norm": 74.87712860107422, + "learning_rate": 1.8349206349206348e-05, + "loss": 0.1951, + "step": 5860 + }, + { + "epoch": 33.49142857142857, + "grad_norm": 60.96209716796875, + "learning_rate": 1.8342857142857144e-05, + "loss": 0.2245, + "step": 5861 + }, + { + "epoch": 33.497142857142855, + "grad_norm": 39.256160736083984, + "learning_rate": 1.8336507936507937e-05, + "loss": 0.2472, + "step": 5862 + }, + { + "epoch": 33.502857142857145, + "grad_norm": 57.78693771362305, + "learning_rate": 1.833015873015873e-05, + "loss": 0.1854, + "step": 5863 + }, + { + "epoch": 33.50857142857143, + "grad_norm": 46.70375061035156, + "learning_rate": 1.8323809523809525e-05, + "loss": 0.2039, + "step": 5864 + }, + { + "epoch": 33.51428571428571, + "grad_norm": 26.03221893310547, + "learning_rate": 1.8317460317460318e-05, + "loss": 0.2059, + 
"step": 5865 + }, + { + "epoch": 33.52, + "grad_norm": 468.8216247558594, + "learning_rate": 1.8311111111111114e-05, + "loss": 0.295, + "step": 5866 + }, + { + "epoch": 33.52571428571429, + "grad_norm": 65.85757446289062, + "learning_rate": 1.8304761904761906e-05, + "loss": 0.219, + "step": 5867 + }, + { + "epoch": 33.53142857142857, + "grad_norm": 105.2874984741211, + "learning_rate": 1.82984126984127e-05, + "loss": 0.251, + "step": 5868 + }, + { + "epoch": 33.537142857142854, + "grad_norm": 39.05388641357422, + "learning_rate": 1.8292063492063495e-05, + "loss": 0.2449, + "step": 5869 + }, + { + "epoch": 33.542857142857144, + "grad_norm": 23.31039047241211, + "learning_rate": 1.8285714285714288e-05, + "loss": 0.2333, + "step": 5870 + }, + { + "epoch": 33.54857142857143, + "grad_norm": 966.0343017578125, + "learning_rate": 1.827936507936508e-05, + "loss": 0.2797, + "step": 5871 + }, + { + "epoch": 33.55428571428571, + "grad_norm": 28.16424560546875, + "learning_rate": 1.8273015873015876e-05, + "loss": 0.2148, + "step": 5872 + }, + { + "epoch": 33.56, + "grad_norm": 486.0005187988281, + "learning_rate": 1.826666666666667e-05, + "loss": 0.2506, + "step": 5873 + }, + { + "epoch": 33.565714285714286, + "grad_norm": 132.05374145507812, + "learning_rate": 1.8260317460317462e-05, + "loss": 0.2897, + "step": 5874 + }, + { + "epoch": 33.57142857142857, + "grad_norm": 31.29085922241211, + "learning_rate": 1.8253968253968254e-05, + "loss": 0.2292, + "step": 5875 + }, + { + "epoch": 33.57714285714286, + "grad_norm": 41.23748016357422, + "learning_rate": 1.824761904761905e-05, + "loss": 0.1858, + "step": 5876 + }, + { + "epoch": 33.582857142857144, + "grad_norm": 52.48597717285156, + "learning_rate": 1.8241269841269843e-05, + "loss": 0.2425, + "step": 5877 + }, + { + "epoch": 33.58857142857143, + "grad_norm": 49.15097427368164, + "learning_rate": 1.8234920634920636e-05, + "loss": 0.2167, + "step": 5878 + }, + { + "epoch": 33.59428571428572, + "grad_norm": 94.56541442871094, + 
"learning_rate": 1.8228571428571428e-05, + "loss": 0.309, + "step": 5879 + }, + { + "epoch": 33.6, + "grad_norm": 113.2981948852539, + "learning_rate": 1.8222222222222224e-05, + "loss": 0.1997, + "step": 5880 + }, + { + "epoch": 33.605714285714285, + "grad_norm": 258.0194396972656, + "learning_rate": 1.8215873015873017e-05, + "loss": 0.3929, + "step": 5881 + }, + { + "epoch": 33.61142857142857, + "grad_norm": 33.491188049316406, + "learning_rate": 1.820952380952381e-05, + "loss": 0.1989, + "step": 5882 + }, + { + "epoch": 33.61714285714286, + "grad_norm": 49.691864013671875, + "learning_rate": 1.8203174603174606e-05, + "loss": 0.1652, + "step": 5883 + }, + { + "epoch": 33.62285714285714, + "grad_norm": 20.83930015563965, + "learning_rate": 1.8196825396825398e-05, + "loss": 0.1858, + "step": 5884 + }, + { + "epoch": 33.628571428571426, + "grad_norm": 60.397544860839844, + "learning_rate": 1.819047619047619e-05, + "loss": 0.2188, + "step": 5885 + }, + { + "epoch": 33.63428571428572, + "grad_norm": 51.880409240722656, + "learning_rate": 1.8184126984126984e-05, + "loss": 0.3225, + "step": 5886 + }, + { + "epoch": 33.64, + "grad_norm": 130.122314453125, + "learning_rate": 1.817777777777778e-05, + "loss": 0.2302, + "step": 5887 + }, + { + "epoch": 33.645714285714284, + "grad_norm": 61.00599670410156, + "learning_rate": 1.8171428571428572e-05, + "loss": 0.181, + "step": 5888 + }, + { + "epoch": 33.651428571428575, + "grad_norm": 34.79129409790039, + "learning_rate": 1.8165079365079365e-05, + "loss": 0.1801, + "step": 5889 + }, + { + "epoch": 33.65714285714286, + "grad_norm": 40.54106521606445, + "learning_rate": 1.8158730158730157e-05, + "loss": 0.2039, + "step": 5890 + }, + { + "epoch": 33.66285714285714, + "grad_norm": 84.70269012451172, + "learning_rate": 1.8152380952380953e-05, + "loss": 0.1887, + "step": 5891 + }, + { + "epoch": 33.668571428571425, + "grad_norm": 70.48677825927734, + "learning_rate": 1.8146031746031746e-05, + "loss": 0.2872, + "step": 5892 + }, + { + 
"epoch": 33.674285714285716, + "grad_norm": 49.539405822753906, + "learning_rate": 1.813968253968254e-05, + "loss": 0.1647, + "step": 5893 + }, + { + "epoch": 33.68, + "grad_norm": 23.737159729003906, + "learning_rate": 1.8133333333333335e-05, + "loss": 0.2219, + "step": 5894 + }, + { + "epoch": 33.68571428571428, + "grad_norm": 20.998498916625977, + "learning_rate": 1.8126984126984127e-05, + "loss": 0.2741, + "step": 5895 + }, + { + "epoch": 33.691428571428574, + "grad_norm": 33.75244140625, + "learning_rate": 1.812063492063492e-05, + "loss": 0.1634, + "step": 5896 + }, + { + "epoch": 33.69714285714286, + "grad_norm": 279.3129577636719, + "learning_rate": 1.8114285714285713e-05, + "loss": 0.2181, + "step": 5897 + }, + { + "epoch": 33.70285714285714, + "grad_norm": 19.29251480102539, + "learning_rate": 1.810793650793651e-05, + "loss": 0.2423, + "step": 5898 + }, + { + "epoch": 33.70857142857143, + "grad_norm": 83.79081726074219, + "learning_rate": 1.81015873015873e-05, + "loss": 0.1878, + "step": 5899 + }, + { + "epoch": 33.714285714285715, + "grad_norm": 55.04714584350586, + "learning_rate": 1.8095238095238094e-05, + "loss": 0.2047, + "step": 5900 + }, + { + "epoch": 33.72, + "grad_norm": 32.267974853515625, + "learning_rate": 1.808888888888889e-05, + "loss": 0.2654, + "step": 5901 + }, + { + "epoch": 33.72571428571428, + "grad_norm": 759.3602905273438, + "learning_rate": 1.8082539682539683e-05, + "loss": 0.2962, + "step": 5902 + }, + { + "epoch": 33.73142857142857, + "grad_norm": 82.70172882080078, + "learning_rate": 1.807619047619048e-05, + "loss": 0.2832, + "step": 5903 + }, + { + "epoch": 33.73714285714286, + "grad_norm": 28.534273147583008, + "learning_rate": 1.806984126984127e-05, + "loss": 0.2042, + "step": 5904 + }, + { + "epoch": 33.74285714285714, + "grad_norm": 39.39735794067383, + "learning_rate": 1.8063492063492067e-05, + "loss": 0.2213, + "step": 5905 + }, + { + "epoch": 33.74857142857143, + "grad_norm": 106.2161865234375, + "learning_rate": 
1.805714285714286e-05, + "loss": 0.2477, + "step": 5906 + }, + { + "epoch": 33.754285714285714, + "grad_norm": 59.39938735961914, + "learning_rate": 1.8050793650793653e-05, + "loss": 0.3675, + "step": 5907 + }, + { + "epoch": 33.76, + "grad_norm": 350.4398498535156, + "learning_rate": 1.8044444444444445e-05, + "loss": 0.2806, + "step": 5908 + }, + { + "epoch": 33.76571428571429, + "grad_norm": 117.81426239013672, + "learning_rate": 1.803809523809524e-05, + "loss": 0.2766, + "step": 5909 + }, + { + "epoch": 33.77142857142857, + "grad_norm": 54.07353973388672, + "learning_rate": 1.8031746031746034e-05, + "loss": 0.2752, + "step": 5910 + }, + { + "epoch": 33.777142857142856, + "grad_norm": 35.34177780151367, + "learning_rate": 1.8025396825396827e-05, + "loss": 0.2255, + "step": 5911 + }, + { + "epoch": 33.78285714285714, + "grad_norm": 57.84912872314453, + "learning_rate": 1.801904761904762e-05, + "loss": 0.1488, + "step": 5912 + }, + { + "epoch": 33.78857142857143, + "grad_norm": 45.213775634765625, + "learning_rate": 1.8012698412698415e-05, + "loss": 0.1765, + "step": 5913 + }, + { + "epoch": 33.794285714285714, + "grad_norm": 474.8720397949219, + "learning_rate": 1.8006349206349208e-05, + "loss": 0.2694, + "step": 5914 + }, + { + "epoch": 33.8, + "grad_norm": 29.5314884185791, + "learning_rate": 1.8e-05, + "loss": 0.239, + "step": 5915 + }, + { + "epoch": 33.80571428571429, + "grad_norm": 38.79159164428711, + "learning_rate": 1.7993650793650796e-05, + "loss": 0.1762, + "step": 5916 + }, + { + "epoch": 33.81142857142857, + "grad_norm": 41.41892623901367, + "learning_rate": 1.798730158730159e-05, + "loss": 0.1811, + "step": 5917 + }, + { + "epoch": 33.817142857142855, + "grad_norm": 36.53205490112305, + "learning_rate": 1.7980952380952382e-05, + "loss": 0.252, + "step": 5918 + }, + { + "epoch": 33.822857142857146, + "grad_norm": 20.373428344726562, + "learning_rate": 1.7974603174603174e-05, + "loss": 0.2905, + "step": 5919 + }, + { + "epoch": 33.82857142857143, + 
"grad_norm": 37.673095703125, + "learning_rate": 1.796825396825397e-05, + "loss": 0.2657, + "step": 5920 + }, + { + "epoch": 33.83428571428571, + "grad_norm": 32.326541900634766, + "learning_rate": 1.7961904761904763e-05, + "loss": 0.2661, + "step": 5921 + }, + { + "epoch": 33.84, + "grad_norm": 46.6069221496582, + "learning_rate": 1.7955555555555556e-05, + "loss": 0.3008, + "step": 5922 + }, + { + "epoch": 33.84571428571429, + "grad_norm": 18.55901527404785, + "learning_rate": 1.794920634920635e-05, + "loss": 0.3043, + "step": 5923 + }, + { + "epoch": 33.85142857142857, + "grad_norm": 30.72215461730957, + "learning_rate": 1.7942857142857144e-05, + "loss": 0.2451, + "step": 5924 + }, + { + "epoch": 33.857142857142854, + "grad_norm": 259.6184997558594, + "learning_rate": 1.7936507936507937e-05, + "loss": 0.2118, + "step": 5925 + }, + { + "epoch": 33.862857142857145, + "grad_norm": 54.99461364746094, + "learning_rate": 1.793015873015873e-05, + "loss": 0.2317, + "step": 5926 + }, + { + "epoch": 33.86857142857143, + "grad_norm": 50.739219665527344, + "learning_rate": 1.7923809523809526e-05, + "loss": 0.1776, + "step": 5927 + }, + { + "epoch": 33.87428571428571, + "grad_norm": 58.7663459777832, + "learning_rate": 1.7917460317460318e-05, + "loss": 0.1811, + "step": 5928 + }, + { + "epoch": 33.88, + "grad_norm": 78.92102813720703, + "learning_rate": 1.791111111111111e-05, + "loss": 0.2529, + "step": 5929 + }, + { + "epoch": 33.885714285714286, + "grad_norm": 61.0960693359375, + "learning_rate": 1.7904761904761904e-05, + "loss": 0.2295, + "step": 5930 + }, + { + "epoch": 33.89142857142857, + "grad_norm": 43.502567291259766, + "learning_rate": 1.78984126984127e-05, + "loss": 0.2208, + "step": 5931 + }, + { + "epoch": 33.89714285714286, + "grad_norm": 50.775177001953125, + "learning_rate": 1.7892063492063492e-05, + "loss": 0.215, + "step": 5932 + }, + { + "epoch": 33.902857142857144, + "grad_norm": 30.936521530151367, + "learning_rate": 1.7885714285714285e-05, + "loss": 
0.1987, + "step": 5933 + }, + { + "epoch": 33.90857142857143, + "grad_norm": 67.06620788574219, + "learning_rate": 1.7879365079365077e-05, + "loss": 0.4565, + "step": 5934 + }, + { + "epoch": 33.91428571428571, + "grad_norm": 64.06864166259766, + "learning_rate": 1.7873015873015874e-05, + "loss": 0.2216, + "step": 5935 + }, + { + "epoch": 33.92, + "grad_norm": 58.99102783203125, + "learning_rate": 1.7866666666666666e-05, + "loss": 0.1912, + "step": 5936 + }, + { + "epoch": 33.925714285714285, + "grad_norm": 54.659034729003906, + "learning_rate": 1.7860317460317462e-05, + "loss": 0.2342, + "step": 5937 + }, + { + "epoch": 33.93142857142857, + "grad_norm": 66.0268325805664, + "learning_rate": 1.7853968253968255e-05, + "loss": 0.2237, + "step": 5938 + }, + { + "epoch": 33.93714285714286, + "grad_norm": 58.34406280517578, + "learning_rate": 1.7847619047619047e-05, + "loss": 0.2444, + "step": 5939 + }, + { + "epoch": 33.94285714285714, + "grad_norm": 43.55903625488281, + "learning_rate": 1.7841269841269843e-05, + "loss": 0.2069, + "step": 5940 + }, + { + "epoch": 33.94857142857143, + "grad_norm": 1790.8717041015625, + "learning_rate": 1.7834920634920636e-05, + "loss": 0.2266, + "step": 5941 + }, + { + "epoch": 33.95428571428572, + "grad_norm": 67.68915557861328, + "learning_rate": 1.7828571428571432e-05, + "loss": 0.2376, + "step": 5942 + }, + { + "epoch": 33.96, + "grad_norm": 25.61754035949707, + "learning_rate": 1.7822222222222225e-05, + "loss": 0.186, + "step": 5943 + }, + { + "epoch": 33.965714285714284, + "grad_norm": 24.572595596313477, + "learning_rate": 1.7815873015873017e-05, + "loss": 0.1734, + "step": 5944 + }, + { + "epoch": 33.97142857142857, + "grad_norm": 74.7918701171875, + "learning_rate": 1.780952380952381e-05, + "loss": 0.1948, + "step": 5945 + }, + { + "epoch": 33.97714285714286, + "grad_norm": 97.4121322631836, + "learning_rate": 1.7803174603174606e-05, + "loss": 0.3412, + "step": 5946 + }, + { + "epoch": 33.98285714285714, + "grad_norm": 
514.5524291992188, + "learning_rate": 1.77968253968254e-05, + "loss": 0.226, + "step": 5947 + }, + { + "epoch": 33.988571428571426, + "grad_norm": 50.86960983276367, + "learning_rate": 1.779047619047619e-05, + "loss": 0.2376, + "step": 5948 + }, + { + "epoch": 33.994285714285716, + "grad_norm": 39.601444244384766, + "learning_rate": 1.7784126984126984e-05, + "loss": 0.291, + "step": 5949 + }, + { + "epoch": 34.0, + "grad_norm": 30.264354705810547, + "learning_rate": 1.777777777777778e-05, + "loss": 0.2828, + "step": 5950 + }, + { + "epoch": 34.0, + "eval_classes": 0, + "eval_loss": 0.5732106566429138, + "eval_map": 0.9371, + "eval_map_50": 0.9699, + "eval_map_75": 0.9643, + "eval_map_large": 0.9372, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9371, + "eval_map_small": -1.0, + "eval_mar_1": 0.7867, + "eval_mar_10": 0.9746, + "eval_mar_100": 0.9771, + "eval_mar_100_per_class": 0.9771, + "eval_mar_large": 0.9771, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.9114, + "eval_samples_per_second": 21.134, + "eval_steps_per_second": 2.66, + "step": 5950 + }, + { + "epoch": 34.005714285714284, + "grad_norm": 39.4473991394043, + "learning_rate": 1.7771428571428573e-05, + "loss": 0.2211, + "step": 5951 + }, + { + "epoch": 34.011428571428574, + "grad_norm": 33.2810173034668, + "learning_rate": 1.7765079365079365e-05, + "loss": 0.1745, + "step": 5952 + }, + { + "epoch": 34.01714285714286, + "grad_norm": 25.688941955566406, + "learning_rate": 1.775873015873016e-05, + "loss": 0.1702, + "step": 5953 + }, + { + "epoch": 34.02285714285714, + "grad_norm": 40.11772537231445, + "learning_rate": 1.7752380952380954e-05, + "loss": 0.1975, + "step": 5954 + }, + { + "epoch": 34.02857142857143, + "grad_norm": 28.627344131469727, + "learning_rate": 1.7746031746031747e-05, + "loss": 0.2539, + "step": 5955 + }, + { + "epoch": 34.034285714285716, + "grad_norm": 69.74943542480469, + "learning_rate": 1.773968253968254e-05, + "loss": 0.2654, + "step": 5956 + }, + 
{ + "epoch": 34.04, + "grad_norm": 49.73385238647461, + "learning_rate": 1.7733333333333335e-05, + "loss": 0.2028, + "step": 5957 + }, + { + "epoch": 34.04571428571428, + "grad_norm": 22.300336837768555, + "learning_rate": 1.7726984126984128e-05, + "loss": 0.1805, + "step": 5958 + }, + { + "epoch": 34.05142857142857, + "grad_norm": 51.91956329345703, + "learning_rate": 1.772063492063492e-05, + "loss": 0.2041, + "step": 5959 + }, + { + "epoch": 34.05714285714286, + "grad_norm": 100.5795669555664, + "learning_rate": 1.7714285714285713e-05, + "loss": 0.3603, + "step": 5960 + }, + { + "epoch": 34.06285714285714, + "grad_norm": 78.1702880859375, + "learning_rate": 1.770793650793651e-05, + "loss": 0.1778, + "step": 5961 + }, + { + "epoch": 34.06857142857143, + "grad_norm": 52.29579544067383, + "learning_rate": 1.7701587301587302e-05, + "loss": 0.3952, + "step": 5962 + }, + { + "epoch": 34.074285714285715, + "grad_norm": 111.71765899658203, + "learning_rate": 1.7695238095238094e-05, + "loss": 0.3385, + "step": 5963 + }, + { + "epoch": 34.08, + "grad_norm": 44.42064666748047, + "learning_rate": 1.768888888888889e-05, + "loss": 0.2243, + "step": 5964 + }, + { + "epoch": 34.08571428571429, + "grad_norm": 29.016536712646484, + "learning_rate": 1.7682539682539683e-05, + "loss": 0.1984, + "step": 5965 + }, + { + "epoch": 34.09142857142857, + "grad_norm": 22.22270965576172, + "learning_rate": 1.7676190476190476e-05, + "loss": 0.2315, + "step": 5966 + }, + { + "epoch": 34.097142857142856, + "grad_norm": 32.561676025390625, + "learning_rate": 1.766984126984127e-05, + "loss": 0.2779, + "step": 5967 + }, + { + "epoch": 34.10285714285714, + "grad_norm": 48.95793151855469, + "learning_rate": 1.7663492063492064e-05, + "loss": 0.2555, + "step": 5968 + }, + { + "epoch": 34.10857142857143, + "grad_norm": 53.35732650756836, + "learning_rate": 1.7657142857142857e-05, + "loss": 0.1497, + "step": 5969 + }, + { + "epoch": 34.114285714285714, + "grad_norm": 85.69971466064453, + "learning_rate": 
1.765079365079365e-05, + "loss": 0.2759, + "step": 5970 + }, + { + "epoch": 34.12, + "grad_norm": 31.9514217376709, + "learning_rate": 1.7644444444444446e-05, + "loss": 0.1854, + "step": 5971 + }, + { + "epoch": 34.12571428571429, + "grad_norm": 70.69202423095703, + "learning_rate": 1.7638095238095238e-05, + "loss": 0.1737, + "step": 5972 + }, + { + "epoch": 34.13142857142857, + "grad_norm": 53.84906005859375, + "learning_rate": 1.763174603174603e-05, + "loss": 0.233, + "step": 5973 + }, + { + "epoch": 34.137142857142855, + "grad_norm": 28.47622299194336, + "learning_rate": 1.7625396825396827e-05, + "loss": 0.1744, + "step": 5974 + }, + { + "epoch": 34.142857142857146, + "grad_norm": 141.35400390625, + "learning_rate": 1.761904761904762e-05, + "loss": 0.2277, + "step": 5975 + }, + { + "epoch": 34.14857142857143, + "grad_norm": 39.02643966674805, + "learning_rate": 1.7612698412698416e-05, + "loss": 0.1827, + "step": 5976 + }, + { + "epoch": 34.15428571428571, + "grad_norm": 39.80881881713867, + "learning_rate": 1.7606349206349208e-05, + "loss": 0.1508, + "step": 5977 + }, + { + "epoch": 34.16, + "grad_norm": 27.678665161132812, + "learning_rate": 1.76e-05, + "loss": 0.3936, + "step": 5978 + }, + { + "epoch": 34.16571428571429, + "grad_norm": 34.82521438598633, + "learning_rate": 1.7593650793650797e-05, + "loss": 0.1694, + "step": 5979 + }, + { + "epoch": 34.17142857142857, + "grad_norm": 567.7770385742188, + "learning_rate": 1.758730158730159e-05, + "loss": 0.1819, + "step": 5980 + }, + { + "epoch": 34.177142857142854, + "grad_norm": 39.7049560546875, + "learning_rate": 1.7580952380952382e-05, + "loss": 0.1963, + "step": 5981 + }, + { + "epoch": 34.182857142857145, + "grad_norm": 81.57500457763672, + "learning_rate": 1.7574603174603175e-05, + "loss": 0.3676, + "step": 5982 + }, + { + "epoch": 34.18857142857143, + "grad_norm": 37.454769134521484, + "learning_rate": 1.756825396825397e-05, + "loss": 0.2362, + "step": 5983 + }, + { + "epoch": 34.19428571428571, + 
"grad_norm": 26.152082443237305, + "learning_rate": 1.7561904761904763e-05, + "loss": 0.2013, + "step": 5984 + }, + { + "epoch": 34.2, + "grad_norm": 48.61448669433594, + "learning_rate": 1.7555555555555556e-05, + "loss": 0.1921, + "step": 5985 + }, + { + "epoch": 34.205714285714286, + "grad_norm": 54.888362884521484, + "learning_rate": 1.7549206349206352e-05, + "loss": 0.2217, + "step": 5986 + }, + { + "epoch": 34.21142857142857, + "grad_norm": 58.8105583190918, + "learning_rate": 1.7542857142857145e-05, + "loss": 0.2862, + "step": 5987 + }, + { + "epoch": 34.21714285714286, + "grad_norm": 44.90412139892578, + "learning_rate": 1.7536507936507937e-05, + "loss": 0.1811, + "step": 5988 + }, + { + "epoch": 34.222857142857144, + "grad_norm": 22.135730743408203, + "learning_rate": 1.753015873015873e-05, + "loss": 0.2369, + "step": 5989 + }, + { + "epoch": 34.22857142857143, + "grad_norm": 52.97178649902344, + "learning_rate": 1.7523809523809526e-05, + "loss": 0.2835, + "step": 5990 + }, + { + "epoch": 34.23428571428571, + "grad_norm": 24.361135482788086, + "learning_rate": 1.751746031746032e-05, + "loss": 0.2666, + "step": 5991 + }, + { + "epoch": 34.24, + "grad_norm": 39.67509078979492, + "learning_rate": 1.751111111111111e-05, + "loss": 0.2313, + "step": 5992 + }, + { + "epoch": 34.245714285714286, + "grad_norm": 67.84577941894531, + "learning_rate": 1.7504761904761904e-05, + "loss": 0.2451, + "step": 5993 + }, + { + "epoch": 34.25142857142857, + "grad_norm": 42.42686462402344, + "learning_rate": 1.74984126984127e-05, + "loss": 0.213, + "step": 5994 + }, + { + "epoch": 34.25714285714286, + "grad_norm": 47.220462799072266, + "learning_rate": 1.7492063492063493e-05, + "loss": 0.2164, + "step": 5995 + }, + { + "epoch": 34.26285714285714, + "grad_norm": 27.19646453857422, + "learning_rate": 1.7485714285714285e-05, + "loss": 0.201, + "step": 5996 + }, + { + "epoch": 34.26857142857143, + "grad_norm": 29.466632843017578, + "learning_rate": 1.747936507936508e-05, + "loss": 
0.3826, + "step": 5997 + }, + { + "epoch": 34.27428571428572, + "grad_norm": 25.750511169433594, + "learning_rate": 1.7473015873015874e-05, + "loss": 0.2327, + "step": 5998 + }, + { + "epoch": 34.28, + "grad_norm": 43.1969108581543, + "learning_rate": 1.7466666666666667e-05, + "loss": 0.2463, + "step": 5999 + }, + { + "epoch": 34.285714285714285, + "grad_norm": 111.6524429321289, + "learning_rate": 1.746031746031746e-05, + "loss": 0.2307, + "step": 6000 + }, + { + "epoch": 34.29142857142857, + "grad_norm": 52.85550308227539, + "learning_rate": 1.7453968253968255e-05, + "loss": 0.1707, + "step": 6001 + }, + { + "epoch": 34.29714285714286, + "grad_norm": 89.5367202758789, + "learning_rate": 1.7447619047619048e-05, + "loss": 0.2228, + "step": 6002 + }, + { + "epoch": 34.30285714285714, + "grad_norm": 46.877376556396484, + "learning_rate": 1.744126984126984e-05, + "loss": 0.3267, + "step": 6003 + }, + { + "epoch": 34.308571428571426, + "grad_norm": 29.10450553894043, + "learning_rate": 1.7434920634920633e-05, + "loss": 0.2678, + "step": 6004 + }, + { + "epoch": 34.31428571428572, + "grad_norm": 28.012819290161133, + "learning_rate": 1.742857142857143e-05, + "loss": 0.2131, + "step": 6005 + }, + { + "epoch": 34.32, + "grad_norm": 35.5361442565918, + "learning_rate": 1.7422222222222222e-05, + "loss": 0.1771, + "step": 6006 + }, + { + "epoch": 34.325714285714284, + "grad_norm": 37.48767852783203, + "learning_rate": 1.7415873015873014e-05, + "loss": 0.1776, + "step": 6007 + }, + { + "epoch": 34.331428571428575, + "grad_norm": 1077.1754150390625, + "learning_rate": 1.740952380952381e-05, + "loss": 0.2416, + "step": 6008 + }, + { + "epoch": 34.33714285714286, + "grad_norm": 23.403268814086914, + "learning_rate": 1.7403174603174603e-05, + "loss": 0.2405, + "step": 6009 + }, + { + "epoch": 34.34285714285714, + "grad_norm": 30.210250854492188, + "learning_rate": 1.73968253968254e-05, + "loss": 0.2152, + "step": 6010 + }, + { + "epoch": 34.348571428571425, + "grad_norm": 
70.3405990600586, + "learning_rate": 1.7390476190476192e-05, + "loss": 0.1559, + "step": 6011 + }, + { + "epoch": 34.354285714285716, + "grad_norm": 41.96592330932617, + "learning_rate": 1.7384126984126984e-05, + "loss": 0.2466, + "step": 6012 + }, + { + "epoch": 34.36, + "grad_norm": 56.36053466796875, + "learning_rate": 1.737777777777778e-05, + "loss": 0.2149, + "step": 6013 + }, + { + "epoch": 34.36571428571428, + "grad_norm": 37.00887680053711, + "learning_rate": 1.7371428571428573e-05, + "loss": 0.2144, + "step": 6014 + }, + { + "epoch": 34.371428571428574, + "grad_norm": 24.215007781982422, + "learning_rate": 1.7365079365079366e-05, + "loss": 0.2389, + "step": 6015 + }, + { + "epoch": 34.37714285714286, + "grad_norm": 46.74251937866211, + "learning_rate": 1.7358730158730162e-05, + "loss": 0.2139, + "step": 6016 + }, + { + "epoch": 34.38285714285714, + "grad_norm": 55.992042541503906, + "learning_rate": 1.7352380952380954e-05, + "loss": 0.1324, + "step": 6017 + }, + { + "epoch": 34.38857142857143, + "grad_norm": 29.065580368041992, + "learning_rate": 1.7346031746031747e-05, + "loss": 0.2013, + "step": 6018 + }, + { + "epoch": 34.394285714285715, + "grad_norm": 62.71901321411133, + "learning_rate": 1.7339682539682543e-05, + "loss": 0.16, + "step": 6019 + }, + { + "epoch": 34.4, + "grad_norm": 52.24561309814453, + "learning_rate": 1.7333333333333336e-05, + "loss": 0.2242, + "step": 6020 + }, + { + "epoch": 34.40571428571428, + "grad_norm": 40.833763122558594, + "learning_rate": 1.7326984126984128e-05, + "loss": 0.2674, + "step": 6021 + }, + { + "epoch": 34.41142857142857, + "grad_norm": 27.984975814819336, + "learning_rate": 1.732063492063492e-05, + "loss": 0.2379, + "step": 6022 + }, + { + "epoch": 34.417142857142856, + "grad_norm": 74.86846923828125, + "learning_rate": 1.7314285714285717e-05, + "loss": 0.3131, + "step": 6023 + }, + { + "epoch": 34.42285714285714, + "grad_norm": 21.960285186767578, + "learning_rate": 1.730793650793651e-05, + "loss": 0.1784, + 
"step": 6024 + }, + { + "epoch": 34.42857142857143, + "grad_norm": 206.73475646972656, + "learning_rate": 1.7301587301587302e-05, + "loss": 0.1801, + "step": 6025 + }, + { + "epoch": 34.434285714285714, + "grad_norm": 53.96074676513672, + "learning_rate": 1.7295238095238095e-05, + "loss": 0.2141, + "step": 6026 + }, + { + "epoch": 34.44, + "grad_norm": 33.00743865966797, + "learning_rate": 1.728888888888889e-05, + "loss": 0.2226, + "step": 6027 + }, + { + "epoch": 34.44571428571429, + "grad_norm": 59.796897888183594, + "learning_rate": 1.7282539682539684e-05, + "loss": 0.2167, + "step": 6028 + }, + { + "epoch": 34.45142857142857, + "grad_norm": 98.43783569335938, + "learning_rate": 1.7276190476190476e-05, + "loss": 0.2007, + "step": 6029 + }, + { + "epoch": 34.457142857142856, + "grad_norm": 65.62691497802734, + "learning_rate": 1.7269841269841272e-05, + "loss": 0.2217, + "step": 6030 + }, + { + "epoch": 34.462857142857146, + "grad_norm": 25.618061065673828, + "learning_rate": 1.7263492063492065e-05, + "loss": 0.2301, + "step": 6031 + }, + { + "epoch": 34.46857142857143, + "grad_norm": 36.77336120605469, + "learning_rate": 1.7257142857142857e-05, + "loss": 0.2109, + "step": 6032 + }, + { + "epoch": 34.47428571428571, + "grad_norm": 81.1491470336914, + "learning_rate": 1.725079365079365e-05, + "loss": 0.4455, + "step": 6033 + }, + { + "epoch": 34.48, + "grad_norm": 76.56790924072266, + "learning_rate": 1.7244444444444446e-05, + "loss": 0.2304, + "step": 6034 + }, + { + "epoch": 34.48571428571429, + "grad_norm": 72.3931655883789, + "learning_rate": 1.723809523809524e-05, + "loss": 0.1967, + "step": 6035 + }, + { + "epoch": 34.49142857142857, + "grad_norm": 45.4559211730957, + "learning_rate": 1.723174603174603e-05, + "loss": 0.2656, + "step": 6036 + }, + { + "epoch": 34.497142857142855, + "grad_norm": 11.608014106750488, + "learning_rate": 1.7225396825396824e-05, + "loss": 0.254, + "step": 6037 + }, + { + "epoch": 34.502857142857145, + "grad_norm": 
33.927425384521484, + "learning_rate": 1.721904761904762e-05, + "loss": 0.1657, + "step": 6038 + }, + { + "epoch": 34.50857142857143, + "grad_norm": 51.072715759277344, + "learning_rate": 1.7212698412698413e-05, + "loss": 0.2104, + "step": 6039 + }, + { + "epoch": 34.51428571428571, + "grad_norm": 22.702539443969727, + "learning_rate": 1.7206349206349205e-05, + "loss": 0.2078, + "step": 6040 + }, + { + "epoch": 34.52, + "grad_norm": 44.61540985107422, + "learning_rate": 1.7199999999999998e-05, + "loss": 0.2692, + "step": 6041 + }, + { + "epoch": 34.52571428571429, + "grad_norm": 47.26350402832031, + "learning_rate": 1.7193650793650794e-05, + "loss": 0.2449, + "step": 6042 + }, + { + "epoch": 34.53142857142857, + "grad_norm": 76.13980865478516, + "learning_rate": 1.7187301587301587e-05, + "loss": 0.1774, + "step": 6043 + }, + { + "epoch": 34.537142857142854, + "grad_norm": 61.66749954223633, + "learning_rate": 1.718095238095238e-05, + "loss": 0.2082, + "step": 6044 + }, + { + "epoch": 34.542857142857144, + "grad_norm": 38.39115905761719, + "learning_rate": 1.7174603174603175e-05, + "loss": 0.1778, + "step": 6045 + }, + { + "epoch": 34.54857142857143, + "grad_norm": 24.32440757751465, + "learning_rate": 1.7168253968253968e-05, + "loss": 0.2897, + "step": 6046 + }, + { + "epoch": 34.55428571428571, + "grad_norm": 48.9962272644043, + "learning_rate": 1.7161904761904764e-05, + "loss": 0.1896, + "step": 6047 + }, + { + "epoch": 34.56, + "grad_norm": 40.588623046875, + "learning_rate": 1.7155555555555557e-05, + "loss": 0.2037, + "step": 6048 + }, + { + "epoch": 34.565714285714286, + "grad_norm": 105.08045196533203, + "learning_rate": 1.7149206349206353e-05, + "loss": 0.1645, + "step": 6049 + }, + { + "epoch": 34.57142857142857, + "grad_norm": 84.69966888427734, + "learning_rate": 1.7142857142857145e-05, + "loss": 0.1833, + "step": 6050 + }, + { + "epoch": 34.57714285714286, + "grad_norm": 46.13557815551758, + "learning_rate": 1.7136507936507938e-05, + "loss": 0.2534, + 
"step": 6051 + }, + { + "epoch": 34.582857142857144, + "grad_norm": 92.61463928222656, + "learning_rate": 1.713015873015873e-05, + "loss": 0.2749, + "step": 6052 + }, + { + "epoch": 34.58857142857143, + "grad_norm": 27.787425994873047, + "learning_rate": 1.7123809523809527e-05, + "loss": 0.2859, + "step": 6053 + }, + { + "epoch": 34.59428571428572, + "grad_norm": 22.343608856201172, + "learning_rate": 1.711746031746032e-05, + "loss": 0.1846, + "step": 6054 + }, + { + "epoch": 34.6, + "grad_norm": 104.19557189941406, + "learning_rate": 1.7111111111111112e-05, + "loss": 0.3087, + "step": 6055 + }, + { + "epoch": 34.605714285714285, + "grad_norm": 107.66898345947266, + "learning_rate": 1.7104761904761908e-05, + "loss": 0.2458, + "step": 6056 + }, + { + "epoch": 34.61142857142857, + "grad_norm": 34.40952682495117, + "learning_rate": 1.70984126984127e-05, + "loss": 0.3493, + "step": 6057 + }, + { + "epoch": 34.61714285714286, + "grad_norm": 47.86616516113281, + "learning_rate": 1.7092063492063493e-05, + "loss": 0.2501, + "step": 6058 + }, + { + "epoch": 34.62285714285714, + "grad_norm": 35.95183181762695, + "learning_rate": 1.7085714285714286e-05, + "loss": 0.1773, + "step": 6059 + }, + { + "epoch": 34.628571428571426, + "grad_norm": 237.09732055664062, + "learning_rate": 1.7079365079365082e-05, + "loss": 0.2854, + "step": 6060 + }, + { + "epoch": 34.63428571428572, + "grad_norm": 42.27085876464844, + "learning_rate": 1.7073015873015874e-05, + "loss": 0.2858, + "step": 6061 + }, + { + "epoch": 34.64, + "grad_norm": 36.25700759887695, + "learning_rate": 1.7066666666666667e-05, + "loss": 0.1894, + "step": 6062 + }, + { + "epoch": 34.645714285714284, + "grad_norm": 50.62672805786133, + "learning_rate": 1.706031746031746e-05, + "loss": 0.2577, + "step": 6063 + }, + { + "epoch": 34.651428571428575, + "grad_norm": 237.9781951904297, + "learning_rate": 1.7053968253968256e-05, + "loss": 0.2226, + "step": 6064 + }, + { + "epoch": 34.65714285714286, + "grad_norm": 
22.847118377685547, + "learning_rate": 1.704761904761905e-05, + "loss": 0.1686, + "step": 6065 + }, + { + "epoch": 34.66285714285714, + "grad_norm": 89.11709594726562, + "learning_rate": 1.704126984126984e-05, + "loss": 0.1984, + "step": 6066 + }, + { + "epoch": 34.668571428571425, + "grad_norm": 40.28227233886719, + "learning_rate": 1.7034920634920637e-05, + "loss": 0.2338, + "step": 6067 + }, + { + "epoch": 34.674285714285716, + "grad_norm": 40.13176727294922, + "learning_rate": 1.702857142857143e-05, + "loss": 0.2999, + "step": 6068 + }, + { + "epoch": 34.68, + "grad_norm": 40.50751495361328, + "learning_rate": 1.7022222222222222e-05, + "loss": 0.2266, + "step": 6069 + }, + { + "epoch": 34.68571428571428, + "grad_norm": 207.8365020751953, + "learning_rate": 1.7015873015873015e-05, + "loss": 0.2694, + "step": 6070 + }, + { + "epoch": 34.691428571428574, + "grad_norm": 78.13711547851562, + "learning_rate": 1.700952380952381e-05, + "loss": 0.1755, + "step": 6071 + }, + { + "epoch": 34.69714285714286, + "grad_norm": 49.28493881225586, + "learning_rate": 1.7003174603174604e-05, + "loss": 0.2557, + "step": 6072 + }, + { + "epoch": 34.70285714285714, + "grad_norm": 22.53261375427246, + "learning_rate": 1.6996825396825396e-05, + "loss": 0.2611, + "step": 6073 + }, + { + "epoch": 34.70857142857143, + "grad_norm": 27.282390594482422, + "learning_rate": 1.699047619047619e-05, + "loss": 0.2037, + "step": 6074 + }, + { + "epoch": 34.714285714285715, + "grad_norm": 126.64105987548828, + "learning_rate": 1.6984126984126985e-05, + "loss": 0.2526, + "step": 6075 + }, + { + "epoch": 34.72, + "grad_norm": 31.60548973083496, + "learning_rate": 1.6977777777777777e-05, + "loss": 0.2563, + "step": 6076 + }, + { + "epoch": 34.72571428571428, + "grad_norm": 29.386547088623047, + "learning_rate": 1.697142857142857e-05, + "loss": 0.2285, + "step": 6077 + }, + { + "epoch": 34.73142857142857, + "grad_norm": 55.01473617553711, + "learning_rate": 1.6965079365079366e-05, + "loss": 0.1949, + 
"step": 6078 + }, + { + "epoch": 34.73714285714286, + "grad_norm": 55.4376220703125, + "learning_rate": 1.695873015873016e-05, + "loss": 0.1849, + "step": 6079 + }, + { + "epoch": 34.74285714285714, + "grad_norm": 22.210613250732422, + "learning_rate": 1.695238095238095e-05, + "loss": 0.2912, + "step": 6080 + }, + { + "epoch": 34.74857142857143, + "grad_norm": 121.5806884765625, + "learning_rate": 1.6946031746031747e-05, + "loss": 0.2269, + "step": 6081 + }, + { + "epoch": 34.754285714285714, + "grad_norm": 75.6827621459961, + "learning_rate": 1.693968253968254e-05, + "loss": 0.2901, + "step": 6082 + }, + { + "epoch": 34.76, + "grad_norm": 66.75840759277344, + "learning_rate": 1.6933333333333333e-05, + "loss": 0.2228, + "step": 6083 + }, + { + "epoch": 34.76571428571429, + "grad_norm": 46.37446975708008, + "learning_rate": 1.692698412698413e-05, + "loss": 0.156, + "step": 6084 + }, + { + "epoch": 34.77142857142857, + "grad_norm": 45.42128372192383, + "learning_rate": 1.692063492063492e-05, + "loss": 0.2107, + "step": 6085 + }, + { + "epoch": 34.777142857142856, + "grad_norm": 41.54645919799805, + "learning_rate": 1.6914285714285717e-05, + "loss": 0.1854, + "step": 6086 + }, + { + "epoch": 34.78285714285714, + "grad_norm": 86.29766082763672, + "learning_rate": 1.690793650793651e-05, + "loss": 0.2166, + "step": 6087 + }, + { + "epoch": 34.78857142857143, + "grad_norm": 104.64183044433594, + "learning_rate": 1.6901587301587303e-05, + "loss": 0.2194, + "step": 6088 + }, + { + "epoch": 34.794285714285714, + "grad_norm": 42.5262451171875, + "learning_rate": 1.68952380952381e-05, + "loss": 0.2445, + "step": 6089 + }, + { + "epoch": 34.8, + "grad_norm": 23.71393585205078, + "learning_rate": 1.688888888888889e-05, + "loss": 0.1826, + "step": 6090 + }, + { + "epoch": 34.80571428571429, + "grad_norm": 39.36594009399414, + "learning_rate": 1.6882539682539684e-05, + "loss": 0.2796, + "step": 6091 + }, + { + "epoch": 34.81142857142857, + "grad_norm": 91.96893310546875, + 
"learning_rate": 1.6876190476190477e-05, + "loss": 0.1858, + "step": 6092 + }, + { + "epoch": 34.817142857142855, + "grad_norm": 252.6929168701172, + "learning_rate": 1.6869841269841273e-05, + "loss": 0.1824, + "step": 6093 + }, + { + "epoch": 34.822857142857146, + "grad_norm": 145.4113006591797, + "learning_rate": 1.6863492063492065e-05, + "loss": 0.2359, + "step": 6094 + }, + { + "epoch": 34.82857142857143, + "grad_norm": 70.6640396118164, + "learning_rate": 1.6857142857142858e-05, + "loss": 0.1619, + "step": 6095 + }, + { + "epoch": 34.83428571428571, + "grad_norm": 28.273479461669922, + "learning_rate": 1.685079365079365e-05, + "loss": 0.1944, + "step": 6096 + }, + { + "epoch": 34.84, + "grad_norm": 63.15889358520508, + "learning_rate": 1.6844444444444447e-05, + "loss": 0.2183, + "step": 6097 + }, + { + "epoch": 34.84571428571429, + "grad_norm": 73.52220916748047, + "learning_rate": 1.683809523809524e-05, + "loss": 0.1997, + "step": 6098 + }, + { + "epoch": 34.85142857142857, + "grad_norm": 17.628963470458984, + "learning_rate": 1.6831746031746032e-05, + "loss": 0.1901, + "step": 6099 + }, + { + "epoch": 34.857142857142854, + "grad_norm": 35.90450668334961, + "learning_rate": 1.6825396825396828e-05, + "loss": 0.2232, + "step": 6100 + }, + { + "epoch": 34.862857142857145, + "grad_norm": 57.68938446044922, + "learning_rate": 1.681904761904762e-05, + "loss": 0.3033, + "step": 6101 + }, + { + "epoch": 34.86857142857143, + "grad_norm": 53.79425811767578, + "learning_rate": 1.6812698412698413e-05, + "loss": 0.2516, + "step": 6102 + }, + { + "epoch": 34.87428571428571, + "grad_norm": 68.87535095214844, + "learning_rate": 1.6806349206349206e-05, + "loss": 0.2352, + "step": 6103 + }, + { + "epoch": 34.88, + "grad_norm": 74.88392639160156, + "learning_rate": 1.6800000000000002e-05, + "loss": 0.2601, + "step": 6104 + }, + { + "epoch": 34.885714285714286, + "grad_norm": 85.81791687011719, + "learning_rate": 1.6793650793650794e-05, + "loss": 0.1613, + "step": 6105 + }, + { 
+ "epoch": 34.89142857142857, + "grad_norm": 45.918296813964844, + "learning_rate": 1.6787301587301587e-05, + "loss": 0.2329, + "step": 6106 + }, + { + "epoch": 34.89714285714286, + "grad_norm": 36.77676010131836, + "learning_rate": 1.678095238095238e-05, + "loss": 0.268, + "step": 6107 + }, + { + "epoch": 34.902857142857144, + "grad_norm": 26.147130966186523, + "learning_rate": 1.6774603174603176e-05, + "loss": 0.2399, + "step": 6108 + }, + { + "epoch": 34.90857142857143, + "grad_norm": 57.6148681640625, + "learning_rate": 1.676825396825397e-05, + "loss": 0.3074, + "step": 6109 + }, + { + "epoch": 34.91428571428571, + "grad_norm": 25.31715202331543, + "learning_rate": 1.676190476190476e-05, + "loss": 0.1813, + "step": 6110 + }, + { + "epoch": 34.92, + "grad_norm": 101.95768737792969, + "learning_rate": 1.6755555555555557e-05, + "loss": 0.3566, + "step": 6111 + }, + { + "epoch": 34.925714285714285, + "grad_norm": 41.54034423828125, + "learning_rate": 1.674920634920635e-05, + "loss": 0.1578, + "step": 6112 + }, + { + "epoch": 34.93142857142857, + "grad_norm": 130.59205627441406, + "learning_rate": 1.6742857142857142e-05, + "loss": 0.3017, + "step": 6113 + }, + { + "epoch": 34.93714285714286, + "grad_norm": 64.66847229003906, + "learning_rate": 1.6736507936507935e-05, + "loss": 0.2254, + "step": 6114 + }, + { + "epoch": 34.94285714285714, + "grad_norm": 250.42233276367188, + "learning_rate": 1.673015873015873e-05, + "loss": 0.3019, + "step": 6115 + }, + { + "epoch": 34.94857142857143, + "grad_norm": 75.5509033203125, + "learning_rate": 1.6723809523809524e-05, + "loss": 0.2631, + "step": 6116 + }, + { + "epoch": 34.95428571428572, + "grad_norm": 421.3606262207031, + "learning_rate": 1.6717460317460316e-05, + "loss": 0.2681, + "step": 6117 + }, + { + "epoch": 34.96, + "grad_norm": 41.41328048706055, + "learning_rate": 1.6711111111111112e-05, + "loss": 0.2661, + "step": 6118 + }, + { + "epoch": 34.965714285714284, + "grad_norm": 49.59128952026367, + "learning_rate": 
1.6704761904761905e-05, + "loss": 0.2326, + "step": 6119 + }, + { + "epoch": 34.97142857142857, + "grad_norm": 71.13414764404297, + "learning_rate": 1.66984126984127e-05, + "loss": 0.1903, + "step": 6120 + }, + { + "epoch": 34.97714285714286, + "grad_norm": 66.1566390991211, + "learning_rate": 1.6692063492063494e-05, + "loss": 0.1926, + "step": 6121 + }, + { + "epoch": 34.98285714285714, + "grad_norm": 36.81869888305664, + "learning_rate": 1.6685714285714286e-05, + "loss": 0.1765, + "step": 6122 + }, + { + "epoch": 34.988571428571426, + "grad_norm": 32.54508590698242, + "learning_rate": 1.6679365079365082e-05, + "loss": 0.1183, + "step": 6123 + }, + { + "epoch": 34.994285714285716, + "grad_norm": 60.68364334106445, + "learning_rate": 1.6673015873015875e-05, + "loss": 0.2048, + "step": 6124 + }, + { + "epoch": 35.0, + "grad_norm": 39.2964973449707, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.2149, + "step": 6125 + }, + { + "epoch": 35.0, + "eval_classes": 0, + "eval_loss": 0.5942349433898926, + "eval_map": 0.9361, + "eval_map_50": 0.9684, + "eval_map_75": 0.9612, + "eval_map_large": 0.9361, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9361, + "eval_map_small": -1.0, + "eval_mar_1": 0.7917, + "eval_mar_10": 0.973, + "eval_mar_100": 0.9756, + "eval_mar_100_per_class": 0.9756, + "eval_mar_large": 0.9756, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.3791, + "eval_samples_per_second": 21.975, + "eval_steps_per_second": 2.766, + "step": 6125 + }, + { + "epoch": 35.005714285714284, + "grad_norm": 73.07441711425781, + "learning_rate": 1.6660317460317463e-05, + "loss": 0.1722, + "step": 6126 + }, + { + "epoch": 35.011428571428574, + "grad_norm": 30.19472312927246, + "learning_rate": 1.6653968253968256e-05, + "loss": 0.1864, + "step": 6127 + }, + { + "epoch": 35.01714285714286, + "grad_norm": 97.52613830566406, + "learning_rate": 1.664761904761905e-05, + "loss": 0.1768, + "step": 6128 + }, + { + "epoch": 35.02285714285714, + 
"grad_norm": 70.80941009521484, + "learning_rate": 1.664126984126984e-05, + "loss": 0.2576, + "step": 6129 + }, + { + "epoch": 35.02857142857143, + "grad_norm": 31.63057518005371, + "learning_rate": 1.6634920634920637e-05, + "loss": 0.1829, + "step": 6130 + }, + { + "epoch": 35.034285714285716, + "grad_norm": 57.87783432006836, + "learning_rate": 1.662857142857143e-05, + "loss": 0.1627, + "step": 6131 + }, + { + "epoch": 35.04, + "grad_norm": 25.989816665649414, + "learning_rate": 1.6622222222222223e-05, + "loss": 0.2545, + "step": 6132 + }, + { + "epoch": 35.04571428571428, + "grad_norm": 74.0960922241211, + "learning_rate": 1.661587301587302e-05, + "loss": 0.2245, + "step": 6133 + }, + { + "epoch": 35.05142857142857, + "grad_norm": 36.5538215637207, + "learning_rate": 1.660952380952381e-05, + "loss": 0.1737, + "step": 6134 + }, + { + "epoch": 35.05714285714286, + "grad_norm": 102.22315979003906, + "learning_rate": 1.6603174603174604e-05, + "loss": 0.1774, + "step": 6135 + }, + { + "epoch": 35.06285714285714, + "grad_norm": 20.221057891845703, + "learning_rate": 1.6596825396825397e-05, + "loss": 0.2787, + "step": 6136 + }, + { + "epoch": 35.06857142857143, + "grad_norm": 22.389572143554688, + "learning_rate": 1.6590476190476193e-05, + "loss": 0.169, + "step": 6137 + }, + { + "epoch": 35.074285714285715, + "grad_norm": 134.66050720214844, + "learning_rate": 1.6584126984126985e-05, + "loss": 0.2379, + "step": 6138 + }, + { + "epoch": 35.08, + "grad_norm": 68.95519256591797, + "learning_rate": 1.6577777777777778e-05, + "loss": 0.1671, + "step": 6139 + }, + { + "epoch": 35.08571428571429, + "grad_norm": 84.9073257446289, + "learning_rate": 1.657142857142857e-05, + "loss": 0.1476, + "step": 6140 + }, + { + "epoch": 35.09142857142857, + "grad_norm": 50.36892318725586, + "learning_rate": 1.6565079365079367e-05, + "loss": 0.2112, + "step": 6141 + }, + { + "epoch": 35.097142857142856, + "grad_norm": 103.01759338378906, + "learning_rate": 1.655873015873016e-05, + "loss": 
0.1881, + "step": 6142 + }, + { + "epoch": 35.10285714285714, + "grad_norm": 31.765804290771484, + "learning_rate": 1.6552380952380952e-05, + "loss": 0.1686, + "step": 6143 + }, + { + "epoch": 35.10857142857143, + "grad_norm": 32.143699645996094, + "learning_rate": 1.6546031746031744e-05, + "loss": 0.25, + "step": 6144 + }, + { + "epoch": 35.114285714285714, + "grad_norm": 24.49603271484375, + "learning_rate": 1.653968253968254e-05, + "loss": 0.3411, + "step": 6145 + }, + { + "epoch": 35.12, + "grad_norm": 67.13269805908203, + "learning_rate": 1.6533333333333333e-05, + "loss": 0.1517, + "step": 6146 + }, + { + "epoch": 35.12571428571429, + "grad_norm": 63.90729904174805, + "learning_rate": 1.6526984126984126e-05, + "loss": 0.1716, + "step": 6147 + }, + { + "epoch": 35.13142857142857, + "grad_norm": 55.39335250854492, + "learning_rate": 1.6520634920634922e-05, + "loss": 0.2807, + "step": 6148 + }, + { + "epoch": 35.137142857142855, + "grad_norm": 73.53040313720703, + "learning_rate": 1.6514285714285714e-05, + "loss": 0.2062, + "step": 6149 + }, + { + "epoch": 35.142857142857146, + "grad_norm": 97.12939453125, + "learning_rate": 1.6507936507936507e-05, + "loss": 0.1838, + "step": 6150 + }, + { + "epoch": 35.14857142857143, + "grad_norm": 41.98698043823242, + "learning_rate": 1.65015873015873e-05, + "loss": 0.2443, + "step": 6151 + }, + { + "epoch": 35.15428571428571, + "grad_norm": 50.828304290771484, + "learning_rate": 1.6495238095238096e-05, + "loss": 0.1269, + "step": 6152 + }, + { + "epoch": 35.16, + "grad_norm": 53.13596725463867, + "learning_rate": 1.648888888888889e-05, + "loss": 0.3944, + "step": 6153 + }, + { + "epoch": 35.16571428571429, + "grad_norm": 100.52580261230469, + "learning_rate": 1.6482539682539684e-05, + "loss": 0.2598, + "step": 6154 + }, + { + "epoch": 35.17142857142857, + "grad_norm": 126.7484130859375, + "learning_rate": 1.6476190476190477e-05, + "loss": 0.2223, + "step": 6155 + }, + { + "epoch": 35.177142857142854, + "grad_norm": 
38.768341064453125, + "learning_rate": 1.646984126984127e-05, + "loss": 0.2195, + "step": 6156 + }, + { + "epoch": 35.182857142857145, + "grad_norm": 65.1585693359375, + "learning_rate": 1.6463492063492066e-05, + "loss": 0.2112, + "step": 6157 + }, + { + "epoch": 35.18857142857143, + "grad_norm": 60.47264099121094, + "learning_rate": 1.645714285714286e-05, + "loss": 0.1769, + "step": 6158 + }, + { + "epoch": 35.19428571428571, + "grad_norm": 48.974544525146484, + "learning_rate": 1.6450793650793654e-05, + "loss": 0.2144, + "step": 6159 + }, + { + "epoch": 35.2, + "grad_norm": 93.70869445800781, + "learning_rate": 1.6444444444444447e-05, + "loss": 0.2033, + "step": 6160 + }, + { + "epoch": 35.205714285714286, + "grad_norm": 50.43521499633789, + "learning_rate": 1.643809523809524e-05, + "loss": 0.1514, + "step": 6161 + }, + { + "epoch": 35.21142857142857, + "grad_norm": 19.45742416381836, + "learning_rate": 1.6431746031746032e-05, + "loss": 0.2133, + "step": 6162 + }, + { + "epoch": 35.21714285714286, + "grad_norm": 35.2542839050293, + "learning_rate": 1.6425396825396828e-05, + "loss": 0.181, + "step": 6163 + }, + { + "epoch": 35.222857142857144, + "grad_norm": 51.425270080566406, + "learning_rate": 1.641904761904762e-05, + "loss": 0.2242, + "step": 6164 + }, + { + "epoch": 35.22857142857143, + "grad_norm": 49.880558013916016, + "learning_rate": 1.6412698412698414e-05, + "loss": 0.3576, + "step": 6165 + }, + { + "epoch": 35.23428571428571, + "grad_norm": 66.03699493408203, + "learning_rate": 1.6406349206349206e-05, + "loss": 0.2637, + "step": 6166 + }, + { + "epoch": 35.24, + "grad_norm": 75.09126281738281, + "learning_rate": 1.6400000000000002e-05, + "loss": 0.2208, + "step": 6167 + }, + { + "epoch": 35.245714285714286, + "grad_norm": 47.65190124511719, + "learning_rate": 1.6393650793650795e-05, + "loss": 0.1784, + "step": 6168 + }, + { + "epoch": 35.25142857142857, + "grad_norm": 45.626792907714844, + "learning_rate": 1.6387301587301587e-05, + "loss": 0.2506, + 
"step": 6169 + }, + { + "epoch": 35.25714285714286, + "grad_norm": 35.04150390625, + "learning_rate": 1.6380952380952384e-05, + "loss": 0.2119, + "step": 6170 + }, + { + "epoch": 35.26285714285714, + "grad_norm": 24.93942642211914, + "learning_rate": 1.6374603174603176e-05, + "loss": 0.2261, + "step": 6171 + }, + { + "epoch": 35.26857142857143, + "grad_norm": 501.3238830566406, + "learning_rate": 1.636825396825397e-05, + "loss": 0.2102, + "step": 6172 + }, + { + "epoch": 35.27428571428572, + "grad_norm": 31.831348419189453, + "learning_rate": 1.636190476190476e-05, + "loss": 0.1745, + "step": 6173 + }, + { + "epoch": 35.28, + "grad_norm": 43.44385528564453, + "learning_rate": 1.6355555555555557e-05, + "loss": 0.1931, + "step": 6174 + }, + { + "epoch": 35.285714285714285, + "grad_norm": 58.899925231933594, + "learning_rate": 1.634920634920635e-05, + "loss": 0.2839, + "step": 6175 + }, + { + "epoch": 35.29142857142857, + "grad_norm": 32.3404655456543, + "learning_rate": 1.6342857142857143e-05, + "loss": 0.3113, + "step": 6176 + }, + { + "epoch": 35.29714285714286, + "grad_norm": 39.052772521972656, + "learning_rate": 1.6336507936507935e-05, + "loss": 0.1863, + "step": 6177 + }, + { + "epoch": 35.30285714285714, + "grad_norm": 23.971147537231445, + "learning_rate": 1.633015873015873e-05, + "loss": 0.1415, + "step": 6178 + }, + { + "epoch": 35.308571428571426, + "grad_norm": 1262.6932373046875, + "learning_rate": 1.6323809523809524e-05, + "loss": 0.1709, + "step": 6179 + }, + { + "epoch": 35.31428571428572, + "grad_norm": 39.73323059082031, + "learning_rate": 1.6317460317460317e-05, + "loss": 0.2033, + "step": 6180 + }, + { + "epoch": 35.32, + "grad_norm": 20.002708435058594, + "learning_rate": 1.6311111111111113e-05, + "loss": 0.2234, + "step": 6181 + }, + { + "epoch": 35.325714285714284, + "grad_norm": 43.954872131347656, + "learning_rate": 1.6304761904761905e-05, + "loss": 0.2385, + "step": 6182 + }, + { + "epoch": 35.331428571428575, + "grad_norm": 
58.16795349121094, + "learning_rate": 1.6298412698412698e-05, + "loss": 0.2301, + "step": 6183 + }, + { + "epoch": 35.33714285714286, + "grad_norm": 22.84110450744629, + "learning_rate": 1.629206349206349e-05, + "loss": 0.1901, + "step": 6184 + }, + { + "epoch": 35.34285714285714, + "grad_norm": 18.52191925048828, + "learning_rate": 1.6285714285714287e-05, + "loss": 0.2187, + "step": 6185 + }, + { + "epoch": 35.348571428571425, + "grad_norm": 75.45260620117188, + "learning_rate": 1.627936507936508e-05, + "loss": 0.279, + "step": 6186 + }, + { + "epoch": 35.354285714285716, + "grad_norm": 30.47543716430664, + "learning_rate": 1.6273015873015872e-05, + "loss": 0.2449, + "step": 6187 + }, + { + "epoch": 35.36, + "grad_norm": 184.71424865722656, + "learning_rate": 1.6266666666666665e-05, + "loss": 0.2356, + "step": 6188 + }, + { + "epoch": 35.36571428571428, + "grad_norm": 43.9375114440918, + "learning_rate": 1.626031746031746e-05, + "loss": 0.2007, + "step": 6189 + }, + { + "epoch": 35.371428571428574, + "grad_norm": 59.61788558959961, + "learning_rate": 1.6253968253968253e-05, + "loss": 0.2017, + "step": 6190 + }, + { + "epoch": 35.37714285714286, + "grad_norm": 32.35559844970703, + "learning_rate": 1.624761904761905e-05, + "loss": 0.2449, + "step": 6191 + }, + { + "epoch": 35.38285714285714, + "grad_norm": 29.847366333007812, + "learning_rate": 1.6241269841269842e-05, + "loss": 0.1943, + "step": 6192 + }, + { + "epoch": 35.38857142857143, + "grad_norm": 41.44870376586914, + "learning_rate": 1.6234920634920638e-05, + "loss": 0.1667, + "step": 6193 + }, + { + "epoch": 35.394285714285715, + "grad_norm": 82.03839874267578, + "learning_rate": 1.622857142857143e-05, + "loss": 0.1517, + "step": 6194 + }, + { + "epoch": 35.4, + "grad_norm": 37.83160400390625, + "learning_rate": 1.6222222222222223e-05, + "loss": 0.1896, + "step": 6195 + }, + { + "epoch": 35.40571428571428, + "grad_norm": 14.38239574432373, + "learning_rate": 1.621587301587302e-05, + "loss": 0.2064, + "step": 
6196 + }, + { + "epoch": 35.41142857142857, + "grad_norm": 31.069854736328125, + "learning_rate": 1.6209523809523812e-05, + "loss": 0.2047, + "step": 6197 + }, + { + "epoch": 35.417142857142856, + "grad_norm": 26.791519165039062, + "learning_rate": 1.6203174603174604e-05, + "loss": 0.1788, + "step": 6198 + }, + { + "epoch": 35.42285714285714, + "grad_norm": 68.69419860839844, + "learning_rate": 1.6196825396825397e-05, + "loss": 0.1696, + "step": 6199 + }, + { + "epoch": 35.42857142857143, + "grad_norm": 41.01641845703125, + "learning_rate": 1.6190476190476193e-05, + "loss": 0.2361, + "step": 6200 + }, + { + "epoch": 35.434285714285714, + "grad_norm": 23.218536376953125, + "learning_rate": 1.6184126984126986e-05, + "loss": 0.2061, + "step": 6201 + }, + { + "epoch": 35.44, + "grad_norm": 580.6075439453125, + "learning_rate": 1.617777777777778e-05, + "loss": 0.2366, + "step": 6202 + }, + { + "epoch": 35.44571428571429, + "grad_norm": 44.08100891113281, + "learning_rate": 1.6171428571428574e-05, + "loss": 0.1888, + "step": 6203 + }, + { + "epoch": 35.45142857142857, + "grad_norm": 56.76536178588867, + "learning_rate": 1.6165079365079367e-05, + "loss": 0.2554, + "step": 6204 + }, + { + "epoch": 35.457142857142856, + "grad_norm": 51.591609954833984, + "learning_rate": 1.615873015873016e-05, + "loss": 0.2221, + "step": 6205 + }, + { + "epoch": 35.462857142857146, + "grad_norm": 29.525714874267578, + "learning_rate": 1.6152380952380952e-05, + "loss": 0.2147, + "step": 6206 + }, + { + "epoch": 35.46857142857143, + "grad_norm": 36.911781311035156, + "learning_rate": 1.614603174603175e-05, + "loss": 0.2882, + "step": 6207 + }, + { + "epoch": 35.47428571428571, + "grad_norm": 44.829017639160156, + "learning_rate": 1.613968253968254e-05, + "loss": 0.2356, + "step": 6208 + }, + { + "epoch": 35.48, + "grad_norm": 50.94928741455078, + "learning_rate": 1.6133333333333334e-05, + "loss": 0.2629, + "step": 6209 + }, + { + "epoch": 35.48571428571429, + "grad_norm": 33.35788345336914, + 
"learning_rate": 1.6126984126984126e-05, + "loss": 0.2352, + "step": 6210 + }, + { + "epoch": 35.49142857142857, + "grad_norm": 33.71529769897461, + "learning_rate": 1.6120634920634922e-05, + "loss": 0.3129, + "step": 6211 + }, + { + "epoch": 35.497142857142855, + "grad_norm": 28.31825065612793, + "learning_rate": 1.6114285714285715e-05, + "loss": 0.2175, + "step": 6212 + }, + { + "epoch": 35.502857142857145, + "grad_norm": 30.960386276245117, + "learning_rate": 1.6107936507936508e-05, + "loss": 0.1477, + "step": 6213 + }, + { + "epoch": 35.50857142857143, + "grad_norm": 33.10350799560547, + "learning_rate": 1.6101587301587304e-05, + "loss": 0.2215, + "step": 6214 + }, + { + "epoch": 35.51428571428571, + "grad_norm": 41.282249450683594, + "learning_rate": 1.6095238095238096e-05, + "loss": 0.3657, + "step": 6215 + }, + { + "epoch": 35.52, + "grad_norm": 53.24930953979492, + "learning_rate": 1.608888888888889e-05, + "loss": 0.2471, + "step": 6216 + }, + { + "epoch": 35.52571428571429, + "grad_norm": 37.373260498046875, + "learning_rate": 1.608253968253968e-05, + "loss": 0.1919, + "step": 6217 + }, + { + "epoch": 35.53142857142857, + "grad_norm": 35.30415344238281, + "learning_rate": 1.6076190476190477e-05, + "loss": 0.2584, + "step": 6218 + }, + { + "epoch": 35.537142857142854, + "grad_norm": 58.4691047668457, + "learning_rate": 1.606984126984127e-05, + "loss": 0.3271, + "step": 6219 + }, + { + "epoch": 35.542857142857144, + "grad_norm": 85.24468994140625, + "learning_rate": 1.6063492063492063e-05, + "loss": 0.1821, + "step": 6220 + }, + { + "epoch": 35.54857142857143, + "grad_norm": 30.816890716552734, + "learning_rate": 1.6057142857142855e-05, + "loss": 0.1627, + "step": 6221 + }, + { + "epoch": 35.55428571428571, + "grad_norm": 771.9216918945312, + "learning_rate": 1.605079365079365e-05, + "loss": 0.2432, + "step": 6222 + }, + { + "epoch": 35.56, + "grad_norm": 49.051143646240234, + "learning_rate": 1.6044444444444444e-05, + "loss": 0.1995, + "step": 6223 + }, + { 
+ "epoch": 35.565714285714286, + "grad_norm": 98.13201904296875, + "learning_rate": 1.6038095238095237e-05, + "loss": 0.3006, + "step": 6224 + }, + { + "epoch": 35.57142857142857, + "grad_norm": 24.999088287353516, + "learning_rate": 1.6031746031746033e-05, + "loss": 0.2711, + "step": 6225 + }, + { + "epoch": 35.57714285714286, + "grad_norm": 37.10238265991211, + "learning_rate": 1.6025396825396825e-05, + "loss": 0.1812, + "step": 6226 + }, + { + "epoch": 35.582857142857144, + "grad_norm": 40.84230041503906, + "learning_rate": 1.6019047619047618e-05, + "loss": 0.1913, + "step": 6227 + }, + { + "epoch": 35.58857142857143, + "grad_norm": 120.78560638427734, + "learning_rate": 1.6012698412698414e-05, + "loss": 0.1983, + "step": 6228 + }, + { + "epoch": 35.59428571428572, + "grad_norm": 92.3857421875, + "learning_rate": 1.6006349206349207e-05, + "loss": 0.2357, + "step": 6229 + }, + { + "epoch": 35.6, + "grad_norm": 36.8802604675293, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.2196, + "step": 6230 + }, + { + "epoch": 35.605714285714285, + "grad_norm": 47.57221984863281, + "learning_rate": 1.5993650793650795e-05, + "loss": 0.2572, + "step": 6231 + }, + { + "epoch": 35.61142857142857, + "grad_norm": 54.17245864868164, + "learning_rate": 1.5987301587301588e-05, + "loss": 0.2616, + "step": 6232 + }, + { + "epoch": 35.61714285714286, + "grad_norm": 56.34675979614258, + "learning_rate": 1.5980952380952384e-05, + "loss": 0.1948, + "step": 6233 + }, + { + "epoch": 35.62285714285714, + "grad_norm": 56.1422233581543, + "learning_rate": 1.5974603174603177e-05, + "loss": 0.1275, + "step": 6234 + }, + { + "epoch": 35.628571428571426, + "grad_norm": 39.908538818359375, + "learning_rate": 1.596825396825397e-05, + "loss": 0.2365, + "step": 6235 + }, + { + "epoch": 35.63428571428572, + "grad_norm": 26.011695861816406, + "learning_rate": 1.5961904761904765e-05, + "loss": 0.1796, + "step": 6236 + }, + { + "epoch": 35.64, + "grad_norm": 74.88202667236328, + "learning_rate": 
1.5955555555555558e-05, + "loss": 0.1827, + "step": 6237 + }, + { + "epoch": 35.645714285714284, + "grad_norm": 37.50237274169922, + "learning_rate": 1.594920634920635e-05, + "loss": 0.1882, + "step": 6238 + }, + { + "epoch": 35.651428571428575, + "grad_norm": 19.13360595703125, + "learning_rate": 1.5942857142857143e-05, + "loss": 0.2526, + "step": 6239 + }, + { + "epoch": 35.65714285714286, + "grad_norm": 78.09122467041016, + "learning_rate": 1.593650793650794e-05, + "loss": 0.1978, + "step": 6240 + }, + { + "epoch": 35.66285714285714, + "grad_norm": 49.35946273803711, + "learning_rate": 1.5930158730158732e-05, + "loss": 0.1773, + "step": 6241 + }, + { + "epoch": 35.668571428571425, + "grad_norm": 58.68070983886719, + "learning_rate": 1.5923809523809524e-05, + "loss": 0.2185, + "step": 6242 + }, + { + "epoch": 35.674285714285716, + "grad_norm": 56.76746368408203, + "learning_rate": 1.5917460317460317e-05, + "loss": 0.2128, + "step": 6243 + }, + { + "epoch": 35.68, + "grad_norm": 38.83905792236328, + "learning_rate": 1.5911111111111113e-05, + "loss": 0.2239, + "step": 6244 + }, + { + "epoch": 35.68571428571428, + "grad_norm": 50.337825775146484, + "learning_rate": 1.5904761904761906e-05, + "loss": 0.2765, + "step": 6245 + }, + { + "epoch": 35.691428571428574, + "grad_norm": 24.240882873535156, + "learning_rate": 1.58984126984127e-05, + "loss": 0.2335, + "step": 6246 + }, + { + "epoch": 35.69714285714286, + "grad_norm": 497.1015930175781, + "learning_rate": 1.5892063492063494e-05, + "loss": 0.202, + "step": 6247 + }, + { + "epoch": 35.70285714285714, + "grad_norm": 64.22380065917969, + "learning_rate": 1.5885714285714287e-05, + "loss": 0.212, + "step": 6248 + }, + { + "epoch": 35.70857142857143, + "grad_norm": 40.884334564208984, + "learning_rate": 1.587936507936508e-05, + "loss": 0.1923, + "step": 6249 + }, + { + "epoch": 35.714285714285715, + "grad_norm": 64.15213012695312, + "learning_rate": 1.5873015873015872e-05, + "loss": 0.2336, + "step": 6250 + }, + { + 
"epoch": 35.72, + "grad_norm": 46.503936767578125, + "learning_rate": 1.586666666666667e-05, + "loss": 0.2912, + "step": 6251 + }, + { + "epoch": 35.72571428571428, + "grad_norm": 48.539791107177734, + "learning_rate": 1.586031746031746e-05, + "loss": 0.1813, + "step": 6252 + }, + { + "epoch": 35.73142857142857, + "grad_norm": 30.292924880981445, + "learning_rate": 1.5853968253968254e-05, + "loss": 0.422, + "step": 6253 + }, + { + "epoch": 35.73714285714286, + "grad_norm": 23.619461059570312, + "learning_rate": 1.5847619047619046e-05, + "loss": 0.1958, + "step": 6254 + }, + { + "epoch": 35.74285714285714, + "grad_norm": 101.78134155273438, + "learning_rate": 1.5841269841269842e-05, + "loss": 0.2327, + "step": 6255 + }, + { + "epoch": 35.74857142857143, + "grad_norm": 30.111948013305664, + "learning_rate": 1.5834920634920635e-05, + "loss": 0.187, + "step": 6256 + }, + { + "epoch": 35.754285714285714, + "grad_norm": 47.025508880615234, + "learning_rate": 1.5828571428571428e-05, + "loss": 0.3453, + "step": 6257 + }, + { + "epoch": 35.76, + "grad_norm": 87.50485229492188, + "learning_rate": 1.582222222222222e-05, + "loss": 0.2542, + "step": 6258 + }, + { + "epoch": 35.76571428571429, + "grad_norm": 54.43192672729492, + "learning_rate": 1.5815873015873016e-05, + "loss": 0.2674, + "step": 6259 + }, + { + "epoch": 35.77142857142857, + "grad_norm": 26.362632751464844, + "learning_rate": 1.580952380952381e-05, + "loss": 0.1999, + "step": 6260 + }, + { + "epoch": 35.777142857142856, + "grad_norm": 42.240299224853516, + "learning_rate": 1.58031746031746e-05, + "loss": 0.1975, + "step": 6261 + }, + { + "epoch": 35.78285714285714, + "grad_norm": 35.53541946411133, + "learning_rate": 1.5796825396825398e-05, + "loss": 0.2892, + "step": 6262 + }, + { + "epoch": 35.78857142857143, + "grad_norm": 34.09052658081055, + "learning_rate": 1.579047619047619e-05, + "loss": 0.2585, + "step": 6263 + }, + { + "epoch": 35.794285714285714, + "grad_norm": 1790.7254638671875, + "learning_rate": 
1.5784126984126986e-05, + "loss": 0.2906, + "step": 6264 + }, + { + "epoch": 35.8, + "grad_norm": 24.057857513427734, + "learning_rate": 1.577777777777778e-05, + "loss": 0.1914, + "step": 6265 + }, + { + "epoch": 35.80571428571429, + "grad_norm": 46.7984504699707, + "learning_rate": 1.577142857142857e-05, + "loss": 0.1992, + "step": 6266 + }, + { + "epoch": 35.81142857142857, + "grad_norm": 32.84125518798828, + "learning_rate": 1.5765079365079367e-05, + "loss": 0.2153, + "step": 6267 + }, + { + "epoch": 35.817142857142855, + "grad_norm": 40.810821533203125, + "learning_rate": 1.575873015873016e-05, + "loss": 0.2667, + "step": 6268 + }, + { + "epoch": 35.822857142857146, + "grad_norm": 42.42055892944336, + "learning_rate": 1.5752380952380953e-05, + "loss": 0.1929, + "step": 6269 + }, + { + "epoch": 35.82857142857143, + "grad_norm": 29.975252151489258, + "learning_rate": 1.574603174603175e-05, + "loss": 0.2226, + "step": 6270 + }, + { + "epoch": 35.83428571428571, + "grad_norm": 22.78770637512207, + "learning_rate": 1.573968253968254e-05, + "loss": 0.2817, + "step": 6271 + }, + { + "epoch": 35.84, + "grad_norm": 38.62025451660156, + "learning_rate": 1.5733333333333334e-05, + "loss": 0.2161, + "step": 6272 + }, + { + "epoch": 35.84571428571429, + "grad_norm": 39.60796356201172, + "learning_rate": 1.572698412698413e-05, + "loss": 0.2045, + "step": 6273 + }, + { + "epoch": 35.85142857142857, + "grad_norm": 633.5014038085938, + "learning_rate": 1.5720634920634923e-05, + "loss": 0.219, + "step": 6274 + }, + { + "epoch": 35.857142857142854, + "grad_norm": 49.83086013793945, + "learning_rate": 1.5714285714285715e-05, + "loss": 0.2216, + "step": 6275 + }, + { + "epoch": 35.862857142857145, + "grad_norm": 32.118247985839844, + "learning_rate": 1.5707936507936508e-05, + "loss": 0.199, + "step": 6276 + }, + { + "epoch": 35.86857142857143, + "grad_norm": 16.761796951293945, + "learning_rate": 1.5701587301587304e-05, + "loss": 0.2241, + "step": 6277 + }, + { + "epoch": 
35.87428571428571, + "grad_norm": 32.82276153564453, + "learning_rate": 1.5695238095238097e-05, + "loss": 0.2014, + "step": 6278 + }, + { + "epoch": 35.88, + "grad_norm": 139.1734161376953, + "learning_rate": 1.568888888888889e-05, + "loss": 0.239, + "step": 6279 + }, + { + "epoch": 35.885714285714286, + "grad_norm": 60.02780532836914, + "learning_rate": 1.5682539682539682e-05, + "loss": 0.1493, + "step": 6280 + }, + { + "epoch": 35.89142857142857, + "grad_norm": 35.52342987060547, + "learning_rate": 1.5676190476190478e-05, + "loss": 0.1954, + "step": 6281 + }, + { + "epoch": 35.89714285714286, + "grad_norm": 61.01224136352539, + "learning_rate": 1.566984126984127e-05, + "loss": 0.2354, + "step": 6282 + }, + { + "epoch": 35.902857142857144, + "grad_norm": 98.2846450805664, + "learning_rate": 1.5663492063492063e-05, + "loss": 0.2863, + "step": 6283 + }, + { + "epoch": 35.90857142857143, + "grad_norm": 54.908931732177734, + "learning_rate": 1.565714285714286e-05, + "loss": 0.1334, + "step": 6284 + }, + { + "epoch": 35.91428571428571, + "grad_norm": 53.13249206542969, + "learning_rate": 1.5650793650793652e-05, + "loss": 0.2535, + "step": 6285 + }, + { + "epoch": 35.92, + "grad_norm": 1143.5648193359375, + "learning_rate": 1.5644444444444444e-05, + "loss": 0.3104, + "step": 6286 + }, + { + "epoch": 35.925714285714285, + "grad_norm": 38.51319885253906, + "learning_rate": 1.5638095238095237e-05, + "loss": 0.2201, + "step": 6287 + }, + { + "epoch": 35.93142857142857, + "grad_norm": 41.195770263671875, + "learning_rate": 1.5631746031746033e-05, + "loss": 0.2406, + "step": 6288 + }, + { + "epoch": 35.93714285714286, + "grad_norm": 46.77116394042969, + "learning_rate": 1.5625396825396826e-05, + "loss": 0.2352, + "step": 6289 + }, + { + "epoch": 35.94285714285714, + "grad_norm": 113.30989837646484, + "learning_rate": 1.561904761904762e-05, + "loss": 0.2114, + "step": 6290 + }, + { + "epoch": 35.94857142857143, + "grad_norm": 34.49736404418945, + "learning_rate": 
1.561269841269841e-05, + "loss": 0.231, + "step": 6291 + }, + { + "epoch": 35.95428571428572, + "grad_norm": 69.27482604980469, + "learning_rate": 1.5606349206349207e-05, + "loss": 0.2033, + "step": 6292 + }, + { + "epoch": 35.96, + "grad_norm": 428.3207702636719, + "learning_rate": 1.56e-05, + "loss": 0.2879, + "step": 6293 + }, + { + "epoch": 35.965714285714284, + "grad_norm": 40.86140823364258, + "learning_rate": 1.5593650793650792e-05, + "loss": 0.1804, + "step": 6294 + }, + { + "epoch": 35.97142857142857, + "grad_norm": 97.99716186523438, + "learning_rate": 1.558730158730159e-05, + "loss": 0.2291, + "step": 6295 + }, + { + "epoch": 35.97714285714286, + "grad_norm": 101.12525939941406, + "learning_rate": 1.558095238095238e-05, + "loss": 0.2111, + "step": 6296 + }, + { + "epoch": 35.98285714285714, + "grad_norm": 39.819671630859375, + "learning_rate": 1.5574603174603174e-05, + "loss": 0.2151, + "step": 6297 + }, + { + "epoch": 35.988571428571426, + "grad_norm": 34.32330322265625, + "learning_rate": 1.556825396825397e-05, + "loss": 0.2041, + "step": 6298 + }, + { + "epoch": 35.994285714285716, + "grad_norm": 57.05809020996094, + "learning_rate": 1.5561904761904762e-05, + "loss": 0.268, + "step": 6299 + }, + { + "epoch": 36.0, + "grad_norm": 44.08351516723633, + "learning_rate": 1.5555555555555555e-05, + "loss": 0.1798, + "step": 6300 + }, + { + "epoch": 36.0, + "eval_classes": 0, + "eval_loss": 0.5695643424987793, + "eval_map": 0.9409, + "eval_map_50": 0.971, + "eval_map_75": 0.9667, + "eval_map_large": 0.9409, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9409, + "eval_map_small": -1.0, + "eval_mar_1": 0.7943, + "eval_mar_10": 0.9743, + "eval_mar_100": 0.9759, + "eval_mar_100_per_class": 0.9759, + "eval_mar_large": 0.9759, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.4622, + "eval_samples_per_second": 21.839, + "eval_steps_per_second": 2.748, + "step": 6300 + }, + { + "epoch": 36.005714285714284, + "grad_norm": 
60.560211181640625, + "learning_rate": 1.554920634920635e-05, + "loss": 0.1635, + "step": 6301 + }, + { + "epoch": 36.011428571428574, + "grad_norm": 30.342844009399414, + "learning_rate": 1.5542857142857144e-05, + "loss": 0.1777, + "step": 6302 + }, + { + "epoch": 36.01714285714286, + "grad_norm": 80.23675537109375, + "learning_rate": 1.553650793650794e-05, + "loss": 0.1611, + "step": 6303 + }, + { + "epoch": 36.02285714285714, + "grad_norm": 101.42436981201172, + "learning_rate": 1.5530158730158732e-05, + "loss": 0.2678, + "step": 6304 + }, + { + "epoch": 36.02857142857143, + "grad_norm": 25.97087860107422, + "learning_rate": 1.5523809523809525e-05, + "loss": 0.137, + "step": 6305 + }, + { + "epoch": 36.034285714285716, + "grad_norm": 54.747676849365234, + "learning_rate": 1.551746031746032e-05, + "loss": 0.2697, + "step": 6306 + }, + { + "epoch": 36.04, + "grad_norm": 34.6653938293457, + "learning_rate": 1.5511111111111114e-05, + "loss": 0.1973, + "step": 6307 + }, + { + "epoch": 36.04571428571428, + "grad_norm": 43.137142181396484, + "learning_rate": 1.5504761904761906e-05, + "loss": 0.225, + "step": 6308 + }, + { + "epoch": 36.05142857142857, + "grad_norm": 73.97096252441406, + "learning_rate": 1.54984126984127e-05, + "loss": 0.1988, + "step": 6309 + }, + { + "epoch": 36.05714285714286, + "grad_norm": 309.857421875, + "learning_rate": 1.5492063492063495e-05, + "loss": 0.2243, + "step": 6310 + }, + { + "epoch": 36.06285714285714, + "grad_norm": 80.37860870361328, + "learning_rate": 1.5485714285714287e-05, + "loss": 0.1729, + "step": 6311 + }, + { + "epoch": 36.06857142857143, + "grad_norm": 19.095983505249023, + "learning_rate": 1.547936507936508e-05, + "loss": 0.1457, + "step": 6312 + }, + { + "epoch": 36.074285714285715, + "grad_norm": 27.1456298828125, + "learning_rate": 1.5473015873015873e-05, + "loss": 0.1896, + "step": 6313 + }, + { + "epoch": 36.08, + "grad_norm": 65.52925109863281, + "learning_rate": 1.546666666666667e-05, + "loss": 0.2088, + "step": 
6314 + }, + { + "epoch": 36.08571428571429, + "grad_norm": 47.01498031616211, + "learning_rate": 1.546031746031746e-05, + "loss": 0.1597, + "step": 6315 + }, + { + "epoch": 36.09142857142857, + "grad_norm": 27.8057918548584, + "learning_rate": 1.5453968253968254e-05, + "loss": 0.1789, + "step": 6316 + }, + { + "epoch": 36.097142857142856, + "grad_norm": 138.99972534179688, + "learning_rate": 1.544761904761905e-05, + "loss": 0.3318, + "step": 6317 + }, + { + "epoch": 36.10285714285714, + "grad_norm": 2374.276611328125, + "learning_rate": 1.5441269841269843e-05, + "loss": 0.3334, + "step": 6318 + }, + { + "epoch": 36.10857142857143, + "grad_norm": 92.5726089477539, + "learning_rate": 1.5434920634920635e-05, + "loss": 0.2459, + "step": 6319 + }, + { + "epoch": 36.114285714285714, + "grad_norm": 61.678016662597656, + "learning_rate": 1.5428571428571428e-05, + "loss": 0.2149, + "step": 6320 + }, + { + "epoch": 36.12, + "grad_norm": 35.04982376098633, + "learning_rate": 1.5422222222222224e-05, + "loss": 0.2917, + "step": 6321 + }, + { + "epoch": 36.12571428571429, + "grad_norm": 90.88272857666016, + "learning_rate": 1.5415873015873017e-05, + "loss": 0.1778, + "step": 6322 + }, + { + "epoch": 36.13142857142857, + "grad_norm": 39.97517013549805, + "learning_rate": 1.540952380952381e-05, + "loss": 0.3294, + "step": 6323 + }, + { + "epoch": 36.137142857142855, + "grad_norm": 29.622541427612305, + "learning_rate": 1.5403174603174602e-05, + "loss": 0.2512, + "step": 6324 + }, + { + "epoch": 36.142857142857146, + "grad_norm": 211.47042846679688, + "learning_rate": 1.5396825396825398e-05, + "loss": 0.2083, + "step": 6325 + }, + { + "epoch": 36.14857142857143, + "grad_norm": 76.01374816894531, + "learning_rate": 1.539047619047619e-05, + "loss": 0.2405, + "step": 6326 + }, + { + "epoch": 36.15428571428571, + "grad_norm": 48.560489654541016, + "learning_rate": 1.5384126984126983e-05, + "loss": 0.2085, + "step": 6327 + }, + { + "epoch": 36.16, + "grad_norm": 39.614715576171875, + 
"learning_rate": 1.537777777777778e-05, + "loss": 0.151, + "step": 6328 + }, + { + "epoch": 36.16571428571429, + "grad_norm": 165.3211669921875, + "learning_rate": 1.5371428571428572e-05, + "loss": 0.3179, + "step": 6329 + }, + { + "epoch": 36.17142857142857, + "grad_norm": 36.70731735229492, + "learning_rate": 1.5365079365079365e-05, + "loss": 0.0889, + "step": 6330 + }, + { + "epoch": 36.177142857142854, + "grad_norm": 22.209880828857422, + "learning_rate": 1.5358730158730157e-05, + "loss": 0.2098, + "step": 6331 + }, + { + "epoch": 36.182857142857145, + "grad_norm": 25.109262466430664, + "learning_rate": 1.5352380952380953e-05, + "loss": 0.2304, + "step": 6332 + }, + { + "epoch": 36.18857142857143, + "grad_norm": 40.98749542236328, + "learning_rate": 1.5346031746031746e-05, + "loss": 0.1794, + "step": 6333 + }, + { + "epoch": 36.19428571428571, + "grad_norm": 93.3605728149414, + "learning_rate": 1.533968253968254e-05, + "loss": 0.2494, + "step": 6334 + }, + { + "epoch": 36.2, + "grad_norm": 53.41642761230469, + "learning_rate": 1.5333333333333334e-05, + "loss": 0.2813, + "step": 6335 + }, + { + "epoch": 36.205714285714286, + "grad_norm": 20.14623260498047, + "learning_rate": 1.5326984126984127e-05, + "loss": 0.2313, + "step": 6336 + }, + { + "epoch": 36.21142857142857, + "grad_norm": 48.9773063659668, + "learning_rate": 1.5320634920634923e-05, + "loss": 0.1613, + "step": 6337 + }, + { + "epoch": 36.21714285714286, + "grad_norm": 123.74031829833984, + "learning_rate": 1.5314285714285716e-05, + "loss": 0.226, + "step": 6338 + }, + { + "epoch": 36.222857142857144, + "grad_norm": 22.779130935668945, + "learning_rate": 1.530793650793651e-05, + "loss": 0.1843, + "step": 6339 + }, + { + "epoch": 36.22857142857143, + "grad_norm": 98.26277160644531, + "learning_rate": 1.5301587301587304e-05, + "loss": 0.3517, + "step": 6340 + }, + { + "epoch": 36.23428571428571, + "grad_norm": 35.74067687988281, + "learning_rate": 1.5295238095238097e-05, + "loss": 0.1514, + "step": 6341 
+ }, + { + "epoch": 36.24, + "grad_norm": 95.01213073730469, + "learning_rate": 1.528888888888889e-05, + "loss": 0.1429, + "step": 6342 + }, + { + "epoch": 36.245714285714286, + "grad_norm": 48.54473114013672, + "learning_rate": 1.5282539682539686e-05, + "loss": 0.2102, + "step": 6343 + }, + { + "epoch": 36.25142857142857, + "grad_norm": 50.201717376708984, + "learning_rate": 1.527619047619048e-05, + "loss": 0.1812, + "step": 6344 + }, + { + "epoch": 36.25714285714286, + "grad_norm": 1488.32763671875, + "learning_rate": 1.526984126984127e-05, + "loss": 0.1958, + "step": 6345 + }, + { + "epoch": 36.26285714285714, + "grad_norm": 54.18905258178711, + "learning_rate": 1.5263492063492064e-05, + "loss": 0.1536, + "step": 6346 + }, + { + "epoch": 36.26857142857143, + "grad_norm": 39.101261138916016, + "learning_rate": 1.5257142857142858e-05, + "loss": 0.183, + "step": 6347 + }, + { + "epoch": 36.27428571428572, + "grad_norm": 92.49483489990234, + "learning_rate": 1.5250793650793652e-05, + "loss": 0.181, + "step": 6348 + }, + { + "epoch": 36.28, + "grad_norm": 32.887935638427734, + "learning_rate": 1.5244444444444445e-05, + "loss": 0.2423, + "step": 6349 + }, + { + "epoch": 36.285714285714285, + "grad_norm": 177.63185119628906, + "learning_rate": 1.5238095238095241e-05, + "loss": 0.2762, + "step": 6350 + }, + { + "epoch": 36.29142857142857, + "grad_norm": 77.8404541015625, + "learning_rate": 1.5231746031746034e-05, + "loss": 0.1804, + "step": 6351 + }, + { + "epoch": 36.29714285714286, + "grad_norm": 32.94889831542969, + "learning_rate": 1.5225396825396826e-05, + "loss": 0.2209, + "step": 6352 + }, + { + "epoch": 36.30285714285714, + "grad_norm": 63.828739166259766, + "learning_rate": 1.5219047619047619e-05, + "loss": 0.226, + "step": 6353 + }, + { + "epoch": 36.308571428571426, + "grad_norm": 43.031734466552734, + "learning_rate": 1.5212698412698415e-05, + "loss": 0.2416, + "step": 6354 + }, + { + "epoch": 36.31428571428572, + "grad_norm": 22.917098999023438, + 
"learning_rate": 1.5206349206349208e-05, + "loss": 0.1953, + "step": 6355 + }, + { + "epoch": 36.32, + "grad_norm": 54.05869674682617, + "learning_rate": 1.52e-05, + "loss": 0.1965, + "step": 6356 + }, + { + "epoch": 36.325714285714284, + "grad_norm": 107.59943389892578, + "learning_rate": 1.5193650793650793e-05, + "loss": 0.2637, + "step": 6357 + }, + { + "epoch": 36.331428571428575, + "grad_norm": 46.563541412353516, + "learning_rate": 1.5187301587301589e-05, + "loss": 0.2627, + "step": 6358 + }, + { + "epoch": 36.33714285714286, + "grad_norm": 31.009504318237305, + "learning_rate": 1.5180952380952381e-05, + "loss": 0.228, + "step": 6359 + }, + { + "epoch": 36.34285714285714, + "grad_norm": 27.440643310546875, + "learning_rate": 1.5174603174603174e-05, + "loss": 0.2416, + "step": 6360 + }, + { + "epoch": 36.348571428571425, + "grad_norm": 59.989654541015625, + "learning_rate": 1.5168253968253968e-05, + "loss": 0.1932, + "step": 6361 + }, + { + "epoch": 36.354285714285716, + "grad_norm": 83.95958709716797, + "learning_rate": 1.5161904761904763e-05, + "loss": 0.26, + "step": 6362 + }, + { + "epoch": 36.36, + "grad_norm": 48.0991096496582, + "learning_rate": 1.5155555555555555e-05, + "loss": 0.2212, + "step": 6363 + }, + { + "epoch": 36.36571428571428, + "grad_norm": 44.27768325805664, + "learning_rate": 1.514920634920635e-05, + "loss": 0.2014, + "step": 6364 + }, + { + "epoch": 36.371428571428574, + "grad_norm": 45.149261474609375, + "learning_rate": 1.5142857142857144e-05, + "loss": 0.1485, + "step": 6365 + }, + { + "epoch": 36.37714285714286, + "grad_norm": 90.59193420410156, + "learning_rate": 1.5136507936507938e-05, + "loss": 0.1746, + "step": 6366 + }, + { + "epoch": 36.38285714285714, + "grad_norm": 67.58226013183594, + "learning_rate": 1.5130158730158731e-05, + "loss": 0.3087, + "step": 6367 + }, + { + "epoch": 36.38857142857143, + "grad_norm": 68.11504364013672, + "learning_rate": 1.5123809523809524e-05, + "loss": 0.172, + "step": 6368 + }, + { + "epoch": 
36.394285714285715, + "grad_norm": 49.809810638427734, + "learning_rate": 1.511746031746032e-05, + "loss": 0.2154, + "step": 6369 + }, + { + "epoch": 36.4, + "grad_norm": 53.1560173034668, + "learning_rate": 1.5111111111111112e-05, + "loss": 0.1507, + "step": 6370 + }, + { + "epoch": 36.40571428571428, + "grad_norm": 45.67121124267578, + "learning_rate": 1.5104761904761905e-05, + "loss": 0.1551, + "step": 6371 + }, + { + "epoch": 36.41142857142857, + "grad_norm": 105.23786163330078, + "learning_rate": 1.5098412698412698e-05, + "loss": 0.2679, + "step": 6372 + }, + { + "epoch": 36.417142857142856, + "grad_norm": 88.73975372314453, + "learning_rate": 1.5092063492063494e-05, + "loss": 0.1415, + "step": 6373 + }, + { + "epoch": 36.42285714285714, + "grad_norm": 84.12977600097656, + "learning_rate": 1.5085714285714286e-05, + "loss": 0.2191, + "step": 6374 + }, + { + "epoch": 36.42857142857143, + "grad_norm": 23.05473518371582, + "learning_rate": 1.5079365079365079e-05, + "loss": 0.1559, + "step": 6375 + }, + { + "epoch": 36.434285714285714, + "grad_norm": 28.77918815612793, + "learning_rate": 1.5073015873015875e-05, + "loss": 0.1839, + "step": 6376 + }, + { + "epoch": 36.44, + "grad_norm": 37.2401008605957, + "learning_rate": 1.5066666666666668e-05, + "loss": 0.1908, + "step": 6377 + }, + { + "epoch": 36.44571428571429, + "grad_norm": 53.14858627319336, + "learning_rate": 1.506031746031746e-05, + "loss": 0.2473, + "step": 6378 + }, + { + "epoch": 36.45142857142857, + "grad_norm": 30.43804168701172, + "learning_rate": 1.5053968253968253e-05, + "loss": 0.1916, + "step": 6379 + }, + { + "epoch": 36.457142857142856, + "grad_norm": 50.2374153137207, + "learning_rate": 1.5047619047619049e-05, + "loss": 0.2085, + "step": 6380 + }, + { + "epoch": 36.462857142857146, + "grad_norm": 28.409648895263672, + "learning_rate": 1.5041269841269841e-05, + "loss": 0.2252, + "step": 6381 + }, + { + "epoch": 36.46857142857143, + "grad_norm": 67.96338653564453, + "learning_rate": 
1.5034920634920636e-05, + "loss": 0.1687, + "step": 6382 + }, + { + "epoch": 36.47428571428571, + "grad_norm": 17.873964309692383, + "learning_rate": 1.5028571428571428e-05, + "loss": 0.1904, + "step": 6383 + }, + { + "epoch": 36.48, + "grad_norm": 1682.106201171875, + "learning_rate": 1.5022222222222224e-05, + "loss": 0.2735, + "step": 6384 + }, + { + "epoch": 36.48571428571429, + "grad_norm": 51.79334259033203, + "learning_rate": 1.5015873015873017e-05, + "loss": 0.276, + "step": 6385 + }, + { + "epoch": 36.49142857142857, + "grad_norm": 38.2587890625, + "learning_rate": 1.500952380952381e-05, + "loss": 0.217, + "step": 6386 + }, + { + "epoch": 36.497142857142855, + "grad_norm": 29.614765167236328, + "learning_rate": 1.5003174603174606e-05, + "loss": 0.2729, + "step": 6387 + }, + { + "epoch": 36.502857142857145, + "grad_norm": 49.26810836791992, + "learning_rate": 1.4996825396825398e-05, + "loss": 0.2213, + "step": 6388 + }, + { + "epoch": 36.50857142857143, + "grad_norm": 79.2518539428711, + "learning_rate": 1.4990476190476191e-05, + "loss": 0.2216, + "step": 6389 + }, + { + "epoch": 36.51428571428571, + "grad_norm": 180.6911163330078, + "learning_rate": 1.4984126984126984e-05, + "loss": 0.2687, + "step": 6390 + }, + { + "epoch": 36.52, + "grad_norm": 51.260284423828125, + "learning_rate": 1.497777777777778e-05, + "loss": 0.2545, + "step": 6391 + }, + { + "epoch": 36.52571428571429, + "grad_norm": 21.773143768310547, + "learning_rate": 1.4971428571428572e-05, + "loss": 0.2171, + "step": 6392 + }, + { + "epoch": 36.53142857142857, + "grad_norm": 47.9932746887207, + "learning_rate": 1.4965079365079365e-05, + "loss": 0.1611, + "step": 6393 + }, + { + "epoch": 36.537142857142854, + "grad_norm": 19.89521598815918, + "learning_rate": 1.4958730158730158e-05, + "loss": 0.1706, + "step": 6394 + }, + { + "epoch": 36.542857142857144, + "grad_norm": 20.085758209228516, + "learning_rate": 1.4952380952380954e-05, + "loss": 0.138, + "step": 6395 + }, + { + "epoch": 
36.54857142857143, + "grad_norm": 33.8851203918457, + "learning_rate": 1.4946031746031746e-05, + "loss": 0.2037, + "step": 6396 + }, + { + "epoch": 36.55428571428571, + "grad_norm": 49.09003448486328, + "learning_rate": 1.4939682539682539e-05, + "loss": 0.2313, + "step": 6397 + }, + { + "epoch": 36.56, + "grad_norm": 452.2305603027344, + "learning_rate": 1.4933333333333335e-05, + "loss": 0.2614, + "step": 6398 + }, + { + "epoch": 36.565714285714286, + "grad_norm": 47.73259353637695, + "learning_rate": 1.4926984126984128e-05, + "loss": 0.2196, + "step": 6399 + }, + { + "epoch": 36.57142857142857, + "grad_norm": 19.27354621887207, + "learning_rate": 1.4920634920634922e-05, + "loss": 0.28, + "step": 6400 + }, + { + "epoch": 36.57714285714286, + "grad_norm": 19.988956451416016, + "learning_rate": 1.4914285714285715e-05, + "loss": 0.1931, + "step": 6401 + }, + { + "epoch": 36.582857142857144, + "grad_norm": 219.86468505859375, + "learning_rate": 1.4907936507936509e-05, + "loss": 0.2611, + "step": 6402 + }, + { + "epoch": 36.58857142857143, + "grad_norm": 973.735595703125, + "learning_rate": 1.4901587301587303e-05, + "loss": 0.2148, + "step": 6403 + }, + { + "epoch": 36.59428571428572, + "grad_norm": 39.37485122680664, + "learning_rate": 1.4895238095238096e-05, + "loss": 0.1662, + "step": 6404 + }, + { + "epoch": 36.6, + "grad_norm": 22.795944213867188, + "learning_rate": 1.4888888888888888e-05, + "loss": 0.3078, + "step": 6405 + }, + { + "epoch": 36.605714285714285, + "grad_norm": 37.41624069213867, + "learning_rate": 1.4882539682539684e-05, + "loss": 0.1794, + "step": 6406 + }, + { + "epoch": 36.61142857142857, + "grad_norm": 54.39463806152344, + "learning_rate": 1.4876190476190477e-05, + "loss": 0.2123, + "step": 6407 + }, + { + "epoch": 36.61714285714286, + "grad_norm": 18.27862548828125, + "learning_rate": 1.486984126984127e-05, + "loss": 0.1803, + "step": 6408 + }, + { + "epoch": 36.62285714285714, + "grad_norm": 50.36140060424805, + "learning_rate": 
1.4863492063492066e-05, + "loss": 0.2265, + "step": 6409 + }, + { + "epoch": 36.628571428571426, + "grad_norm": 26.36475372314453, + "learning_rate": 1.4857142857142858e-05, + "loss": 0.2193, + "step": 6410 + }, + { + "epoch": 36.63428571428572, + "grad_norm": 33.291900634765625, + "learning_rate": 1.4850793650793651e-05, + "loss": 0.2075, + "step": 6411 + }, + { + "epoch": 36.64, + "grad_norm": 42.37389373779297, + "learning_rate": 1.4844444444444444e-05, + "loss": 0.2289, + "step": 6412 + }, + { + "epoch": 36.645714285714284, + "grad_norm": 27.10552978515625, + "learning_rate": 1.483809523809524e-05, + "loss": 0.2085, + "step": 6413 + }, + { + "epoch": 36.651428571428575, + "grad_norm": 16.50041389465332, + "learning_rate": 1.4831746031746032e-05, + "loss": 0.1601, + "step": 6414 + }, + { + "epoch": 36.65714285714286, + "grad_norm": 24.146190643310547, + "learning_rate": 1.4825396825396825e-05, + "loss": 0.1922, + "step": 6415 + }, + { + "epoch": 36.66285714285714, + "grad_norm": 36.586544036865234, + "learning_rate": 1.481904761904762e-05, + "loss": 0.1325, + "step": 6416 + }, + { + "epoch": 36.668571428571425, + "grad_norm": 40.86701583862305, + "learning_rate": 1.4812698412698414e-05, + "loss": 0.1962, + "step": 6417 + }, + { + "epoch": 36.674285714285716, + "grad_norm": 65.42292022705078, + "learning_rate": 1.4806349206349206e-05, + "loss": 0.2012, + "step": 6418 + }, + { + "epoch": 36.68, + "grad_norm": 57.85270690917969, + "learning_rate": 1.48e-05, + "loss": 0.2726, + "step": 6419 + }, + { + "epoch": 36.68571428571428, + "grad_norm": 35.29873275756836, + "learning_rate": 1.4793650793650795e-05, + "loss": 0.2092, + "step": 6420 + }, + { + "epoch": 36.691428571428574, + "grad_norm": 135.01405334472656, + "learning_rate": 1.478730158730159e-05, + "loss": 0.22, + "step": 6421 + }, + { + "epoch": 36.69714285714286, + "grad_norm": 78.34668731689453, + "learning_rate": 1.4780952380952382e-05, + "loss": 0.195, + "step": 6422 + }, + { + "epoch": 36.70285714285714, 
+ "grad_norm": 19.63154411315918, + "learning_rate": 1.4774603174603175e-05, + "loss": 0.205, + "step": 6423 + }, + { + "epoch": 36.70857142857143, + "grad_norm": 66.94962310791016, + "learning_rate": 1.476825396825397e-05, + "loss": 0.1626, + "step": 6424 + }, + { + "epoch": 36.714285714285715, + "grad_norm": 126.7664566040039, + "learning_rate": 1.4761904761904763e-05, + "loss": 0.2621, + "step": 6425 + }, + { + "epoch": 36.72, + "grad_norm": 21.17292022705078, + "learning_rate": 1.4755555555555556e-05, + "loss": 0.1923, + "step": 6426 + }, + { + "epoch": 36.72571428571428, + "grad_norm": 59.32046127319336, + "learning_rate": 1.4749206349206348e-05, + "loss": 0.2314, + "step": 6427 + }, + { + "epoch": 36.73142857142857, + "grad_norm": 192.17601013183594, + "learning_rate": 1.4742857142857144e-05, + "loss": 0.1811, + "step": 6428 + }, + { + "epoch": 36.73714285714286, + "grad_norm": 50.06969451904297, + "learning_rate": 1.4736507936507937e-05, + "loss": 0.2278, + "step": 6429 + }, + { + "epoch": 36.74285714285714, + "grad_norm": 29.369829177856445, + "learning_rate": 1.473015873015873e-05, + "loss": 0.1904, + "step": 6430 + }, + { + "epoch": 36.74857142857143, + "grad_norm": 18.54184341430664, + "learning_rate": 1.4723809523809526e-05, + "loss": 0.2018, + "step": 6431 + }, + { + "epoch": 36.754285714285714, + "grad_norm": 37.97724533081055, + "learning_rate": 1.4717460317460318e-05, + "loss": 0.2711, + "step": 6432 + }, + { + "epoch": 36.76, + "grad_norm": 116.49739837646484, + "learning_rate": 1.4711111111111111e-05, + "loss": 0.2082, + "step": 6433 + }, + { + "epoch": 36.76571428571429, + "grad_norm": 99.23430633544922, + "learning_rate": 1.4704761904761904e-05, + "loss": 0.2001, + "step": 6434 + }, + { + "epoch": 36.77142857142857, + "grad_norm": 51.099456787109375, + "learning_rate": 1.46984126984127e-05, + "loss": 0.2481, + "step": 6435 + }, + { + "epoch": 36.777142857142856, + "grad_norm": 59.015628814697266, + "learning_rate": 1.4692063492063492e-05, + 
"loss": 0.1419, + "step": 6436 + }, + { + "epoch": 36.78285714285714, + "grad_norm": 88.4302749633789, + "learning_rate": 1.4685714285714287e-05, + "loss": 0.3196, + "step": 6437 + }, + { + "epoch": 36.78857142857143, + "grad_norm": 42.81243133544922, + "learning_rate": 1.467936507936508e-05, + "loss": 0.1468, + "step": 6438 + }, + { + "epoch": 36.794285714285714, + "grad_norm": 23.068126678466797, + "learning_rate": 1.4673015873015875e-05, + "loss": 0.19, + "step": 6439 + }, + { + "epoch": 36.8, + "grad_norm": 44.7575798034668, + "learning_rate": 1.4666666666666668e-05, + "loss": 0.207, + "step": 6440 + }, + { + "epoch": 36.80571428571429, + "grad_norm": 35.08080291748047, + "learning_rate": 1.466031746031746e-05, + "loss": 0.1938, + "step": 6441 + }, + { + "epoch": 36.81142857142857, + "grad_norm": 991.8641357421875, + "learning_rate": 1.4653968253968257e-05, + "loss": 0.2564, + "step": 6442 + }, + { + "epoch": 36.817142857142855, + "grad_norm": 35.81486129760742, + "learning_rate": 1.464761904761905e-05, + "loss": 0.2436, + "step": 6443 + }, + { + "epoch": 36.822857142857146, + "grad_norm": 72.99882507324219, + "learning_rate": 1.4641269841269842e-05, + "loss": 0.2167, + "step": 6444 + }, + { + "epoch": 36.82857142857143, + "grad_norm": 70.42816925048828, + "learning_rate": 1.4634920634920635e-05, + "loss": 0.2139, + "step": 6445 + }, + { + "epoch": 36.83428571428571, + "grad_norm": 52.384498596191406, + "learning_rate": 1.462857142857143e-05, + "loss": 0.2429, + "step": 6446 + }, + { + "epoch": 36.84, + "grad_norm": 325.8592224121094, + "learning_rate": 1.4622222222222223e-05, + "loss": 0.2057, + "step": 6447 + }, + { + "epoch": 36.84571428571429, + "grad_norm": 77.25653076171875, + "learning_rate": 1.4615873015873016e-05, + "loss": 0.1883, + "step": 6448 + }, + { + "epoch": 36.85142857142857, + "grad_norm": 25.819324493408203, + "learning_rate": 1.4609523809523808e-05, + "loss": 0.3005, + "step": 6449 + }, + { + "epoch": 36.857142857142854, + "grad_norm": 
34.40315246582031, + "learning_rate": 1.4603174603174605e-05, + "loss": 0.1597, + "step": 6450 + }, + { + "epoch": 36.862857142857145, + "grad_norm": 55.17396545410156, + "learning_rate": 1.4596825396825397e-05, + "loss": 0.1773, + "step": 6451 + }, + { + "epoch": 36.86857142857143, + "grad_norm": 50.16841125488281, + "learning_rate": 1.459047619047619e-05, + "loss": 0.1644, + "step": 6452 + }, + { + "epoch": 36.87428571428571, + "grad_norm": 38.792442321777344, + "learning_rate": 1.4584126984126986e-05, + "loss": 0.174, + "step": 6453 + }, + { + "epoch": 36.88, + "grad_norm": 70.29129791259766, + "learning_rate": 1.4577777777777778e-05, + "loss": 0.2756, + "step": 6454 + }, + { + "epoch": 36.885714285714286, + "grad_norm": 31.52997398376465, + "learning_rate": 1.4571428571428573e-05, + "loss": 0.1333, + "step": 6455 + }, + { + "epoch": 36.89142857142857, + "grad_norm": 54.99339294433594, + "learning_rate": 1.4565079365079365e-05, + "loss": 0.1977, + "step": 6456 + }, + { + "epoch": 36.89714285714286, + "grad_norm": 17.953866958618164, + "learning_rate": 1.455873015873016e-05, + "loss": 0.2214, + "step": 6457 + }, + { + "epoch": 36.902857142857144, + "grad_norm": 163.7252197265625, + "learning_rate": 1.4552380952380954e-05, + "loss": 0.2708, + "step": 6458 + }, + { + "epoch": 36.90857142857143, + "grad_norm": 33.827884674072266, + "learning_rate": 1.4546031746031747e-05, + "loss": 0.2193, + "step": 6459 + }, + { + "epoch": 36.91428571428571, + "grad_norm": 60.707435607910156, + "learning_rate": 1.453968253968254e-05, + "loss": 0.1865, + "step": 6460 + }, + { + "epoch": 36.92, + "grad_norm": 55.701087951660156, + "learning_rate": 1.4533333333333335e-05, + "loss": 0.1736, + "step": 6461 + }, + { + "epoch": 36.925714285714285, + "grad_norm": 21.98332977294922, + "learning_rate": 1.4526984126984128e-05, + "loss": 0.1634, + "step": 6462 + }, + { + "epoch": 36.93142857142857, + "grad_norm": 75.26737213134766, + "learning_rate": 1.452063492063492e-05, + "loss": 0.1949, + 
"step": 6463 + }, + { + "epoch": 36.93714285714286, + "grad_norm": 29.80234718322754, + "learning_rate": 1.4514285714285713e-05, + "loss": 0.1972, + "step": 6464 + }, + { + "epoch": 36.94285714285714, + "grad_norm": 68.53266143798828, + "learning_rate": 1.450793650793651e-05, + "loss": 0.1938, + "step": 6465 + }, + { + "epoch": 36.94857142857143, + "grad_norm": 96.14524841308594, + "learning_rate": 1.4501587301587302e-05, + "loss": 0.1771, + "step": 6466 + }, + { + "epoch": 36.95428571428572, + "grad_norm": 31.954814910888672, + "learning_rate": 1.4495238095238095e-05, + "loss": 0.1835, + "step": 6467 + }, + { + "epoch": 36.96, + "grad_norm": 103.53461456298828, + "learning_rate": 1.448888888888889e-05, + "loss": 0.2957, + "step": 6468 + }, + { + "epoch": 36.965714285714284, + "grad_norm": 154.65966796875, + "learning_rate": 1.4482539682539683e-05, + "loss": 0.2546, + "step": 6469 + }, + { + "epoch": 36.97142857142857, + "grad_norm": 27.442543029785156, + "learning_rate": 1.4476190476190476e-05, + "loss": 0.3516, + "step": 6470 + }, + { + "epoch": 36.97714285714286, + "grad_norm": 21.102581024169922, + "learning_rate": 1.446984126984127e-05, + "loss": 0.1881, + "step": 6471 + }, + { + "epoch": 36.98285714285714, + "grad_norm": 140.93621826171875, + "learning_rate": 1.4463492063492065e-05, + "loss": 0.2462, + "step": 6472 + }, + { + "epoch": 36.988571428571426, + "grad_norm": 20.03640365600586, + "learning_rate": 1.4457142857142857e-05, + "loss": 0.1546, + "step": 6473 + }, + { + "epoch": 36.994285714285716, + "grad_norm": 24.340862274169922, + "learning_rate": 1.4450793650793651e-05, + "loss": 0.1989, + "step": 6474 + }, + { + "epoch": 37.0, + "grad_norm": 33.04745864868164, + "learning_rate": 1.4444444444444444e-05, + "loss": 0.2284, + "step": 6475 + }, + { + "epoch": 37.0, + "eval_classes": 0, + "eval_loss": 0.5924772024154663, + "eval_map": 0.9348, + "eval_map_50": 0.9709, + "eval_map_75": 0.9658, + "eval_map_large": 0.9348, + "eval_map_medium": -1.0, + 
"eval_map_per_class": 0.9348, + "eval_map_small": -1.0, + "eval_mar_1": 0.7867, + "eval_mar_10": 0.9698, + "eval_mar_100": 0.9733, + "eval_mar_100_per_class": 0.9733, + "eval_mar_large": 0.9733, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.3816, + "eval_samples_per_second": 21.97, + "eval_steps_per_second": 2.765, + "step": 6475 + }, + { + "epoch": 37.005714285714284, + "grad_norm": 42.18199920654297, + "learning_rate": 1.443809523809524e-05, + "loss": 0.166, + "step": 6476 + }, + { + "epoch": 37.011428571428574, + "grad_norm": 33.291439056396484, + "learning_rate": 1.4431746031746033e-05, + "loss": 0.1766, + "step": 6477 + }, + { + "epoch": 37.01714285714286, + "grad_norm": 73.50809478759766, + "learning_rate": 1.4425396825396825e-05, + "loss": 0.2126, + "step": 6478 + }, + { + "epoch": 37.02285714285714, + "grad_norm": 102.65695190429688, + "learning_rate": 1.4419047619047621e-05, + "loss": 0.356, + "step": 6479 + }, + { + "epoch": 37.02857142857143, + "grad_norm": 486.31036376953125, + "learning_rate": 1.4412698412698414e-05, + "loss": 0.2182, + "step": 6480 + }, + { + "epoch": 37.034285714285716, + "grad_norm": 32.039756774902344, + "learning_rate": 1.4406349206349207e-05, + "loss": 0.1329, + "step": 6481 + }, + { + "epoch": 37.04, + "grad_norm": 83.30311584472656, + "learning_rate": 1.44e-05, + "loss": 0.1776, + "step": 6482 + }, + { + "epoch": 37.04571428571428, + "grad_norm": 42.36127853393555, + "learning_rate": 1.4393650793650795e-05, + "loss": 0.1668, + "step": 6483 + }, + { + "epoch": 37.05142857142857, + "grad_norm": 57.76752853393555, + "learning_rate": 1.4387301587301588e-05, + "loss": 0.2634, + "step": 6484 + }, + { + "epoch": 37.05714285714286, + "grad_norm": 25.50920295715332, + "learning_rate": 1.438095238095238e-05, + "loss": 0.1957, + "step": 6485 + }, + { + "epoch": 37.06285714285714, + "grad_norm": 34.5374641418457, + "learning_rate": 1.4374603174603173e-05, + "loss": 0.1514, + "step": 6486 + }, + { + "epoch": 
37.06857142857143, + "grad_norm": 64.24982452392578, + "learning_rate": 1.436825396825397e-05, + "loss": 0.1747, + "step": 6487 + }, + { + "epoch": 37.074285714285715, + "grad_norm": 33.088592529296875, + "learning_rate": 1.4361904761904762e-05, + "loss": 0.1769, + "step": 6488 + }, + { + "epoch": 37.08, + "grad_norm": 48.71031188964844, + "learning_rate": 1.4355555555555556e-05, + "loss": 0.2206, + "step": 6489 + }, + { + "epoch": 37.08571428571429, + "grad_norm": 46.32160186767578, + "learning_rate": 1.434920634920635e-05, + "loss": 0.1781, + "step": 6490 + }, + { + "epoch": 37.09142857142857, + "grad_norm": 57.94861602783203, + "learning_rate": 1.4342857142857143e-05, + "loss": 0.1981, + "step": 6491 + }, + { + "epoch": 37.097142857142856, + "grad_norm": 33.29441833496094, + "learning_rate": 1.4336507936507938e-05, + "loss": 0.1875, + "step": 6492 + }, + { + "epoch": 37.10285714285714, + "grad_norm": 85.9415512084961, + "learning_rate": 1.433015873015873e-05, + "loss": 0.2358, + "step": 6493 + }, + { + "epoch": 37.10857142857143, + "grad_norm": 30.147438049316406, + "learning_rate": 1.4323809523809526e-05, + "loss": 0.1804, + "step": 6494 + }, + { + "epoch": 37.114285714285714, + "grad_norm": 70.6728744506836, + "learning_rate": 1.4317460317460319e-05, + "loss": 0.1938, + "step": 6495 + }, + { + "epoch": 37.12, + "grad_norm": 45.146568298339844, + "learning_rate": 1.4311111111111111e-05, + "loss": 0.1775, + "step": 6496 + }, + { + "epoch": 37.12571428571429, + "grad_norm": 25.29806137084961, + "learning_rate": 1.4304761904761904e-05, + "loss": 0.1952, + "step": 6497 + }, + { + "epoch": 37.13142857142857, + "grad_norm": 62.408451080322266, + "learning_rate": 1.42984126984127e-05, + "loss": 0.246, + "step": 6498 + }, + { + "epoch": 37.137142857142855, + "grad_norm": 56.218135833740234, + "learning_rate": 1.4292063492063493e-05, + "loss": 0.2028, + "step": 6499 + }, + { + "epoch": 37.142857142857146, + "grad_norm": 30.245328903198242, + "learning_rate": 
1.4285714285714285e-05, + "loss": 0.16, + "step": 6500 + }, + { + "epoch": 37.14857142857143, + "grad_norm": 40.37526321411133, + "learning_rate": 1.4279365079365081e-05, + "loss": 0.1785, + "step": 6501 + }, + { + "epoch": 37.15428571428571, + "grad_norm": 28.770414352416992, + "learning_rate": 1.4273015873015874e-05, + "loss": 0.2346, + "step": 6502 + }, + { + "epoch": 37.16, + "grad_norm": 47.9056510925293, + "learning_rate": 1.4266666666666667e-05, + "loss": 0.2143, + "step": 6503 + }, + { + "epoch": 37.16571428571429, + "grad_norm": 35.36516189575195, + "learning_rate": 1.426031746031746e-05, + "loss": 0.2356, + "step": 6504 + }, + { + "epoch": 37.17142857142857, + "grad_norm": 30.85645294189453, + "learning_rate": 1.4253968253968255e-05, + "loss": 0.2061, + "step": 6505 + }, + { + "epoch": 37.177142857142854, + "grad_norm": 21.12093162536621, + "learning_rate": 1.4247619047619048e-05, + "loss": 0.1627, + "step": 6506 + }, + { + "epoch": 37.182857142857145, + "grad_norm": 38.55308532714844, + "learning_rate": 1.424126984126984e-05, + "loss": 0.1425, + "step": 6507 + }, + { + "epoch": 37.18857142857143, + "grad_norm": 22.23093032836914, + "learning_rate": 1.4234920634920635e-05, + "loss": 0.2234, + "step": 6508 + }, + { + "epoch": 37.19428571428571, + "grad_norm": 138.44908142089844, + "learning_rate": 1.422857142857143e-05, + "loss": 0.2175, + "step": 6509 + }, + { + "epoch": 37.2, + "grad_norm": 29.336397171020508, + "learning_rate": 1.4222222222222224e-05, + "loss": 0.2913, + "step": 6510 + }, + { + "epoch": 37.205714285714286, + "grad_norm": 114.45040130615234, + "learning_rate": 1.4215873015873016e-05, + "loss": 0.2199, + "step": 6511 + }, + { + "epoch": 37.21142857142857, + "grad_norm": 35.597721099853516, + "learning_rate": 1.420952380952381e-05, + "loss": 0.2706, + "step": 6512 + }, + { + "epoch": 37.21714285714286, + "grad_norm": 72.16755676269531, + "learning_rate": 1.4203174603174605e-05, + "loss": 0.1653, + "step": 6513 + }, + { + "epoch": 
37.222857142857144, + "grad_norm": 27.48973274230957, + "learning_rate": 1.4196825396825398e-05, + "loss": 0.2711, + "step": 6514 + }, + { + "epoch": 37.22857142857143, + "grad_norm": 44.54720687866211, + "learning_rate": 1.419047619047619e-05, + "loss": 0.1249, + "step": 6515 + }, + { + "epoch": 37.23428571428571, + "grad_norm": 48.198299407958984, + "learning_rate": 1.4184126984126986e-05, + "loss": 0.1662, + "step": 6516 + }, + { + "epoch": 37.24, + "grad_norm": 49.05499267578125, + "learning_rate": 1.4177777777777779e-05, + "loss": 0.1691, + "step": 6517 + }, + { + "epoch": 37.245714285714286, + "grad_norm": 128.78123474121094, + "learning_rate": 1.4171428571428572e-05, + "loss": 0.2054, + "step": 6518 + }, + { + "epoch": 37.25142857142857, + "grad_norm": 56.933284759521484, + "learning_rate": 1.4165079365079364e-05, + "loss": 0.2684, + "step": 6519 + }, + { + "epoch": 37.25714285714286, + "grad_norm": 21.544979095458984, + "learning_rate": 1.415873015873016e-05, + "loss": 0.1621, + "step": 6520 + }, + { + "epoch": 37.26285714285714, + "grad_norm": 63.549198150634766, + "learning_rate": 1.4152380952380953e-05, + "loss": 0.1631, + "step": 6521 + }, + { + "epoch": 37.26857142857143, + "grad_norm": 80.13479614257812, + "learning_rate": 1.4146031746031745e-05, + "loss": 0.2853, + "step": 6522 + }, + { + "epoch": 37.27428571428572, + "grad_norm": 28.332656860351562, + "learning_rate": 1.4139682539682541e-05, + "loss": 0.1501, + "step": 6523 + }, + { + "epoch": 37.28, + "grad_norm": 33.4707145690918, + "learning_rate": 1.4133333333333334e-05, + "loss": 0.358, + "step": 6524 + }, + { + "epoch": 37.285714285714285, + "grad_norm": 40.02132797241211, + "learning_rate": 1.4126984126984127e-05, + "loss": 0.2312, + "step": 6525 + }, + { + "epoch": 37.29142857142857, + "grad_norm": 22.119464874267578, + "learning_rate": 1.4120634920634921e-05, + "loss": 0.176, + "step": 6526 + }, + { + "epoch": 37.29714285714286, + "grad_norm": 48.0070686340332, + "learning_rate": 
1.4114285714285715e-05, + "loss": 0.2432, + "step": 6527 + }, + { + "epoch": 37.30285714285714, + "grad_norm": 40.382179260253906, + "learning_rate": 1.410793650793651e-05, + "loss": 0.1805, + "step": 6528 + }, + { + "epoch": 37.308571428571426, + "grad_norm": 67.17555236816406, + "learning_rate": 1.4101587301587302e-05, + "loss": 0.2174, + "step": 6529 + }, + { + "epoch": 37.31428571428572, + "grad_norm": 35.86476516723633, + "learning_rate": 1.4095238095238095e-05, + "loss": 0.2377, + "step": 6530 + }, + { + "epoch": 37.32, + "grad_norm": 1549.185791015625, + "learning_rate": 1.4088888888888891e-05, + "loss": 0.1954, + "step": 6531 + }, + { + "epoch": 37.325714285714284, + "grad_norm": 22.005220413208008, + "learning_rate": 1.4082539682539684e-05, + "loss": 0.2031, + "step": 6532 + }, + { + "epoch": 37.331428571428575, + "grad_norm": 84.3892593383789, + "learning_rate": 1.4076190476190476e-05, + "loss": 0.1654, + "step": 6533 + }, + { + "epoch": 37.33714285714286, + "grad_norm": 34.993690490722656, + "learning_rate": 1.4069841269841272e-05, + "loss": 0.1242, + "step": 6534 + }, + { + "epoch": 37.34285714285714, + "grad_norm": 70.59294891357422, + "learning_rate": 1.4063492063492065e-05, + "loss": 0.1917, + "step": 6535 + }, + { + "epoch": 37.348571428571425, + "grad_norm": 85.08328247070312, + "learning_rate": 1.4057142857142858e-05, + "loss": 0.1536, + "step": 6536 + }, + { + "epoch": 37.354285714285716, + "grad_norm": 42.231712341308594, + "learning_rate": 1.405079365079365e-05, + "loss": 0.1806, + "step": 6537 + }, + { + "epoch": 37.36, + "grad_norm": 29.59331512451172, + "learning_rate": 1.4044444444444446e-05, + "loss": 0.271, + "step": 6538 + }, + { + "epoch": 37.36571428571428, + "grad_norm": 62.4417610168457, + "learning_rate": 1.4038095238095239e-05, + "loss": 0.2398, + "step": 6539 + }, + { + "epoch": 37.371428571428574, + "grad_norm": 733.9818725585938, + "learning_rate": 1.4031746031746032e-05, + "loss": 0.1833, + "step": 6540 + }, + { + "epoch": 
37.37714285714286, + "grad_norm": 68.64158630371094, + "learning_rate": 1.4025396825396824e-05, + "loss": 0.193, + "step": 6541 + }, + { + "epoch": 37.38285714285714, + "grad_norm": 127.94831085205078, + "learning_rate": 1.401904761904762e-05, + "loss": 0.2419, + "step": 6542 + }, + { + "epoch": 37.38857142857143, + "grad_norm": 79.00579071044922, + "learning_rate": 1.4012698412698413e-05, + "loss": 0.218, + "step": 6543 + }, + { + "epoch": 37.394285714285715, + "grad_norm": 45.025455474853516, + "learning_rate": 1.4006349206349207e-05, + "loss": 0.2585, + "step": 6544 + }, + { + "epoch": 37.4, + "grad_norm": 48.02553939819336, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.2066, + "step": 6545 + }, + { + "epoch": 37.40571428571428, + "grad_norm": 80.67889404296875, + "learning_rate": 1.3993650793650794e-05, + "loss": 0.1473, + "step": 6546 + }, + { + "epoch": 37.41142857142857, + "grad_norm": 680.8949584960938, + "learning_rate": 1.3987301587301588e-05, + "loss": 0.2644, + "step": 6547 + }, + { + "epoch": 37.417142857142856, + "grad_norm": 35.802101135253906, + "learning_rate": 1.3980952380952381e-05, + "loss": 0.1428, + "step": 6548 + }, + { + "epoch": 37.42285714285714, + "grad_norm": 28.695444107055664, + "learning_rate": 1.3974603174603177e-05, + "loss": 0.2259, + "step": 6549 + }, + { + "epoch": 37.42857142857143, + "grad_norm": 43.1397705078125, + "learning_rate": 1.396825396825397e-05, + "loss": 0.3066, + "step": 6550 + }, + { + "epoch": 37.434285714285714, + "grad_norm": 45.93376922607422, + "learning_rate": 1.3961904761904762e-05, + "loss": 0.1508, + "step": 6551 + }, + { + "epoch": 37.44, + "grad_norm": 67.8795394897461, + "learning_rate": 1.3955555555555555e-05, + "loss": 0.1917, + "step": 6552 + }, + { + "epoch": 37.44571428571429, + "grad_norm": 22.79017448425293, + "learning_rate": 1.3949206349206351e-05, + "loss": 0.1577, + "step": 6553 + }, + { + "epoch": 37.45142857142857, + "grad_norm": 328.4523620605469, + "learning_rate": 
1.3942857142857144e-05, + "loss": 0.2288, + "step": 6554 + }, + { + "epoch": 37.457142857142856, + "grad_norm": 57.735652923583984, + "learning_rate": 1.3936507936507936e-05, + "loss": 0.1693, + "step": 6555 + }, + { + "epoch": 37.462857142857146, + "grad_norm": 46.90785217285156, + "learning_rate": 1.3930158730158732e-05, + "loss": 0.1789, + "step": 6556 + }, + { + "epoch": 37.46857142857143, + "grad_norm": 28.03649139404297, + "learning_rate": 1.3923809523809525e-05, + "loss": 0.2141, + "step": 6557 + }, + { + "epoch": 37.47428571428571, + "grad_norm": 76.3325424194336, + "learning_rate": 1.3917460317460318e-05, + "loss": 0.1989, + "step": 6558 + }, + { + "epoch": 37.48, + "grad_norm": 40.22255325317383, + "learning_rate": 1.391111111111111e-05, + "loss": 0.2684, + "step": 6559 + }, + { + "epoch": 37.48571428571429, + "grad_norm": 47.31599044799805, + "learning_rate": 1.3904761904761906e-05, + "loss": 0.2641, + "step": 6560 + }, + { + "epoch": 37.49142857142857, + "grad_norm": 96.81246185302734, + "learning_rate": 1.3898412698412699e-05, + "loss": 0.2587, + "step": 6561 + }, + { + "epoch": 37.497142857142855, + "grad_norm": 62.16279983520508, + "learning_rate": 1.3892063492063492e-05, + "loss": 0.2024, + "step": 6562 + }, + { + "epoch": 37.502857142857145, + "grad_norm": 62.037052154541016, + "learning_rate": 1.3885714285714286e-05, + "loss": 0.1812, + "step": 6563 + }, + { + "epoch": 37.50857142857143, + "grad_norm": 25.21396255493164, + "learning_rate": 1.387936507936508e-05, + "loss": 0.2129, + "step": 6564 + }, + { + "epoch": 37.51428571428571, + "grad_norm": 77.27635192871094, + "learning_rate": 1.3873015873015875e-05, + "loss": 0.2085, + "step": 6565 + }, + { + "epoch": 37.52, + "grad_norm": 54.68073272705078, + "learning_rate": 1.3866666666666667e-05, + "loss": 0.2428, + "step": 6566 + }, + { + "epoch": 37.52571428571429, + "grad_norm": 62.48573303222656, + "learning_rate": 1.3860317460317463e-05, + "loss": 0.194, + "step": 6567 + }, + { + "epoch": 
37.53142857142857, + "grad_norm": 17.646411895751953, + "learning_rate": 1.3853968253968256e-05, + "loss": 0.1942, + "step": 6568 + }, + { + "epoch": 37.537142857142854, + "grad_norm": 18.57720184326172, + "learning_rate": 1.3847619047619048e-05, + "loss": 0.1564, + "step": 6569 + }, + { + "epoch": 37.542857142857144, + "grad_norm": 48.49040985107422, + "learning_rate": 1.3841269841269841e-05, + "loss": 0.1695, + "step": 6570 + }, + { + "epoch": 37.54857142857143, + "grad_norm": 33.05989456176758, + "learning_rate": 1.3834920634920637e-05, + "loss": 0.2705, + "step": 6571 + }, + { + "epoch": 37.55428571428571, + "grad_norm": 85.05858612060547, + "learning_rate": 1.382857142857143e-05, + "loss": 0.1861, + "step": 6572 + }, + { + "epoch": 37.56, + "grad_norm": 39.465545654296875, + "learning_rate": 1.3822222222222222e-05, + "loss": 0.2664, + "step": 6573 + }, + { + "epoch": 37.565714285714286, + "grad_norm": 26.797693252563477, + "learning_rate": 1.3815873015873015e-05, + "loss": 0.1232, + "step": 6574 + }, + { + "epoch": 37.57142857142857, + "grad_norm": 48.3971061706543, + "learning_rate": 1.3809523809523811e-05, + "loss": 0.2674, + "step": 6575 + }, + { + "epoch": 37.57714285714286, + "grad_norm": 72.57957458496094, + "learning_rate": 1.3803174603174604e-05, + "loss": 0.2684, + "step": 6576 + }, + { + "epoch": 37.582857142857144, + "grad_norm": 61.59852981567383, + "learning_rate": 1.3796825396825396e-05, + "loss": 0.2086, + "step": 6577 + }, + { + "epoch": 37.58857142857143, + "grad_norm": 57.468589782714844, + "learning_rate": 1.3790476190476189e-05, + "loss": 0.2249, + "step": 6578 + }, + { + "epoch": 37.59428571428572, + "grad_norm": 127.7385482788086, + "learning_rate": 1.3784126984126985e-05, + "loss": 0.2528, + "step": 6579 + }, + { + "epoch": 37.6, + "grad_norm": 43.52939224243164, + "learning_rate": 1.3777777777777778e-05, + "loss": 0.2149, + "step": 6580 + }, + { + "epoch": 37.605714285714285, + "grad_norm": 73.5933609008789, + "learning_rate": 
1.3771428571428572e-05, + "loss": 0.2148, + "step": 6581 + }, + { + "epoch": 37.61142857142857, + "grad_norm": 32.99773025512695, + "learning_rate": 1.3765079365079366e-05, + "loss": 0.2071, + "step": 6582 + }, + { + "epoch": 37.61714285714286, + "grad_norm": 39.80093002319336, + "learning_rate": 1.375873015873016e-05, + "loss": 0.239, + "step": 6583 + }, + { + "epoch": 37.62285714285714, + "grad_norm": 23.65448570251465, + "learning_rate": 1.3752380952380953e-05, + "loss": 0.1962, + "step": 6584 + }, + { + "epoch": 37.628571428571426, + "grad_norm": 63.404449462890625, + "learning_rate": 1.3746031746031746e-05, + "loss": 0.2124, + "step": 6585 + }, + { + "epoch": 37.63428571428572, + "grad_norm": 40.21116256713867, + "learning_rate": 1.3739682539682542e-05, + "loss": 0.1729, + "step": 6586 + }, + { + "epoch": 37.64, + "grad_norm": 116.45388793945312, + "learning_rate": 1.3733333333333335e-05, + "loss": 0.3224, + "step": 6587 + }, + { + "epoch": 37.645714285714284, + "grad_norm": 38.228187561035156, + "learning_rate": 1.3726984126984127e-05, + "loss": 0.2296, + "step": 6588 + }, + { + "epoch": 37.651428571428575, + "grad_norm": 26.770566940307617, + "learning_rate": 1.372063492063492e-05, + "loss": 0.2197, + "step": 6589 + }, + { + "epoch": 37.65714285714286, + "grad_norm": 71.01809692382812, + "learning_rate": 1.3714285714285716e-05, + "loss": 0.114, + "step": 6590 + }, + { + "epoch": 37.66285714285714, + "grad_norm": 61.19960021972656, + "learning_rate": 1.3707936507936508e-05, + "loss": 0.2393, + "step": 6591 + }, + { + "epoch": 37.668571428571425, + "grad_norm": 69.12433624267578, + "learning_rate": 1.3701587301587301e-05, + "loss": 0.2768, + "step": 6592 + }, + { + "epoch": 37.674285714285716, + "grad_norm": 28.768465042114258, + "learning_rate": 1.3695238095238097e-05, + "loss": 0.1579, + "step": 6593 + }, + { + "epoch": 37.68, + "grad_norm": 71.30499267578125, + "learning_rate": 1.368888888888889e-05, + "loss": 0.2484, + "step": 6594 + }, + { + "epoch": 
37.68571428571428, + "grad_norm": 37.34684371948242, + "learning_rate": 1.3682539682539682e-05, + "loss": 0.1406, + "step": 6595 + }, + { + "epoch": 37.691428571428574, + "grad_norm": 36.19123077392578, + "learning_rate": 1.3676190476190475e-05, + "loss": 0.1883, + "step": 6596 + }, + { + "epoch": 37.69714285714286, + "grad_norm": 31.692096710205078, + "learning_rate": 1.3669841269841271e-05, + "loss": 0.1799, + "step": 6597 + }, + { + "epoch": 37.70285714285714, + "grad_norm": 23.94115447998047, + "learning_rate": 1.3663492063492064e-05, + "loss": 0.2304, + "step": 6598 + }, + { + "epoch": 37.70857142857143, + "grad_norm": 67.50762939453125, + "learning_rate": 1.3657142857142858e-05, + "loss": 0.2755, + "step": 6599 + }, + { + "epoch": 37.714285714285715, + "grad_norm": 62.673072814941406, + "learning_rate": 1.365079365079365e-05, + "loss": 0.1291, + "step": 6600 + }, + { + "epoch": 37.72, + "grad_norm": 25.134071350097656, + "learning_rate": 1.3644444444444445e-05, + "loss": 0.2184, + "step": 6601 + }, + { + "epoch": 37.72571428571428, + "grad_norm": 48.198097229003906, + "learning_rate": 1.363809523809524e-05, + "loss": 0.1866, + "step": 6602 + }, + { + "epoch": 37.73142857142857, + "grad_norm": 68.64222717285156, + "learning_rate": 1.3631746031746032e-05, + "loss": 0.2145, + "step": 6603 + }, + { + "epoch": 37.73714285714286, + "grad_norm": 51.29213333129883, + "learning_rate": 1.3625396825396828e-05, + "loss": 0.1511, + "step": 6604 + }, + { + "epoch": 37.74285714285714, + "grad_norm": 37.644134521484375, + "learning_rate": 1.361904761904762e-05, + "loss": 0.3308, + "step": 6605 + }, + { + "epoch": 37.74857142857143, + "grad_norm": 25.032150268554688, + "learning_rate": 1.3612698412698413e-05, + "loss": 0.2497, + "step": 6606 + }, + { + "epoch": 37.754285714285714, + "grad_norm": 88.25428009033203, + "learning_rate": 1.3606349206349206e-05, + "loss": 0.1934, + "step": 6607 + }, + { + "epoch": 37.76, + "grad_norm": 30.80008316040039, + "learning_rate": 
1.3600000000000002e-05, + "loss": 0.143, + "step": 6608 + }, + { + "epoch": 37.76571428571429, + "grad_norm": 51.51103973388672, + "learning_rate": 1.3593650793650795e-05, + "loss": 0.1618, + "step": 6609 + }, + { + "epoch": 37.77142857142857, + "grad_norm": 38.2564811706543, + "learning_rate": 1.3587301587301587e-05, + "loss": 0.2607, + "step": 6610 + }, + { + "epoch": 37.777142857142856, + "grad_norm": 51.70465087890625, + "learning_rate": 1.358095238095238e-05, + "loss": 0.2431, + "step": 6611 + }, + { + "epoch": 37.78285714285714, + "grad_norm": 39.037296295166016, + "learning_rate": 1.3574603174603176e-05, + "loss": 0.1339, + "step": 6612 + }, + { + "epoch": 37.78857142857143, + "grad_norm": 143.49896240234375, + "learning_rate": 1.3568253968253968e-05, + "loss": 0.1926, + "step": 6613 + }, + { + "epoch": 37.794285714285714, + "grad_norm": 82.4721450805664, + "learning_rate": 1.3561904761904761e-05, + "loss": 0.1531, + "step": 6614 + }, + { + "epoch": 37.8, + "grad_norm": 30.966434478759766, + "learning_rate": 1.3555555555555557e-05, + "loss": 0.1682, + "step": 6615 + }, + { + "epoch": 37.80571428571429, + "grad_norm": 133.61900329589844, + "learning_rate": 1.354920634920635e-05, + "loss": 0.1517, + "step": 6616 + }, + { + "epoch": 37.81142857142857, + "grad_norm": 77.1509017944336, + "learning_rate": 1.3542857142857142e-05, + "loss": 0.2169, + "step": 6617 + }, + { + "epoch": 37.817142857142855, + "grad_norm": 51.897560119628906, + "learning_rate": 1.3536507936507937e-05, + "loss": 0.1983, + "step": 6618 + }, + { + "epoch": 37.822857142857146, + "grad_norm": 38.72932434082031, + "learning_rate": 1.3530158730158731e-05, + "loss": 0.2047, + "step": 6619 + }, + { + "epoch": 37.82857142857143, + "grad_norm": 55.455074310302734, + "learning_rate": 1.3523809523809525e-05, + "loss": 0.1845, + "step": 6620 + }, + { + "epoch": 37.83428571428571, + "grad_norm": 52.98893356323242, + "learning_rate": 1.3517460317460318e-05, + "loss": 0.1929, + "step": 6621 + }, + { + 
"epoch": 37.84, + "grad_norm": 89.14239501953125, + "learning_rate": 1.351111111111111e-05, + "loss": 0.2095, + "step": 6622 + }, + { + "epoch": 37.84571428571429, + "grad_norm": 28.368410110473633, + "learning_rate": 1.3504761904761907e-05, + "loss": 0.126, + "step": 6623 + }, + { + "epoch": 37.85142857142857, + "grad_norm": 34.06582260131836, + "learning_rate": 1.34984126984127e-05, + "loss": 0.1847, + "step": 6624 + }, + { + "epoch": 37.857142857142854, + "grad_norm": 71.48139190673828, + "learning_rate": 1.3492063492063492e-05, + "loss": 0.1883, + "step": 6625 + }, + { + "epoch": 37.862857142857145, + "grad_norm": 69.56444549560547, + "learning_rate": 1.3485714285714288e-05, + "loss": 0.219, + "step": 6626 + }, + { + "epoch": 37.86857142857143, + "grad_norm": 29.609966278076172, + "learning_rate": 1.347936507936508e-05, + "loss": 0.1876, + "step": 6627 + }, + { + "epoch": 37.87428571428571, + "grad_norm": 18.406742095947266, + "learning_rate": 1.3473015873015873e-05, + "loss": 0.1945, + "step": 6628 + }, + { + "epoch": 37.88, + "grad_norm": 46.326324462890625, + "learning_rate": 1.3466666666666666e-05, + "loss": 0.2001, + "step": 6629 + }, + { + "epoch": 37.885714285714286, + "grad_norm": 54.06023406982422, + "learning_rate": 1.3460317460317462e-05, + "loss": 0.2189, + "step": 6630 + }, + { + "epoch": 37.89142857142857, + "grad_norm": 17.997806549072266, + "learning_rate": 1.3453968253968255e-05, + "loss": 0.2359, + "step": 6631 + }, + { + "epoch": 37.89714285714286, + "grad_norm": 32.82901382446289, + "learning_rate": 1.3447619047619047e-05, + "loss": 0.1972, + "step": 6632 + }, + { + "epoch": 37.902857142857144, + "grad_norm": 86.90510559082031, + "learning_rate": 1.344126984126984e-05, + "loss": 0.2213, + "step": 6633 + }, + { + "epoch": 37.90857142857143, + "grad_norm": 28.77446937561035, + "learning_rate": 1.3434920634920636e-05, + "loss": 0.1452, + "step": 6634 + }, + { + "epoch": 37.91428571428571, + "grad_norm": 41.217227935791016, + "learning_rate": 
1.3428571428571429e-05, + "loss": 0.1812, + "step": 6635 + }, + { + "epoch": 37.92, + "grad_norm": 85.83415222167969, + "learning_rate": 1.3422222222222223e-05, + "loss": 0.2149, + "step": 6636 + }, + { + "epoch": 37.925714285714285, + "grad_norm": 32.781455993652344, + "learning_rate": 1.3415873015873017e-05, + "loss": 0.1277, + "step": 6637 + }, + { + "epoch": 37.93142857142857, + "grad_norm": 33.95721435546875, + "learning_rate": 1.3409523809523811e-05, + "loss": 0.1987, + "step": 6638 + }, + { + "epoch": 37.93714285714286, + "grad_norm": 277.95037841796875, + "learning_rate": 1.3403174603174604e-05, + "loss": 0.2591, + "step": 6639 + }, + { + "epoch": 37.94285714285714, + "grad_norm": 126.31138610839844, + "learning_rate": 1.3396825396825397e-05, + "loss": 0.2452, + "step": 6640 + }, + { + "epoch": 37.94857142857143, + "grad_norm": 55.9384880065918, + "learning_rate": 1.3390476190476193e-05, + "loss": 0.1754, + "step": 6641 + }, + { + "epoch": 37.95428571428572, + "grad_norm": 43.94575881958008, + "learning_rate": 1.3384126984126985e-05, + "loss": 0.4073, + "step": 6642 + }, + { + "epoch": 37.96, + "grad_norm": 36.381553649902344, + "learning_rate": 1.3377777777777778e-05, + "loss": 0.1684, + "step": 6643 + }, + { + "epoch": 37.965714285714284, + "grad_norm": 25.80427360534668, + "learning_rate": 1.337142857142857e-05, + "loss": 0.1604, + "step": 6644 + }, + { + "epoch": 37.97142857142857, + "grad_norm": 40.44108200073242, + "learning_rate": 1.3365079365079367e-05, + "loss": 0.1649, + "step": 6645 + }, + { + "epoch": 37.97714285714286, + "grad_norm": 60.07323455810547, + "learning_rate": 1.335873015873016e-05, + "loss": 0.1774, + "step": 6646 + }, + { + "epoch": 37.98285714285714, + "grad_norm": 39.3565559387207, + "learning_rate": 1.3352380952380952e-05, + "loss": 0.2106, + "step": 6647 + }, + { + "epoch": 37.988571428571426, + "grad_norm": 23.168386459350586, + "learning_rate": 1.3346031746031748e-05, + "loss": 0.1758, + "step": 6648 + }, + { + "epoch": 
37.994285714285716, + "grad_norm": 28.189754486083984, + "learning_rate": 1.333968253968254e-05, + "loss": 0.1882, + "step": 6649 + }, + { + "epoch": 38.0, + "grad_norm": 62.98371124267578, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.254, + "step": 6650 + }, + { + "epoch": 38.0, + "eval_classes": 0, + "eval_loss": 0.5931137800216675, + "eval_map": 0.9322, + "eval_map_50": 0.9651, + "eval_map_75": 0.9605, + "eval_map_large": 0.9322, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9322, + "eval_map_small": -1.0, + "eval_mar_1": 0.7854, + "eval_mar_10": 0.9737, + "eval_mar_100": 0.9746, + "eval_mar_100_per_class": 0.9746, + "eval_mar_large": 0.9746, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 12.7097, + "eval_samples_per_second": 23.132, + "eval_steps_per_second": 2.911, + "step": 6650 + }, + { + "epoch": 38.005714285714284, + "grad_norm": 65.34095764160156, + "learning_rate": 1.3326984126984126e-05, + "loss": 0.2391, + "step": 6651 + }, + { + "epoch": 38.011428571428574, + "grad_norm": 44.28903579711914, + "learning_rate": 1.3320634920634922e-05, + "loss": 0.1608, + "step": 6652 + }, + { + "epoch": 38.01714285714286, + "grad_norm": 39.059871673583984, + "learning_rate": 1.3314285714285715e-05, + "loss": 0.1272, + "step": 6653 + }, + { + "epoch": 38.02285714285714, + "grad_norm": 99.69107818603516, + "learning_rate": 1.3307936507936509e-05, + "loss": 0.2719, + "step": 6654 + }, + { + "epoch": 38.02857142857143, + "grad_norm": 84.99723052978516, + "learning_rate": 1.3301587301587302e-05, + "loss": 0.2298, + "step": 6655 + }, + { + "epoch": 38.034285714285716, + "grad_norm": 50.17849349975586, + "learning_rate": 1.3295238095238096e-05, + "loss": 0.2187, + "step": 6656 + }, + { + "epoch": 38.04, + "grad_norm": 76.07999420166016, + "learning_rate": 1.328888888888889e-05, + "loss": 0.2283, + "step": 6657 + }, + { + "epoch": 38.04571428571428, + "grad_norm": 113.50109100341797, + "learning_rate": 1.3282539682539683e-05, + "loss": 
0.1673, + "step": 6658 + }, + { + "epoch": 38.05142857142857, + "grad_norm": 31.31656265258789, + "learning_rate": 1.3276190476190479e-05, + "loss": 0.1588, + "step": 6659 + }, + { + "epoch": 38.05714285714286, + "grad_norm": 74.41105651855469, + "learning_rate": 1.3269841269841272e-05, + "loss": 0.2309, + "step": 6660 + }, + { + "epoch": 38.06285714285714, + "grad_norm": 160.22439575195312, + "learning_rate": 1.3263492063492064e-05, + "loss": 0.1336, + "step": 6661 + }, + { + "epoch": 38.06857142857143, + "grad_norm": 60.0269660949707, + "learning_rate": 1.3257142857142857e-05, + "loss": 0.1297, + "step": 6662 + }, + { + "epoch": 38.074285714285715, + "grad_norm": 58.124393463134766, + "learning_rate": 1.3250793650793653e-05, + "loss": 0.1698, + "step": 6663 + }, + { + "epoch": 38.08, + "grad_norm": 44.31272888183594, + "learning_rate": 1.3244444444444445e-05, + "loss": 0.2337, + "step": 6664 + }, + { + "epoch": 38.08571428571429, + "grad_norm": 34.844215393066406, + "learning_rate": 1.3238095238095238e-05, + "loss": 0.1659, + "step": 6665 + }, + { + "epoch": 38.09142857142857, + "grad_norm": 18.713459014892578, + "learning_rate": 1.323174603174603e-05, + "loss": 0.1737, + "step": 6666 + }, + { + "epoch": 38.097142857142856, + "grad_norm": 49.55125427246094, + "learning_rate": 1.3225396825396827e-05, + "loss": 0.1242, + "step": 6667 + }, + { + "epoch": 38.10285714285714, + "grad_norm": 62.112083435058594, + "learning_rate": 1.321904761904762e-05, + "loss": 0.3271, + "step": 6668 + }, + { + "epoch": 38.10857142857143, + "grad_norm": 24.108840942382812, + "learning_rate": 1.3212698412698412e-05, + "loss": 0.2122, + "step": 6669 + }, + { + "epoch": 38.114285714285714, + "grad_norm": 63.91169738769531, + "learning_rate": 1.3206349206349208e-05, + "loss": 0.1323, + "step": 6670 + }, + { + "epoch": 38.12, + "grad_norm": 52.37907028198242, + "learning_rate": 1.32e-05, + "loss": 0.201, + "step": 6671 + }, + { + "epoch": 38.12571428571429, + "grad_norm": 29.8988037109375, 
+ "learning_rate": 1.3193650793650793e-05, + "loss": 0.2407, + "step": 6672 + }, + { + "epoch": 38.13142857142857, + "grad_norm": 20.84734344482422, + "learning_rate": 1.3187301587301588e-05, + "loss": 0.1484, + "step": 6673 + }, + { + "epoch": 38.137142857142855, + "grad_norm": 45.14535903930664, + "learning_rate": 1.3180952380952382e-05, + "loss": 0.2555, + "step": 6674 + }, + { + "epoch": 38.142857142857146, + "grad_norm": 1290.4554443359375, + "learning_rate": 1.3174603174603176e-05, + "loss": 0.192, + "step": 6675 + }, + { + "epoch": 38.14857142857143, + "grad_norm": 26.882118225097656, + "learning_rate": 1.3168253968253969e-05, + "loss": 0.1516, + "step": 6676 + }, + { + "epoch": 38.15428571428571, + "grad_norm": 56.91200637817383, + "learning_rate": 1.3161904761904762e-05, + "loss": 0.27, + "step": 6677 + }, + { + "epoch": 38.16, + "grad_norm": 47.5983772277832, + "learning_rate": 1.3155555555555558e-05, + "loss": 0.1391, + "step": 6678 + }, + { + "epoch": 38.16571428571429, + "grad_norm": 13.200896263122559, + "learning_rate": 1.314920634920635e-05, + "loss": 0.1617, + "step": 6679 + }, + { + "epoch": 38.17142857142857, + "grad_norm": 67.04146575927734, + "learning_rate": 1.3142857142857143e-05, + "loss": 0.2372, + "step": 6680 + }, + { + "epoch": 38.177142857142854, + "grad_norm": 38.76677322387695, + "learning_rate": 1.3136507936507936e-05, + "loss": 0.1755, + "step": 6681 + }, + { + "epoch": 38.182857142857145, + "grad_norm": 36.118228912353516, + "learning_rate": 1.3130158730158732e-05, + "loss": 0.1801, + "step": 6682 + }, + { + "epoch": 38.18857142857143, + "grad_norm": 61.32090377807617, + "learning_rate": 1.3123809523809524e-05, + "loss": 0.2072, + "step": 6683 + }, + { + "epoch": 38.19428571428571, + "grad_norm": 82.47146606445312, + "learning_rate": 1.3117460317460317e-05, + "loss": 0.2513, + "step": 6684 + }, + { + "epoch": 38.2, + "grad_norm": 40.535369873046875, + "learning_rate": 1.3111111111111113e-05, + "loss": 0.2075, + "step": 6685 + }, + 
{ + "epoch": 38.205714285714286, + "grad_norm": 19.5318546295166, + "learning_rate": 1.3104761904761905e-05, + "loss": 0.1199, + "step": 6686 + }, + { + "epoch": 38.21142857142857, + "grad_norm": 61.96049499511719, + "learning_rate": 1.3098412698412698e-05, + "loss": 0.1723, + "step": 6687 + }, + { + "epoch": 38.21714285714286, + "grad_norm": 55.3178596496582, + "learning_rate": 1.3092063492063492e-05, + "loss": 0.1558, + "step": 6688 + }, + { + "epoch": 38.222857142857144, + "grad_norm": 52.19147491455078, + "learning_rate": 1.3085714285714287e-05, + "loss": 0.2442, + "step": 6689 + }, + { + "epoch": 38.22857142857143, + "grad_norm": 58.848388671875, + "learning_rate": 1.307936507936508e-05, + "loss": 0.2005, + "step": 6690 + }, + { + "epoch": 38.23428571428571, + "grad_norm": 28.111515045166016, + "learning_rate": 1.3073015873015874e-05, + "loss": 0.1551, + "step": 6691 + }, + { + "epoch": 38.24, + "grad_norm": 38.20722579956055, + "learning_rate": 1.3066666666666666e-05, + "loss": 0.1225, + "step": 6692 + }, + { + "epoch": 38.245714285714286, + "grad_norm": 16.029094696044922, + "learning_rate": 1.3060317460317462e-05, + "loss": 0.1604, + "step": 6693 + }, + { + "epoch": 38.25142857142857, + "grad_norm": 17.04863739013672, + "learning_rate": 1.3053968253968255e-05, + "loss": 0.2181, + "step": 6694 + }, + { + "epoch": 38.25714285714286, + "grad_norm": 54.93702697753906, + "learning_rate": 1.3047619047619048e-05, + "loss": 0.1461, + "step": 6695 + }, + { + "epoch": 38.26285714285714, + "grad_norm": 36.58909606933594, + "learning_rate": 1.3041269841269844e-05, + "loss": 0.134, + "step": 6696 + }, + { + "epoch": 38.26857142857143, + "grad_norm": 50.77967834472656, + "learning_rate": 1.3034920634920636e-05, + "loss": 0.2437, + "step": 6697 + }, + { + "epoch": 38.27428571428572, + "grad_norm": 41.20557403564453, + "learning_rate": 1.3028571428571429e-05, + "loss": 0.1681, + "step": 6698 + }, + { + "epoch": 38.28, + "grad_norm": 56.51957702636719, + "learning_rate": 
1.3022222222222222e-05, + "loss": 0.1245, + "step": 6699 + }, + { + "epoch": 38.285714285714285, + "grad_norm": 95.08709716796875, + "learning_rate": 1.3015873015873018e-05, + "loss": 0.1658, + "step": 6700 + }, + { + "epoch": 38.29142857142857, + "grad_norm": 18.885896682739258, + "learning_rate": 1.300952380952381e-05, + "loss": 0.1287, + "step": 6701 + }, + { + "epoch": 38.29714285714286, + "grad_norm": 27.77571678161621, + "learning_rate": 1.3003174603174603e-05, + "loss": 0.1597, + "step": 6702 + }, + { + "epoch": 38.30285714285714, + "grad_norm": 59.84832763671875, + "learning_rate": 1.2996825396825396e-05, + "loss": 0.1875, + "step": 6703 + }, + { + "epoch": 38.308571428571426, + "grad_norm": 79.235107421875, + "learning_rate": 1.2990476190476192e-05, + "loss": 0.2608, + "step": 6704 + }, + { + "epoch": 38.31428571428572, + "grad_norm": 40.934234619140625, + "learning_rate": 1.2984126984126984e-05, + "loss": 0.1936, + "step": 6705 + }, + { + "epoch": 38.32, + "grad_norm": 45.221771240234375, + "learning_rate": 1.2977777777777777e-05, + "loss": 0.2032, + "step": 6706 + }, + { + "epoch": 38.325714285714284, + "grad_norm": 30.7657527923584, + "learning_rate": 1.2971428571428573e-05, + "loss": 0.1654, + "step": 6707 + }, + { + "epoch": 38.331428571428575, + "grad_norm": 84.09855651855469, + "learning_rate": 1.2965079365079365e-05, + "loss": 0.2027, + "step": 6708 + }, + { + "epoch": 38.33714285714286, + "grad_norm": 34.646461486816406, + "learning_rate": 1.295873015873016e-05, + "loss": 0.1374, + "step": 6709 + }, + { + "epoch": 38.34285714285714, + "grad_norm": 36.283782958984375, + "learning_rate": 1.2952380952380952e-05, + "loss": 0.2018, + "step": 6710 + }, + { + "epoch": 38.348571428571425, + "grad_norm": 33.82155227661133, + "learning_rate": 1.2946031746031748e-05, + "loss": 0.1951, + "step": 6711 + }, + { + "epoch": 38.354285714285716, + "grad_norm": 20.42057991027832, + "learning_rate": 1.2939682539682541e-05, + "loss": 0.1787, + "step": 6712 + }, + { + 
"epoch": 38.36, + "grad_norm": 62.836246490478516, + "learning_rate": 1.2933333333333334e-05, + "loss": 0.2216, + "step": 6713 + }, + { + "epoch": 38.36571428571428, + "grad_norm": 44.87102127075195, + "learning_rate": 1.2926984126984126e-05, + "loss": 0.1178, + "step": 6714 + }, + { + "epoch": 38.371428571428574, + "grad_norm": 55.18440628051758, + "learning_rate": 1.2920634920634922e-05, + "loss": 0.1506, + "step": 6715 + }, + { + "epoch": 38.37714285714286, + "grad_norm": 38.266483306884766, + "learning_rate": 1.2914285714285715e-05, + "loss": 0.1882, + "step": 6716 + }, + { + "epoch": 38.38285714285714, + "grad_norm": 77.53422546386719, + "learning_rate": 1.2907936507936508e-05, + "loss": 0.2116, + "step": 6717 + }, + { + "epoch": 38.38857142857143, + "grad_norm": 128.25515747070312, + "learning_rate": 1.2901587301587304e-05, + "loss": 0.268, + "step": 6718 + }, + { + "epoch": 38.394285714285715, + "grad_norm": 21.647573471069336, + "learning_rate": 1.2895238095238096e-05, + "loss": 0.1649, + "step": 6719 + }, + { + "epoch": 38.4, + "grad_norm": 27.34562110900879, + "learning_rate": 1.2888888888888889e-05, + "loss": 0.214, + "step": 6720 + }, + { + "epoch": 38.40571428571428, + "grad_norm": 221.07102966308594, + "learning_rate": 1.2882539682539682e-05, + "loss": 0.1625, + "step": 6721 + }, + { + "epoch": 38.41142857142857, + "grad_norm": 53.76383972167969, + "learning_rate": 1.2876190476190478e-05, + "loss": 0.1781, + "step": 6722 + }, + { + "epoch": 38.417142857142856, + "grad_norm": 55.51323318481445, + "learning_rate": 1.286984126984127e-05, + "loss": 0.1353, + "step": 6723 + }, + { + "epoch": 38.42285714285714, + "grad_norm": 192.99331665039062, + "learning_rate": 1.2863492063492063e-05, + "loss": 0.1696, + "step": 6724 + }, + { + "epoch": 38.42857142857143, + "grad_norm": 38.05833053588867, + "learning_rate": 1.2857142857142857e-05, + "loss": 0.2002, + "step": 6725 + }, + { + "epoch": 38.434285714285714, + "grad_norm": 24.698301315307617, + 
"learning_rate": 1.2850793650793652e-05, + "loss": 0.1436, + "step": 6726 + }, + { + "epoch": 38.44, + "grad_norm": 27.2955322265625, + "learning_rate": 1.2844444444444446e-05, + "loss": 0.1325, + "step": 6727 + }, + { + "epoch": 38.44571428571429, + "grad_norm": 29.796966552734375, + "learning_rate": 1.2838095238095239e-05, + "loss": 0.1694, + "step": 6728 + }, + { + "epoch": 38.45142857142857, + "grad_norm": 46.824832916259766, + "learning_rate": 1.2831746031746033e-05, + "loss": 0.1103, + "step": 6729 + }, + { + "epoch": 38.457142857142856, + "grad_norm": 31.10439109802246, + "learning_rate": 1.2825396825396827e-05, + "loss": 0.2151, + "step": 6730 + }, + { + "epoch": 38.462857142857146, + "grad_norm": 60.536861419677734, + "learning_rate": 1.281904761904762e-05, + "loss": 0.1598, + "step": 6731 + }, + { + "epoch": 38.46857142857143, + "grad_norm": 42.14023208618164, + "learning_rate": 1.2812698412698412e-05, + "loss": 0.185, + "step": 6732 + }, + { + "epoch": 38.47428571428571, + "grad_norm": 44.388004302978516, + "learning_rate": 1.2806349206349208e-05, + "loss": 0.1841, + "step": 6733 + }, + { + "epoch": 38.48, + "grad_norm": 50.36425018310547, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.1711, + "step": 6734 + }, + { + "epoch": 38.48571428571429, + "grad_norm": 33.617488861083984, + "learning_rate": 1.2793650793650794e-05, + "loss": 0.2432, + "step": 6735 + }, + { + "epoch": 38.49142857142857, + "grad_norm": 72.66381072998047, + "learning_rate": 1.2787301587301586e-05, + "loss": 0.2402, + "step": 6736 + }, + { + "epoch": 38.497142857142855, + "grad_norm": 35.83653259277344, + "learning_rate": 1.2780952380952382e-05, + "loss": 0.14, + "step": 6737 + }, + { + "epoch": 38.502857142857145, + "grad_norm": 53.331642150878906, + "learning_rate": 1.2774603174603175e-05, + "loss": 0.1875, + "step": 6738 + }, + { + "epoch": 38.50857142857143, + "grad_norm": 30.422245025634766, + "learning_rate": 1.2768253968253968e-05, + "loss": 0.2175, + "step": 6739 + }, + 
{ + "epoch": 38.51428571428571, + "grad_norm": 57.215858459472656, + "learning_rate": 1.2761904761904764e-05, + "loss": 0.2662, + "step": 6740 + }, + { + "epoch": 38.52, + "grad_norm": 164.78506469726562, + "learning_rate": 1.2755555555555556e-05, + "loss": 0.2561, + "step": 6741 + }, + { + "epoch": 38.52571428571429, + "grad_norm": 70.9798812866211, + "learning_rate": 1.2749206349206349e-05, + "loss": 0.2059, + "step": 6742 + }, + { + "epoch": 38.53142857142857, + "grad_norm": 19.46929359436035, + "learning_rate": 1.2742857142857143e-05, + "loss": 0.1481, + "step": 6743 + }, + { + "epoch": 38.537142857142854, + "grad_norm": 41.63719177246094, + "learning_rate": 1.2736507936507938e-05, + "loss": 0.1382, + "step": 6744 + }, + { + "epoch": 38.542857142857144, + "grad_norm": 70.39461517333984, + "learning_rate": 1.273015873015873e-05, + "loss": 0.1886, + "step": 6745 + }, + { + "epoch": 38.54857142857143, + "grad_norm": 406.7051696777344, + "learning_rate": 1.2723809523809525e-05, + "loss": 0.2394, + "step": 6746 + }, + { + "epoch": 38.55428571428571, + "grad_norm": 33.21284484863281, + "learning_rate": 1.2717460317460317e-05, + "loss": 0.1635, + "step": 6747 + }, + { + "epoch": 38.56, + "grad_norm": 37.28272247314453, + "learning_rate": 1.2711111111111113e-05, + "loss": 0.2497, + "step": 6748 + }, + { + "epoch": 38.565714285714286, + "grad_norm": 58.15789031982422, + "learning_rate": 1.2704761904761906e-05, + "loss": 0.1683, + "step": 6749 + }, + { + "epoch": 38.57142857142857, + "grad_norm": 55.796722412109375, + "learning_rate": 1.2698412698412699e-05, + "loss": 0.1919, + "step": 6750 + }, + { + "epoch": 38.57714285714286, + "grad_norm": 32.311729431152344, + "learning_rate": 1.2692063492063495e-05, + "loss": 0.1835, + "step": 6751 + }, + { + "epoch": 38.582857142857144, + "grad_norm": 35.519737243652344, + "learning_rate": 1.2685714285714287e-05, + "loss": 0.193, + "step": 6752 + }, + { + "epoch": 38.58857142857143, + "grad_norm": 42.47575378417969, + 
"learning_rate": 1.267936507936508e-05, + "loss": 0.2063, + "step": 6753 + }, + { + "epoch": 38.59428571428572, + "grad_norm": 52.6748161315918, + "learning_rate": 1.2673015873015872e-05, + "loss": 0.2368, + "step": 6754 + }, + { + "epoch": 38.6, + "grad_norm": 32.23313522338867, + "learning_rate": 1.2666666666666668e-05, + "loss": 0.2966, + "step": 6755 + }, + { + "epoch": 38.605714285714285, + "grad_norm": 71.69877624511719, + "learning_rate": 1.2660317460317461e-05, + "loss": 0.283, + "step": 6756 + }, + { + "epoch": 38.61142857142857, + "grad_norm": 29.855546951293945, + "learning_rate": 1.2653968253968254e-05, + "loss": 0.1624, + "step": 6757 + }, + { + "epoch": 38.61714285714286, + "grad_norm": 28.4865665435791, + "learning_rate": 1.2647619047619046e-05, + "loss": 0.1689, + "step": 6758 + }, + { + "epoch": 38.62285714285714, + "grad_norm": 82.87152099609375, + "learning_rate": 1.2641269841269842e-05, + "loss": 0.1722, + "step": 6759 + }, + { + "epoch": 38.628571428571426, + "grad_norm": 35.51962661743164, + "learning_rate": 1.2634920634920635e-05, + "loss": 0.2133, + "step": 6760 + }, + { + "epoch": 38.63428571428572, + "grad_norm": 53.56692886352539, + "learning_rate": 1.2628571428571428e-05, + "loss": 0.1715, + "step": 6761 + }, + { + "epoch": 38.64, + "grad_norm": 34.60567855834961, + "learning_rate": 1.2622222222222224e-05, + "loss": 0.1807, + "step": 6762 + }, + { + "epoch": 38.645714285714284, + "grad_norm": 77.77987670898438, + "learning_rate": 1.2615873015873016e-05, + "loss": 0.2579, + "step": 6763 + }, + { + "epoch": 38.651428571428575, + "grad_norm": 41.919795989990234, + "learning_rate": 1.260952380952381e-05, + "loss": 0.195, + "step": 6764 + }, + { + "epoch": 38.65714285714286, + "grad_norm": 30.047168731689453, + "learning_rate": 1.2603174603174603e-05, + "loss": 0.1852, + "step": 6765 + }, + { + "epoch": 38.66285714285714, + "grad_norm": 286.2810363769531, + "learning_rate": 1.25968253968254e-05, + "loss": 0.1347, + "step": 6766 + }, + { + 
"epoch": 38.668571428571425, + "grad_norm": 71.60431671142578, + "learning_rate": 1.2590476190476192e-05, + "loss": 0.1787, + "step": 6767 + }, + { + "epoch": 38.674285714285716, + "grad_norm": 48.86784744262695, + "learning_rate": 1.2584126984126985e-05, + "loss": 0.1596, + "step": 6768 + }, + { + "epoch": 38.68, + "grad_norm": 26.185443878173828, + "learning_rate": 1.2577777777777777e-05, + "loss": 0.1406, + "step": 6769 + }, + { + "epoch": 38.68571428571428, + "grad_norm": 51.83404541015625, + "learning_rate": 1.2571428571428573e-05, + "loss": 0.1557, + "step": 6770 + }, + { + "epoch": 38.691428571428574, + "grad_norm": 52.92089080810547, + "learning_rate": 1.2565079365079366e-05, + "loss": 0.1805, + "step": 6771 + }, + { + "epoch": 38.69714285714286, + "grad_norm": 53.78825759887695, + "learning_rate": 1.2558730158730159e-05, + "loss": 0.2165, + "step": 6772 + }, + { + "epoch": 38.70285714285714, + "grad_norm": 81.38287353515625, + "learning_rate": 1.2552380952380955e-05, + "loss": 0.2713, + "step": 6773 + }, + { + "epoch": 38.70857142857143, + "grad_norm": 43.09062194824219, + "learning_rate": 1.2546031746031747e-05, + "loss": 0.187, + "step": 6774 + }, + { + "epoch": 38.714285714285715, + "grad_norm": 50.66502380371094, + "learning_rate": 1.253968253968254e-05, + "loss": 0.182, + "step": 6775 + }, + { + "epoch": 38.72, + "grad_norm": 25.26409149169922, + "learning_rate": 1.2533333333333332e-05, + "loss": 0.1729, + "step": 6776 + }, + { + "epoch": 38.72571428571428, + "grad_norm": 45.1693115234375, + "learning_rate": 1.2526984126984129e-05, + "loss": 0.1879, + "step": 6777 + }, + { + "epoch": 38.73142857142857, + "grad_norm": 50.87795639038086, + "learning_rate": 1.2520634920634921e-05, + "loss": 0.1999, + "step": 6778 + }, + { + "epoch": 38.73714285714286, + "grad_norm": 50.23199462890625, + "learning_rate": 1.2514285714285714e-05, + "loss": 0.1491, + "step": 6779 + }, + { + "epoch": 38.74285714285714, + "grad_norm": 46.01109313964844, + "learning_rate": 
1.2507936507936508e-05, + "loss": 0.1764, + "step": 6780 + }, + { + "epoch": 38.74857142857143, + "grad_norm": 84.88690185546875, + "learning_rate": 1.2501587301587302e-05, + "loss": 0.2689, + "step": 6781 + }, + { + "epoch": 38.754285714285714, + "grad_norm": 59.24235153198242, + "learning_rate": 1.2495238095238097e-05, + "loss": 0.2036, + "step": 6782 + }, + { + "epoch": 38.76, + "grad_norm": 168.68218994140625, + "learning_rate": 1.248888888888889e-05, + "loss": 0.3532, + "step": 6783 + }, + { + "epoch": 38.76571428571429, + "grad_norm": 53.04069900512695, + "learning_rate": 1.2482539682539684e-05, + "loss": 0.229, + "step": 6784 + }, + { + "epoch": 38.77142857142857, + "grad_norm": 20.792329788208008, + "learning_rate": 1.2476190476190478e-05, + "loss": 0.2167, + "step": 6785 + }, + { + "epoch": 38.777142857142856, + "grad_norm": 80.4717025756836, + "learning_rate": 1.246984126984127e-05, + "loss": 0.2723, + "step": 6786 + }, + { + "epoch": 38.78285714285714, + "grad_norm": 67.51244354248047, + "learning_rate": 1.2463492063492065e-05, + "loss": 0.2109, + "step": 6787 + }, + { + "epoch": 38.78857142857143, + "grad_norm": 22.83247947692871, + "learning_rate": 1.2457142857142858e-05, + "loss": 0.1464, + "step": 6788 + }, + { + "epoch": 38.794285714285714, + "grad_norm": 19.295541763305664, + "learning_rate": 1.2450793650793652e-05, + "loss": 0.1823, + "step": 6789 + }, + { + "epoch": 38.8, + "grad_norm": 52.79185485839844, + "learning_rate": 1.2444444444444445e-05, + "loss": 0.1479, + "step": 6790 + }, + { + "epoch": 38.80571428571429, + "grad_norm": 66.22676086425781, + "learning_rate": 1.2438095238095239e-05, + "loss": 0.1586, + "step": 6791 + }, + { + "epoch": 38.81142857142857, + "grad_norm": 331.5667419433594, + "learning_rate": 1.2431746031746032e-05, + "loss": 0.2117, + "step": 6792 + }, + { + "epoch": 38.817142857142855, + "grad_norm": 593.6991577148438, + "learning_rate": 1.2425396825396826e-05, + "loss": 0.1801, + "step": 6793 + }, + { + "epoch": 
38.822857142857146, + "grad_norm": 39.879024505615234, + "learning_rate": 1.2419047619047619e-05, + "loss": 0.2398, + "step": 6794 + }, + { + "epoch": 38.82857142857143, + "grad_norm": 584.5859375, + "learning_rate": 1.2412698412698413e-05, + "loss": 0.1553, + "step": 6795 + }, + { + "epoch": 38.83428571428571, + "grad_norm": 34.988075256347656, + "learning_rate": 1.2406349206349207e-05, + "loss": 0.2081, + "step": 6796 + }, + { + "epoch": 38.84, + "grad_norm": 54.2407341003418, + "learning_rate": 1.24e-05, + "loss": 0.1424, + "step": 6797 + }, + { + "epoch": 38.84571428571429, + "grad_norm": 39.45024108886719, + "learning_rate": 1.2393650793650794e-05, + "loss": 0.2575, + "step": 6798 + }, + { + "epoch": 38.85142857142857, + "grad_norm": 32.569522857666016, + "learning_rate": 1.2387301587301589e-05, + "loss": 0.1605, + "step": 6799 + }, + { + "epoch": 38.857142857142854, + "grad_norm": 24.382707595825195, + "learning_rate": 1.2380952380952381e-05, + "loss": 0.1615, + "step": 6800 + }, + { + "epoch": 38.862857142857145, + "grad_norm": 26.837656021118164, + "learning_rate": 1.2374603174603175e-05, + "loss": 0.1472, + "step": 6801 + }, + { + "epoch": 38.86857142857143, + "grad_norm": 377.7287902832031, + "learning_rate": 1.236825396825397e-05, + "loss": 0.2733, + "step": 6802 + }, + { + "epoch": 38.87428571428571, + "grad_norm": 25.140079498291016, + "learning_rate": 1.2361904761904762e-05, + "loss": 0.1528, + "step": 6803 + }, + { + "epoch": 38.88, + "grad_norm": 20.54310417175293, + "learning_rate": 1.2355555555555557e-05, + "loss": 0.2128, + "step": 6804 + }, + { + "epoch": 38.885714285714286, + "grad_norm": 17.899181365966797, + "learning_rate": 1.234920634920635e-05, + "loss": 0.2293, + "step": 6805 + }, + { + "epoch": 38.89142857142857, + "grad_norm": 123.19231414794922, + "learning_rate": 1.2342857142857144e-05, + "loss": 0.1704, + "step": 6806 + }, + { + "epoch": 38.89714285714286, + "grad_norm": 86.83697509765625, + "learning_rate": 1.2336507936507938e-05, + 
"loss": 0.1935, + "step": 6807 + }, + { + "epoch": 38.902857142857144, + "grad_norm": 35.76729202270508, + "learning_rate": 1.233015873015873e-05, + "loss": 0.1891, + "step": 6808 + }, + { + "epoch": 38.90857142857143, + "grad_norm": 59.41086959838867, + "learning_rate": 1.2323809523809525e-05, + "loss": 0.2542, + "step": 6809 + }, + { + "epoch": 38.91428571428571, + "grad_norm": 29.30520248413086, + "learning_rate": 1.2317460317460318e-05, + "loss": 0.1732, + "step": 6810 + }, + { + "epoch": 38.92, + "grad_norm": 61.0394287109375, + "learning_rate": 1.2311111111111112e-05, + "loss": 0.1548, + "step": 6811 + }, + { + "epoch": 38.925714285714285, + "grad_norm": 147.2695770263672, + "learning_rate": 1.2304761904761905e-05, + "loss": 0.1875, + "step": 6812 + }, + { + "epoch": 38.93142857142857, + "grad_norm": 50.43434143066406, + "learning_rate": 1.2298412698412699e-05, + "loss": 0.1551, + "step": 6813 + }, + { + "epoch": 38.93714285714286, + "grad_norm": 43.08822250366211, + "learning_rate": 1.2292063492063492e-05, + "loss": 0.1686, + "step": 6814 + }, + { + "epoch": 38.94285714285714, + "grad_norm": 59.40639877319336, + "learning_rate": 1.2285714285714286e-05, + "loss": 0.2309, + "step": 6815 + }, + { + "epoch": 38.94857142857143, + "grad_norm": 34.71112060546875, + "learning_rate": 1.2279365079365079e-05, + "loss": 0.151, + "step": 6816 + }, + { + "epoch": 38.95428571428572, + "grad_norm": 67.87384796142578, + "learning_rate": 1.2273015873015873e-05, + "loss": 0.2314, + "step": 6817 + }, + { + "epoch": 38.96, + "grad_norm": 29.375036239624023, + "learning_rate": 1.2266666666666667e-05, + "loss": 0.2571, + "step": 6818 + }, + { + "epoch": 38.965714285714284, + "grad_norm": 54.79815673828125, + "learning_rate": 1.2260317460317462e-05, + "loss": 0.2185, + "step": 6819 + }, + { + "epoch": 38.97142857142857, + "grad_norm": 27.393661499023438, + "learning_rate": 1.2253968253968256e-05, + "loss": 0.1525, + "step": 6820 + }, + { + "epoch": 38.97714285714286, + "grad_norm": 
46.51731872558594, + "learning_rate": 1.2247619047619049e-05, + "loss": 0.1962, + "step": 6821 + }, + { + "epoch": 38.98285714285714, + "grad_norm": 48.312679290771484, + "learning_rate": 1.2241269841269843e-05, + "loss": 0.2483, + "step": 6822 + }, + { + "epoch": 38.988571428571426, + "grad_norm": 1494.267578125, + "learning_rate": 1.2234920634920636e-05, + "loss": 0.2636, + "step": 6823 + }, + { + "epoch": 38.994285714285716, + "grad_norm": 70.67123413085938, + "learning_rate": 1.222857142857143e-05, + "loss": 0.2336, + "step": 6824 + }, + { + "epoch": 39.0, + "grad_norm": 52.84934616088867, + "learning_rate": 1.2222222222222222e-05, + "loss": 0.2018, + "step": 6825 + }, + { + "epoch": 39.0, + "eval_classes": 0, + "eval_loss": 0.5795198678970337, + "eval_map": 0.929, + "eval_map_50": 0.9651, + "eval_map_75": 0.9564, + "eval_map_large": 0.929, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.929, + "eval_map_small": -1.0, + "eval_mar_1": 0.7895, + "eval_mar_10": 0.9724, + "eval_mar_100": 0.9733, + "eval_mar_100_per_class": 0.9733, + "eval_mar_large": 0.9733, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.0539, + "eval_samples_per_second": 22.522, + "eval_steps_per_second": 2.834, + "step": 6825 + }, + { + "epoch": 39.005714285714284, + "grad_norm": 47.03675842285156, + "learning_rate": 1.2215873015873017e-05, + "loss": 0.1622, + "step": 6826 + }, + { + "epoch": 39.011428571428574, + "grad_norm": 36.863685607910156, + "learning_rate": 1.220952380952381e-05, + "loss": 0.1944, + "step": 6827 + }, + { + "epoch": 39.01714285714286, + "grad_norm": 77.5301284790039, + "learning_rate": 1.2203174603174604e-05, + "loss": 0.2398, + "step": 6828 + }, + { + "epoch": 39.02285714285714, + "grad_norm": 25.368093490600586, + "learning_rate": 1.2196825396825396e-05, + "loss": 0.1982, + "step": 6829 + }, + { + "epoch": 39.02857142857143, + "grad_norm": 76.29206085205078, + "learning_rate": 1.219047619047619e-05, + "loss": 0.2138, + "step": 6830 + }, + 
{ + "epoch": 39.034285714285716, + "grad_norm": 19.772838592529297, + "learning_rate": 1.2184126984126985e-05, + "loss": 0.1461, + "step": 6831 + }, + { + "epoch": 39.04, + "grad_norm": 109.93661499023438, + "learning_rate": 1.2177777777777778e-05, + "loss": 0.1707, + "step": 6832 + }, + { + "epoch": 39.04571428571428, + "grad_norm": 69.74849700927734, + "learning_rate": 1.2171428571428572e-05, + "loss": 0.1404, + "step": 6833 + }, + { + "epoch": 39.05142857142857, + "grad_norm": 34.377532958984375, + "learning_rate": 1.2165079365079365e-05, + "loss": 0.199, + "step": 6834 + }, + { + "epoch": 39.05714285714286, + "grad_norm": 1229.432373046875, + "learning_rate": 1.2158730158730159e-05, + "loss": 0.2692, + "step": 6835 + }, + { + "epoch": 39.06285714285714, + "grad_norm": 55.80887985229492, + "learning_rate": 1.2152380952380953e-05, + "loss": 0.2974, + "step": 6836 + }, + { + "epoch": 39.06857142857143, + "grad_norm": 16.48853302001953, + "learning_rate": 1.2146031746031748e-05, + "loss": 0.223, + "step": 6837 + }, + { + "epoch": 39.074285714285715, + "grad_norm": 32.252227783203125, + "learning_rate": 1.213968253968254e-05, + "loss": 0.2561, + "step": 6838 + }, + { + "epoch": 39.08, + "grad_norm": 275.59722900390625, + "learning_rate": 1.2133333333333335e-05, + "loss": 0.2649, + "step": 6839 + }, + { + "epoch": 39.08571428571429, + "grad_norm": 17.12618064880371, + "learning_rate": 1.2126984126984127e-05, + "loss": 0.1722, + "step": 6840 + }, + { + "epoch": 39.09142857142857, + "grad_norm": 52.881526947021484, + "learning_rate": 1.2120634920634922e-05, + "loss": 0.1779, + "step": 6841 + }, + { + "epoch": 39.097142857142856, + "grad_norm": 52.06009292602539, + "learning_rate": 1.2114285714285716e-05, + "loss": 0.2009, + "step": 6842 + }, + { + "epoch": 39.10285714285714, + "grad_norm": 74.94755554199219, + "learning_rate": 1.2107936507936509e-05, + "loss": 0.1997, + "step": 6843 + }, + { + "epoch": 39.10857142857143, + "grad_norm": 45.03340530395508, + 
"learning_rate": 1.2101587301587303e-05, + "loss": 0.161, + "step": 6844 + }, + { + "epoch": 39.114285714285714, + "grad_norm": 45.907249450683594, + "learning_rate": 1.2095238095238096e-05, + "loss": 0.1382, + "step": 6845 + }, + { + "epoch": 39.12, + "grad_norm": 76.25215148925781, + "learning_rate": 1.208888888888889e-05, + "loss": 0.2699, + "step": 6846 + }, + { + "epoch": 39.12571428571429, + "grad_norm": 46.23180389404297, + "learning_rate": 1.2082539682539682e-05, + "loss": 0.1699, + "step": 6847 + }, + { + "epoch": 39.13142857142857, + "grad_norm": 40.75603485107422, + "learning_rate": 1.2076190476190477e-05, + "loss": 0.1141, + "step": 6848 + }, + { + "epoch": 39.137142857142855, + "grad_norm": 27.2888126373291, + "learning_rate": 1.206984126984127e-05, + "loss": 0.1497, + "step": 6849 + }, + { + "epoch": 39.142857142857146, + "grad_norm": 39.695926666259766, + "learning_rate": 1.2063492063492064e-05, + "loss": 0.2857, + "step": 6850 + }, + { + "epoch": 39.14857142857143, + "grad_norm": 34.217193603515625, + "learning_rate": 1.2057142857142856e-05, + "loss": 0.2368, + "step": 6851 + }, + { + "epoch": 39.15428571428571, + "grad_norm": 38.376861572265625, + "learning_rate": 1.205079365079365e-05, + "loss": 0.1819, + "step": 6852 + }, + { + "epoch": 39.16, + "grad_norm": 30.317337036132812, + "learning_rate": 1.2044444444444445e-05, + "loss": 0.196, + "step": 6853 + }, + { + "epoch": 39.16571428571429, + "grad_norm": 35.691986083984375, + "learning_rate": 1.203809523809524e-05, + "loss": 0.2613, + "step": 6854 + }, + { + "epoch": 39.17142857142857, + "grad_norm": 40.64143371582031, + "learning_rate": 1.2031746031746032e-05, + "loss": 0.2197, + "step": 6855 + }, + { + "epoch": 39.177142857142854, + "grad_norm": 21.734079360961914, + "learning_rate": 1.2025396825396826e-05, + "loss": 0.1838, + "step": 6856 + }, + { + "epoch": 39.182857142857145, + "grad_norm": 54.33609390258789, + "learning_rate": 1.201904761904762e-05, + "loss": 0.1806, + "step": 6857 + }, + { 
+ "epoch": 39.18857142857143, + "grad_norm": 30.630271911621094, + "learning_rate": 1.2012698412698413e-05, + "loss": 0.1703, + "step": 6858 + }, + { + "epoch": 39.19428571428571, + "grad_norm": 87.60661315917969, + "learning_rate": 1.2006349206349208e-05, + "loss": 0.1838, + "step": 6859 + }, + { + "epoch": 39.2, + "grad_norm": 35.05860137939453, + "learning_rate": 1.2e-05, + "loss": 0.2497, + "step": 6860 + }, + { + "epoch": 39.205714285714286, + "grad_norm": 27.76970863342285, + "learning_rate": 1.1993650793650795e-05, + "loss": 0.3259, + "step": 6861 + }, + { + "epoch": 39.21142857142857, + "grad_norm": 51.48834991455078, + "learning_rate": 1.1987301587301587e-05, + "loss": 0.1175, + "step": 6862 + }, + { + "epoch": 39.21714285714286, + "grad_norm": 83.65272521972656, + "learning_rate": 1.1980952380952382e-05, + "loss": 0.163, + "step": 6863 + }, + { + "epoch": 39.222857142857144, + "grad_norm": 59.87739562988281, + "learning_rate": 1.1974603174603174e-05, + "loss": 0.1557, + "step": 6864 + }, + { + "epoch": 39.22857142857143, + "grad_norm": 22.3823299407959, + "learning_rate": 1.1968253968253969e-05, + "loss": 0.2134, + "step": 6865 + }, + { + "epoch": 39.23428571428571, + "grad_norm": 273.3268737792969, + "learning_rate": 1.1961904761904763e-05, + "loss": 0.1774, + "step": 6866 + }, + { + "epoch": 39.24, + "grad_norm": 49.09288024902344, + "learning_rate": 1.1955555555555556e-05, + "loss": 0.1455, + "step": 6867 + }, + { + "epoch": 39.245714285714286, + "grad_norm": 69.58202362060547, + "learning_rate": 1.194920634920635e-05, + "loss": 0.1459, + "step": 6868 + }, + { + "epoch": 39.25142857142857, + "grad_norm": 21.42776107788086, + "learning_rate": 1.1942857142857142e-05, + "loss": 0.1862, + "step": 6869 + }, + { + "epoch": 39.25714285714286, + "grad_norm": 487.3718566894531, + "learning_rate": 1.1936507936507937e-05, + "loss": 0.184, + "step": 6870 + }, + { + "epoch": 39.26285714285714, + "grad_norm": 33.55144119262695, + "learning_rate": 
1.1930158730158731e-05, + "loss": 0.1283, + "step": 6871 + }, + { + "epoch": 39.26857142857143, + "grad_norm": 1602.2911376953125, + "learning_rate": 1.1923809523809524e-05, + "loss": 0.2526, + "step": 6872 + }, + { + "epoch": 39.27428571428572, + "grad_norm": 30.238550186157227, + "learning_rate": 1.1917460317460318e-05, + "loss": 0.1724, + "step": 6873 + }, + { + "epoch": 39.28, + "grad_norm": 37.69853210449219, + "learning_rate": 1.1911111111111112e-05, + "loss": 0.2636, + "step": 6874 + }, + { + "epoch": 39.285714285714285, + "grad_norm": 199.5904541015625, + "learning_rate": 1.1904761904761905e-05, + "loss": 0.1469, + "step": 6875 + }, + { + "epoch": 39.29142857142857, + "grad_norm": 884.3235473632812, + "learning_rate": 1.18984126984127e-05, + "loss": 0.191, + "step": 6876 + }, + { + "epoch": 39.29714285714286, + "grad_norm": 33.138519287109375, + "learning_rate": 1.1892063492063494e-05, + "loss": 0.1767, + "step": 6877 + }, + { + "epoch": 39.30285714285714, + "grad_norm": 34.070228576660156, + "learning_rate": 1.1885714285714286e-05, + "loss": 0.1881, + "step": 6878 + }, + { + "epoch": 39.308571428571426, + "grad_norm": 81.01456451416016, + "learning_rate": 1.187936507936508e-05, + "loss": 0.1829, + "step": 6879 + }, + { + "epoch": 39.31428571428572, + "grad_norm": 93.02015686035156, + "learning_rate": 1.1873015873015873e-05, + "loss": 0.1867, + "step": 6880 + }, + { + "epoch": 39.32, + "grad_norm": 369.30487060546875, + "learning_rate": 1.1866666666666668e-05, + "loss": 0.2575, + "step": 6881 + }, + { + "epoch": 39.325714285714284, + "grad_norm": 29.82270622253418, + "learning_rate": 1.186031746031746e-05, + "loss": 0.1923, + "step": 6882 + }, + { + "epoch": 39.331428571428575, + "grad_norm": 48.5175666809082, + "learning_rate": 1.1853968253968255e-05, + "loss": 0.2272, + "step": 6883 + }, + { + "epoch": 39.33714285714286, + "grad_norm": 51.21316146850586, + "learning_rate": 1.1847619047619047e-05, + "loss": 0.1988, + "step": 6884 + }, + { + "epoch": 
39.34285714285714, + "grad_norm": 55.662986755371094, + "learning_rate": 1.1841269841269842e-05, + "loss": 0.1732, + "step": 6885 + }, + { + "epoch": 39.348571428571425, + "grad_norm": 30.418460845947266, + "learning_rate": 1.1834920634920634e-05, + "loss": 0.16, + "step": 6886 + }, + { + "epoch": 39.354285714285716, + "grad_norm": 35.29583740234375, + "learning_rate": 1.1828571428571429e-05, + "loss": 0.1157, + "step": 6887 + }, + { + "epoch": 39.36, + "grad_norm": 26.877500534057617, + "learning_rate": 1.1822222222222223e-05, + "loss": 0.2222, + "step": 6888 + }, + { + "epoch": 39.36571428571428, + "grad_norm": 24.766796112060547, + "learning_rate": 1.1815873015873016e-05, + "loss": 0.1343, + "step": 6889 + }, + { + "epoch": 39.371428571428574, + "grad_norm": 35.40179443359375, + "learning_rate": 1.180952380952381e-05, + "loss": 0.1445, + "step": 6890 + }, + { + "epoch": 39.37714285714286, + "grad_norm": 20.078857421875, + "learning_rate": 1.1803174603174604e-05, + "loss": 0.1617, + "step": 6891 + }, + { + "epoch": 39.38285714285714, + "grad_norm": 37.09568405151367, + "learning_rate": 1.1796825396825399e-05, + "loss": 0.2622, + "step": 6892 + }, + { + "epoch": 39.38857142857143, + "grad_norm": 43.9173698425293, + "learning_rate": 1.1790476190476191e-05, + "loss": 0.2056, + "step": 6893 + }, + { + "epoch": 39.394285714285715, + "grad_norm": 60.2230339050293, + "learning_rate": 1.1784126984126986e-05, + "loss": 0.2513, + "step": 6894 + }, + { + "epoch": 39.4, + "grad_norm": 121.24998474121094, + "learning_rate": 1.1777777777777778e-05, + "loss": 0.1214, + "step": 6895 + }, + { + "epoch": 39.40571428571428, + "grad_norm": 66.94969177246094, + "learning_rate": 1.1771428571428572e-05, + "loss": 0.2068, + "step": 6896 + }, + { + "epoch": 39.41142857142857, + "grad_norm": 49.56264877319336, + "learning_rate": 1.1765079365079365e-05, + "loss": 0.2071, + "step": 6897 + }, + { + "epoch": 39.417142857142856, + "grad_norm": 51.70158386230469, + "learning_rate": 
1.175873015873016e-05, + "loss": 0.2056, + "step": 6898 + }, + { + "epoch": 39.42285714285714, + "grad_norm": 76.47562408447266, + "learning_rate": 1.1752380952380954e-05, + "loss": 0.213, + "step": 6899 + }, + { + "epoch": 39.42857142857143, + "grad_norm": 58.22590637207031, + "learning_rate": 1.1746031746031746e-05, + "loss": 0.2217, + "step": 6900 + }, + { + "epoch": 39.434285714285714, + "grad_norm": 35.62051010131836, + "learning_rate": 1.173968253968254e-05, + "loss": 0.1365, + "step": 6901 + }, + { + "epoch": 39.44, + "grad_norm": 56.19094467163086, + "learning_rate": 1.1733333333333333e-05, + "loss": 0.1971, + "step": 6902 + }, + { + "epoch": 39.44571428571429, + "grad_norm": 462.1736755371094, + "learning_rate": 1.1726984126984128e-05, + "loss": 0.2218, + "step": 6903 + }, + { + "epoch": 39.45142857142857, + "grad_norm": 25.76043701171875, + "learning_rate": 1.172063492063492e-05, + "loss": 0.4593, + "step": 6904 + }, + { + "epoch": 39.457142857142856, + "grad_norm": 27.850421905517578, + "learning_rate": 1.1714285714285715e-05, + "loss": 0.1536, + "step": 6905 + }, + { + "epoch": 39.462857142857146, + "grad_norm": 320.2368469238281, + "learning_rate": 1.1707936507936507e-05, + "loss": 0.1716, + "step": 6906 + }, + { + "epoch": 39.46857142857143, + "grad_norm": 40.94053649902344, + "learning_rate": 1.1701587301587302e-05, + "loss": 0.1689, + "step": 6907 + }, + { + "epoch": 39.47428571428571, + "grad_norm": 62.56889343261719, + "learning_rate": 1.1695238095238096e-05, + "loss": 0.1877, + "step": 6908 + }, + { + "epoch": 39.48, + "grad_norm": 44.383026123046875, + "learning_rate": 1.168888888888889e-05, + "loss": 0.157, + "step": 6909 + }, + { + "epoch": 39.48571428571429, + "grad_norm": 59.0760498046875, + "learning_rate": 1.1682539682539685e-05, + "loss": 0.1679, + "step": 6910 + }, + { + "epoch": 39.49142857142857, + "grad_norm": 55.42938995361328, + "learning_rate": 1.1676190476190477e-05, + "loss": 0.2307, + "step": 6911 + }, + { + "epoch": 
39.497142857142855, + "grad_norm": 65.94779968261719, + "learning_rate": 1.1669841269841272e-05, + "loss": 0.2264, + "step": 6912 + }, + { + "epoch": 39.502857142857145, + "grad_norm": 89.81292724609375, + "learning_rate": 1.1663492063492064e-05, + "loss": 0.2101, + "step": 6913 + }, + { + "epoch": 39.50857142857143, + "grad_norm": 28.558591842651367, + "learning_rate": 1.1657142857142859e-05, + "loss": 0.1894, + "step": 6914 + }, + { + "epoch": 39.51428571428571, + "grad_norm": 46.673946380615234, + "learning_rate": 1.1650793650793651e-05, + "loss": 0.3003, + "step": 6915 + }, + { + "epoch": 39.52, + "grad_norm": 34.23926544189453, + "learning_rate": 1.1644444444444446e-05, + "loss": 0.2361, + "step": 6916 + }, + { + "epoch": 39.52571428571429, + "grad_norm": 71.27877044677734, + "learning_rate": 1.1638095238095238e-05, + "loss": 0.2106, + "step": 6917 + }, + { + "epoch": 39.53142857142857, + "grad_norm": 68.56620025634766, + "learning_rate": 1.1631746031746032e-05, + "loss": 0.2345, + "step": 6918 + }, + { + "epoch": 39.537142857142854, + "grad_norm": 20.94009780883789, + "learning_rate": 1.1625396825396825e-05, + "loss": 0.1667, + "step": 6919 + }, + { + "epoch": 39.542857142857144, + "grad_norm": 37.50666427612305, + "learning_rate": 1.161904761904762e-05, + "loss": 0.1454, + "step": 6920 + }, + { + "epoch": 39.54857142857143, + "grad_norm": 68.03517150878906, + "learning_rate": 1.1612698412698412e-05, + "loss": 0.1825, + "step": 6921 + }, + { + "epoch": 39.55428571428571, + "grad_norm": 23.844459533691406, + "learning_rate": 1.1606349206349206e-05, + "loss": 0.1927, + "step": 6922 + }, + { + "epoch": 39.56, + "grad_norm": 84.80583953857422, + "learning_rate": 1.16e-05, + "loss": 0.1556, + "step": 6923 + }, + { + "epoch": 39.565714285714286, + "grad_norm": 23.745746612548828, + "learning_rate": 1.1593650793650793e-05, + "loss": 0.1969, + "step": 6924 + }, + { + "epoch": 39.57142857142857, + "grad_norm": 94.98046875, + "learning_rate": 1.1587301587301588e-05, + 
"loss": 0.139, + "step": 6925 + }, + { + "epoch": 39.57714285714286, + "grad_norm": 76.0391845703125, + "learning_rate": 1.1580952380952382e-05, + "loss": 0.2522, + "step": 6926 + }, + { + "epoch": 39.582857142857144, + "grad_norm": 80.9561538696289, + "learning_rate": 1.1574603174603175e-05, + "loss": 0.1765, + "step": 6927 + }, + { + "epoch": 39.58857142857143, + "grad_norm": 76.00508117675781, + "learning_rate": 1.1568253968253969e-05, + "loss": 0.1807, + "step": 6928 + }, + { + "epoch": 39.59428571428572, + "grad_norm": 116.78324890136719, + "learning_rate": 1.1561904761904763e-05, + "loss": 0.1959, + "step": 6929 + }, + { + "epoch": 39.6, + "grad_norm": 53.20307159423828, + "learning_rate": 1.1555555555555556e-05, + "loss": 0.1495, + "step": 6930 + }, + { + "epoch": 39.605714285714285, + "grad_norm": 49.313350677490234, + "learning_rate": 1.154920634920635e-05, + "loss": 0.1888, + "step": 6931 + }, + { + "epoch": 39.61142857142857, + "grad_norm": 974.427734375, + "learning_rate": 1.1542857142857143e-05, + "loss": 0.244, + "step": 6932 + }, + { + "epoch": 39.61714285714286, + "grad_norm": 68.5076675415039, + "learning_rate": 1.1536507936507937e-05, + "loss": 0.1971, + "step": 6933 + }, + { + "epoch": 39.62285714285714, + "grad_norm": 48.14308547973633, + "learning_rate": 1.1530158730158732e-05, + "loss": 0.1892, + "step": 6934 + }, + { + "epoch": 39.628571428571426, + "grad_norm": 493.64166259765625, + "learning_rate": 1.1523809523809524e-05, + "loss": 0.1761, + "step": 6935 + }, + { + "epoch": 39.63428571428572, + "grad_norm": 70.71945190429688, + "learning_rate": 1.1517460317460319e-05, + "loss": 0.2291, + "step": 6936 + }, + { + "epoch": 39.64, + "grad_norm": 21.475210189819336, + "learning_rate": 1.1511111111111111e-05, + "loss": 0.17, + "step": 6937 + }, + { + "epoch": 39.645714285714284, + "grad_norm": 48.795433044433594, + "learning_rate": 1.1504761904761906e-05, + "loss": 0.2267, + "step": 6938 + }, + { + "epoch": 39.651428571428575, + "grad_norm": 
42.746124267578125, + "learning_rate": 1.1498412698412698e-05, + "loss": 0.1506, + "step": 6939 + }, + { + "epoch": 39.65714285714286, + "grad_norm": 94.1886215209961, + "learning_rate": 1.1492063492063492e-05, + "loss": 0.2586, + "step": 6940 + }, + { + "epoch": 39.66285714285714, + "grad_norm": 507.9591369628906, + "learning_rate": 1.1485714285714285e-05, + "loss": 0.1645, + "step": 6941 + }, + { + "epoch": 39.668571428571425, + "grad_norm": 41.86248016357422, + "learning_rate": 1.147936507936508e-05, + "loss": 0.171, + "step": 6942 + }, + { + "epoch": 39.674285714285716, + "grad_norm": 51.88438415527344, + "learning_rate": 1.1473015873015872e-05, + "loss": 0.1331, + "step": 6943 + }, + { + "epoch": 39.68, + "grad_norm": 353.7901611328125, + "learning_rate": 1.1466666666666666e-05, + "loss": 0.251, + "step": 6944 + }, + { + "epoch": 39.68571428571428, + "grad_norm": 19.806060791015625, + "learning_rate": 1.146031746031746e-05, + "loss": 0.1681, + "step": 6945 + }, + { + "epoch": 39.691428571428574, + "grad_norm": 46.80093002319336, + "learning_rate": 1.1453968253968255e-05, + "loss": 0.2254, + "step": 6946 + }, + { + "epoch": 39.69714285714286, + "grad_norm": 33.5659294128418, + "learning_rate": 1.144761904761905e-05, + "loss": 0.1462, + "step": 6947 + }, + { + "epoch": 39.70285714285714, + "grad_norm": 323.8052062988281, + "learning_rate": 1.1441269841269842e-05, + "loss": 0.2597, + "step": 6948 + }, + { + "epoch": 39.70857142857143, + "grad_norm": 27.59954071044922, + "learning_rate": 1.1434920634920636e-05, + "loss": 0.2973, + "step": 6949 + }, + { + "epoch": 39.714285714285715, + "grad_norm": 80.96438598632812, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.2237, + "step": 6950 + }, + { + "epoch": 39.72, + "grad_norm": 52.7150764465332, + "learning_rate": 1.1422222222222223e-05, + "loss": 0.1318, + "step": 6951 + }, + { + "epoch": 39.72571428571428, + "grad_norm": 65.87142181396484, + "learning_rate": 1.1415873015873016e-05, + "loss": 0.1935, + 
"step": 6952 + }, + { + "epoch": 39.73142857142857, + "grad_norm": 34.133445739746094, + "learning_rate": 1.140952380952381e-05, + "loss": 0.2051, + "step": 6953 + }, + { + "epoch": 39.73714285714286, + "grad_norm": 29.996055603027344, + "learning_rate": 1.1403174603174603e-05, + "loss": 0.1625, + "step": 6954 + }, + { + "epoch": 39.74285714285714, + "grad_norm": 35.15446472167969, + "learning_rate": 1.1396825396825397e-05, + "loss": 0.2354, + "step": 6955 + }, + { + "epoch": 39.74857142857143, + "grad_norm": 30.359275817871094, + "learning_rate": 1.1390476190476192e-05, + "loss": 0.1966, + "step": 6956 + }, + { + "epoch": 39.754285714285714, + "grad_norm": 25.765777587890625, + "learning_rate": 1.1384126984126984e-05, + "loss": 0.2597, + "step": 6957 + }, + { + "epoch": 39.76, + "grad_norm": 49.04469299316406, + "learning_rate": 1.1377777777777779e-05, + "loss": 0.1757, + "step": 6958 + }, + { + "epoch": 39.76571428571429, + "grad_norm": 83.82494354248047, + "learning_rate": 1.1371428571428571e-05, + "loss": 0.1962, + "step": 6959 + }, + { + "epoch": 39.77142857142857, + "grad_norm": 115.36933898925781, + "learning_rate": 1.1365079365079366e-05, + "loss": 0.2437, + "step": 6960 + }, + { + "epoch": 39.777142857142856, + "grad_norm": 42.116424560546875, + "learning_rate": 1.1358730158730158e-05, + "loss": 0.2258, + "step": 6961 + }, + { + "epoch": 39.78285714285714, + "grad_norm": 33.92232131958008, + "learning_rate": 1.1352380952380953e-05, + "loss": 0.2757, + "step": 6962 + }, + { + "epoch": 39.78857142857143, + "grad_norm": 27.678110122680664, + "learning_rate": 1.1346031746031747e-05, + "loss": 0.1723, + "step": 6963 + }, + { + "epoch": 39.794285714285714, + "grad_norm": 46.185401916503906, + "learning_rate": 1.1339682539682541e-05, + "loss": 0.1713, + "step": 6964 + }, + { + "epoch": 39.8, + "grad_norm": 34.324920654296875, + "learning_rate": 1.1333333333333334e-05, + "loss": 0.1703, + "step": 6965 + }, + { + "epoch": 39.80571428571429, + "grad_norm": 
29.69679832458496, + "learning_rate": 1.1326984126984128e-05, + "loss": 0.1478, + "step": 6966 + }, + { + "epoch": 39.81142857142857, + "grad_norm": 30.579904556274414, + "learning_rate": 1.1320634920634922e-05, + "loss": 0.1566, + "step": 6967 + }, + { + "epoch": 39.817142857142855, + "grad_norm": 40.814144134521484, + "learning_rate": 1.1314285714285715e-05, + "loss": 0.1769, + "step": 6968 + }, + { + "epoch": 39.822857142857146, + "grad_norm": 45.01163101196289, + "learning_rate": 1.130793650793651e-05, + "loss": 0.1629, + "step": 6969 + }, + { + "epoch": 39.82857142857143, + "grad_norm": 77.55461120605469, + "learning_rate": 1.1301587301587302e-05, + "loss": 0.1999, + "step": 6970 + }, + { + "epoch": 39.83428571428571, + "grad_norm": 43.04883575439453, + "learning_rate": 1.1295238095238096e-05, + "loss": 0.1714, + "step": 6971 + }, + { + "epoch": 39.84, + "grad_norm": 45.441444396972656, + "learning_rate": 1.1288888888888889e-05, + "loss": 0.1509, + "step": 6972 + }, + { + "epoch": 39.84571428571429, + "grad_norm": 60.79438018798828, + "learning_rate": 1.1282539682539683e-05, + "loss": 0.278, + "step": 6973 + }, + { + "epoch": 39.85142857142857, + "grad_norm": 35.067474365234375, + "learning_rate": 1.1276190476190476e-05, + "loss": 0.1926, + "step": 6974 + }, + { + "epoch": 39.857142857142854, + "grad_norm": 41.40991973876953, + "learning_rate": 1.126984126984127e-05, + "loss": 0.2465, + "step": 6975 + }, + { + "epoch": 39.862857142857145, + "grad_norm": 42.7786750793457, + "learning_rate": 1.1263492063492063e-05, + "loss": 0.1523, + "step": 6976 + }, + { + "epoch": 39.86857142857143, + "grad_norm": 43.09067916870117, + "learning_rate": 1.1257142857142857e-05, + "loss": 0.1933, + "step": 6977 + }, + { + "epoch": 39.87428571428571, + "grad_norm": 22.420692443847656, + "learning_rate": 1.125079365079365e-05, + "loss": 0.172, + "step": 6978 + }, + { + "epoch": 39.88, + "grad_norm": 18.63651466369629, + "learning_rate": 1.1244444444444444e-05, + "loss": 0.1813, + 
"step": 6979 + }, + { + "epoch": 39.885714285714286, + "grad_norm": 87.5951919555664, + "learning_rate": 1.1238095238095239e-05, + "loss": 0.1922, + "step": 6980 + }, + { + "epoch": 39.89142857142857, + "grad_norm": 23.170183181762695, + "learning_rate": 1.1231746031746033e-05, + "loss": 0.1528, + "step": 6981 + }, + { + "epoch": 39.89714285714286, + "grad_norm": 35.52743148803711, + "learning_rate": 1.1225396825396827e-05, + "loss": 0.1756, + "step": 6982 + }, + { + "epoch": 39.902857142857144, + "grad_norm": 22.46258544921875, + "learning_rate": 1.121904761904762e-05, + "loss": 0.1842, + "step": 6983 + }, + { + "epoch": 39.90857142857143, + "grad_norm": 38.78075408935547, + "learning_rate": 1.1212698412698414e-05, + "loss": 0.1482, + "step": 6984 + }, + { + "epoch": 39.91428571428571, + "grad_norm": 42.88488006591797, + "learning_rate": 1.1206349206349207e-05, + "loss": 0.1541, + "step": 6985 + }, + { + "epoch": 39.92, + "grad_norm": 86.86996459960938, + "learning_rate": 1.1200000000000001e-05, + "loss": 0.1831, + "step": 6986 + }, + { + "epoch": 39.925714285714285, + "grad_norm": 43.99440002441406, + "learning_rate": 1.1193650793650794e-05, + "loss": 0.2159, + "step": 6987 + }, + { + "epoch": 39.93142857142857, + "grad_norm": 43.604984283447266, + "learning_rate": 1.1187301587301588e-05, + "loss": 0.2432, + "step": 6988 + }, + { + "epoch": 39.93714285714286, + "grad_norm": 22.00592803955078, + "learning_rate": 1.118095238095238e-05, + "loss": 0.1796, + "step": 6989 + }, + { + "epoch": 39.94285714285714, + "grad_norm": 50.48416519165039, + "learning_rate": 1.1174603174603175e-05, + "loss": 0.2066, + "step": 6990 + }, + { + "epoch": 39.94857142857143, + "grad_norm": 35.589725494384766, + "learning_rate": 1.116825396825397e-05, + "loss": 0.1294, + "step": 6991 + }, + { + "epoch": 39.95428571428572, + "grad_norm": 23.348114013671875, + "learning_rate": 1.1161904761904762e-05, + "loss": 0.2204, + "step": 6992 + }, + { + "epoch": 39.96, + "grad_norm": 
89.19754791259766, + "learning_rate": 1.1155555555555556e-05, + "loss": 0.2061, + "step": 6993 + }, + { + "epoch": 39.965714285714284, + "grad_norm": 38.78104782104492, + "learning_rate": 1.1149206349206349e-05, + "loss": 0.1605, + "step": 6994 + }, + { + "epoch": 39.97142857142857, + "grad_norm": 41.04331588745117, + "learning_rate": 1.1142857142857143e-05, + "loss": 0.201, + "step": 6995 + }, + { + "epoch": 39.97714285714286, + "grad_norm": 29.142427444458008, + "learning_rate": 1.1136507936507936e-05, + "loss": 0.1823, + "step": 6996 + }, + { + "epoch": 39.98285714285714, + "grad_norm": 42.97498321533203, + "learning_rate": 1.113015873015873e-05, + "loss": 0.173, + "step": 6997 + }, + { + "epoch": 39.988571428571426, + "grad_norm": 19.229862213134766, + "learning_rate": 1.1123809523809525e-05, + "loss": 0.1316, + "step": 6998 + }, + { + "epoch": 39.994285714285716, + "grad_norm": 78.10343933105469, + "learning_rate": 1.1117460317460317e-05, + "loss": 0.1944, + "step": 6999 + }, + { + "epoch": 40.0, + "grad_norm": 29.40753936767578, + "learning_rate": 1.1111111111111112e-05, + "loss": 0.1638, + "step": 7000 + }, + { + "epoch": 40.0, + "eval_classes": 0, + "eval_loss": 0.5729948282241821, + "eval_map": 0.9403, + "eval_map_50": 0.9713, + "eval_map_75": 0.9656, + "eval_map_large": 0.9404, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9403, + "eval_map_small": -1.0, + "eval_mar_1": 0.7908, + "eval_mar_10": 0.9775, + "eval_mar_100": 0.9784, + "eval_mar_100_per_class": 0.9784, + "eval_mar_large": 0.9784, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.2563, + "eval_samples_per_second": 22.178, + "eval_steps_per_second": 2.791, + "step": 7000 + }, + { + "epoch": 40.005714285714284, + "grad_norm": 30.414432525634766, + "learning_rate": 1.1104761904761906e-05, + "loss": 0.1652, + "step": 7001 + }, + { + "epoch": 40.011428571428574, + "grad_norm": 39.77214813232422, + "learning_rate": 1.10984126984127e-05, + "loss": 0.4243, + "step": 7002 + 
}, + { + "epoch": 40.01714285714286, + "grad_norm": 33.90891647338867, + "learning_rate": 1.1092063492063493e-05, + "loss": 0.1739, + "step": 7003 + }, + { + "epoch": 40.02285714285714, + "grad_norm": 16.878549575805664, + "learning_rate": 1.1085714285714287e-05, + "loss": 0.2202, + "step": 7004 + }, + { + "epoch": 40.02857142857143, + "grad_norm": 38.1385612487793, + "learning_rate": 1.107936507936508e-05, + "loss": 0.1297, + "step": 7005 + }, + { + "epoch": 40.034285714285716, + "grad_norm": 53.239200592041016, + "learning_rate": 1.1073015873015874e-05, + "loss": 0.2195, + "step": 7006 + }, + { + "epoch": 40.04, + "grad_norm": 36.35694122314453, + "learning_rate": 1.1066666666666667e-05, + "loss": 0.1495, + "step": 7007 + }, + { + "epoch": 40.04571428571428, + "grad_norm": 80.08834075927734, + "learning_rate": 1.1060317460317461e-05, + "loss": 0.1739, + "step": 7008 + }, + { + "epoch": 40.05142857142857, + "grad_norm": 37.675682067871094, + "learning_rate": 1.1053968253968254e-05, + "loss": 0.1584, + "step": 7009 + }, + { + "epoch": 40.05714285714286, + "grad_norm": 42.608245849609375, + "learning_rate": 1.1047619047619048e-05, + "loss": 0.1318, + "step": 7010 + }, + { + "epoch": 40.06285714285714, + "grad_norm": 76.56353759765625, + "learning_rate": 1.104126984126984e-05, + "loss": 0.1978, + "step": 7011 + }, + { + "epoch": 40.06857142857143, + "grad_norm": 29.985271453857422, + "learning_rate": 1.1034920634920635e-05, + "loss": 0.2171, + "step": 7012 + }, + { + "epoch": 40.074285714285715, + "grad_norm": 23.408645629882812, + "learning_rate": 1.102857142857143e-05, + "loss": 0.1768, + "step": 7013 + }, + { + "epoch": 40.08, + "grad_norm": 35.90739440917969, + "learning_rate": 1.1022222222222222e-05, + "loss": 0.1585, + "step": 7014 + }, + { + "epoch": 40.08571428571429, + "grad_norm": 29.93295669555664, + "learning_rate": 1.1015873015873016e-05, + "loss": 0.1715, + "step": 7015 + }, + { + "epoch": 40.09142857142857, + "grad_norm": 174.803955078125, + 
"learning_rate": 1.1009523809523809e-05, + "loss": 0.1777, + "step": 7016 + }, + { + "epoch": 40.097142857142856, + "grad_norm": 46.307621002197266, + "learning_rate": 1.1003174603174603e-05, + "loss": 0.1968, + "step": 7017 + }, + { + "epoch": 40.10285714285714, + "grad_norm": 30.228389739990234, + "learning_rate": 1.0996825396825398e-05, + "loss": 0.1642, + "step": 7018 + }, + { + "epoch": 40.10857142857143, + "grad_norm": 53.23178482055664, + "learning_rate": 1.0990476190476192e-05, + "loss": 0.1606, + "step": 7019 + }, + { + "epoch": 40.114285714285714, + "grad_norm": 36.800209045410156, + "learning_rate": 1.0984126984126985e-05, + "loss": 0.1575, + "step": 7020 + }, + { + "epoch": 40.12, + "grad_norm": 36.75564193725586, + "learning_rate": 1.0977777777777779e-05, + "loss": 0.143, + "step": 7021 + }, + { + "epoch": 40.12571428571429, + "grad_norm": 67.83435821533203, + "learning_rate": 1.0971428571428572e-05, + "loss": 0.2065, + "step": 7022 + }, + { + "epoch": 40.13142857142857, + "grad_norm": 30.078880310058594, + "learning_rate": 1.0965079365079366e-05, + "loss": 0.3438, + "step": 7023 + }, + { + "epoch": 40.137142857142855, + "grad_norm": 66.90591430664062, + "learning_rate": 1.0958730158730159e-05, + "loss": 0.1947, + "step": 7024 + }, + { + "epoch": 40.142857142857146, + "grad_norm": 33.890262603759766, + "learning_rate": 1.0952380952380953e-05, + "loss": 0.1603, + "step": 7025 + }, + { + "epoch": 40.14857142857143, + "grad_norm": 49.547325134277344, + "learning_rate": 1.0946031746031747e-05, + "loss": 0.221, + "step": 7026 + }, + { + "epoch": 40.15428571428571, + "grad_norm": 62.14547348022461, + "learning_rate": 1.093968253968254e-05, + "loss": 0.2098, + "step": 7027 + }, + { + "epoch": 40.16, + "grad_norm": 26.917131423950195, + "learning_rate": 1.0933333333333334e-05, + "loss": 0.1699, + "step": 7028 + }, + { + "epoch": 40.16571428571429, + "grad_norm": 35.044677734375, + "learning_rate": 1.0926984126984127e-05, + "loss": 0.1271, + "step": 7029 + }, + 
{ + "epoch": 40.17142857142857, + "grad_norm": 93.6030502319336, + "learning_rate": 1.0920634920634921e-05, + "loss": 0.2828, + "step": 7030 + }, + { + "epoch": 40.177142857142854, + "grad_norm": 46.65414810180664, + "learning_rate": 1.0914285714285714e-05, + "loss": 0.1463, + "step": 7031 + }, + { + "epoch": 40.182857142857145, + "grad_norm": 24.331409454345703, + "learning_rate": 1.0907936507936508e-05, + "loss": 0.2234, + "step": 7032 + }, + { + "epoch": 40.18857142857143, + "grad_norm": 42.96149826049805, + "learning_rate": 1.09015873015873e-05, + "loss": 0.1677, + "step": 7033 + }, + { + "epoch": 40.19428571428571, + "grad_norm": 41.679786682128906, + "learning_rate": 1.0895238095238095e-05, + "loss": 0.1671, + "step": 7034 + }, + { + "epoch": 40.2, + "grad_norm": 41.53316879272461, + "learning_rate": 1.088888888888889e-05, + "loss": 0.145, + "step": 7035 + }, + { + "epoch": 40.205714285714286, + "grad_norm": 44.54117202758789, + "learning_rate": 1.0882539682539684e-05, + "loss": 0.2878, + "step": 7036 + }, + { + "epoch": 40.21142857142857, + "grad_norm": 116.3648910522461, + "learning_rate": 1.0876190476190478e-05, + "loss": 0.1527, + "step": 7037 + }, + { + "epoch": 40.21714285714286, + "grad_norm": 75.6655044555664, + "learning_rate": 1.086984126984127e-05, + "loss": 0.1768, + "step": 7038 + }, + { + "epoch": 40.222857142857144, + "grad_norm": 41.37080764770508, + "learning_rate": 1.0863492063492065e-05, + "loss": 0.2164, + "step": 7039 + }, + { + "epoch": 40.22857142857143, + "grad_norm": 36.2270393371582, + "learning_rate": 1.0857142857142858e-05, + "loss": 0.198, + "step": 7040 + }, + { + "epoch": 40.23428571428571, + "grad_norm": 52.032310485839844, + "learning_rate": 1.0850793650793652e-05, + "loss": 0.1649, + "step": 7041 + }, + { + "epoch": 40.24, + "grad_norm": 31.848403930664062, + "learning_rate": 1.0844444444444445e-05, + "loss": 0.1873, + "step": 7042 + }, + { + "epoch": 40.245714285714286, + "grad_norm": 63.652000427246094, + "learning_rate": 
1.0838095238095239e-05, + "loss": 0.1516, + "step": 7043 + }, + { + "epoch": 40.25142857142857, + "grad_norm": 55.00597381591797, + "learning_rate": 1.0831746031746032e-05, + "loss": 0.2101, + "step": 7044 + }, + { + "epoch": 40.25714285714286, + "grad_norm": 90.49565887451172, + "learning_rate": 1.0825396825396826e-05, + "loss": 0.1857, + "step": 7045 + }, + { + "epoch": 40.26285714285714, + "grad_norm": 38.97643280029297, + "learning_rate": 1.0819047619047619e-05, + "loss": 0.1871, + "step": 7046 + }, + { + "epoch": 40.26857142857143, + "grad_norm": 60.40447998046875, + "learning_rate": 1.0812698412698413e-05, + "loss": 0.1452, + "step": 7047 + }, + { + "epoch": 40.27428571428572, + "grad_norm": 40.19563293457031, + "learning_rate": 1.0806349206349207e-05, + "loss": 0.284, + "step": 7048 + }, + { + "epoch": 40.28, + "grad_norm": 27.480947494506836, + "learning_rate": 1.08e-05, + "loss": 0.2156, + "step": 7049 + }, + { + "epoch": 40.285714285714285, + "grad_norm": 45.169227600097656, + "learning_rate": 1.0793650793650794e-05, + "loss": 0.1619, + "step": 7050 + }, + { + "epoch": 40.29142857142857, + "grad_norm": 57.9741325378418, + "learning_rate": 1.0787301587301587e-05, + "loss": 0.2071, + "step": 7051 + }, + { + "epoch": 40.29714285714286, + "grad_norm": 48.04543685913086, + "learning_rate": 1.0780952380952381e-05, + "loss": 0.209, + "step": 7052 + }, + { + "epoch": 40.30285714285714, + "grad_norm": 18.890419006347656, + "learning_rate": 1.0774603174603176e-05, + "loss": 0.1393, + "step": 7053 + }, + { + "epoch": 40.308571428571426, + "grad_norm": 21.3095645904541, + "learning_rate": 1.0768253968253968e-05, + "loss": 0.181, + "step": 7054 + }, + { + "epoch": 40.31428571428572, + "grad_norm": 20.197839736938477, + "learning_rate": 1.0761904761904763e-05, + "loss": 0.1545, + "step": 7055 + }, + { + "epoch": 40.32, + "grad_norm": 67.84980773925781, + "learning_rate": 1.0755555555555557e-05, + "loss": 0.1378, + "step": 7056 + }, + { + "epoch": 40.325714285714284, + 
"grad_norm": 30.961355209350586, + "learning_rate": 1.074920634920635e-05, + "loss": 0.1575, + "step": 7057 + }, + { + "epoch": 40.331428571428575, + "grad_norm": 61.83423614501953, + "learning_rate": 1.0742857142857144e-05, + "loss": 0.182, + "step": 7058 + }, + { + "epoch": 40.33714285714286, + "grad_norm": 80.88799285888672, + "learning_rate": 1.0736507936507938e-05, + "loss": 0.152, + "step": 7059 + }, + { + "epoch": 40.34285714285714, + "grad_norm": 62.28117752075195, + "learning_rate": 1.073015873015873e-05, + "loss": 0.1789, + "step": 7060 + }, + { + "epoch": 40.348571428571425, + "grad_norm": 102.28784942626953, + "learning_rate": 1.0723809523809525e-05, + "loss": 0.1686, + "step": 7061 + }, + { + "epoch": 40.354285714285716, + "grad_norm": 73.03924560546875, + "learning_rate": 1.0717460317460318e-05, + "loss": 0.1942, + "step": 7062 + }, + { + "epoch": 40.36, + "grad_norm": 75.60195922851562, + "learning_rate": 1.0711111111111112e-05, + "loss": 0.217, + "step": 7063 + }, + { + "epoch": 40.36571428571428, + "grad_norm": 65.61262512207031, + "learning_rate": 1.0704761904761905e-05, + "loss": 0.1986, + "step": 7064 + }, + { + "epoch": 40.371428571428574, + "grad_norm": 498.84271240234375, + "learning_rate": 1.0698412698412699e-05, + "loss": 0.2723, + "step": 7065 + }, + { + "epoch": 40.37714285714286, + "grad_norm": 44.167240142822266, + "learning_rate": 1.0692063492063492e-05, + "loss": 0.2334, + "step": 7066 + }, + { + "epoch": 40.38285714285714, + "grad_norm": 67.59822845458984, + "learning_rate": 1.0685714285714286e-05, + "loss": 0.1676, + "step": 7067 + }, + { + "epoch": 40.38857142857143, + "grad_norm": 41.171634674072266, + "learning_rate": 1.0679365079365079e-05, + "loss": 0.148, + "step": 7068 + }, + { + "epoch": 40.394285714285715, + "grad_norm": 36.31204605102539, + "learning_rate": 1.0673015873015873e-05, + "loss": 0.1669, + "step": 7069 + }, + { + "epoch": 40.4, + "grad_norm": 55.22999572753906, + "learning_rate": 1.0666666666666667e-05, + 
"loss": 0.1675, + "step": 7070 + }, + { + "epoch": 40.40571428571428, + "grad_norm": 109.92345428466797, + "learning_rate": 1.066031746031746e-05, + "loss": 0.2286, + "step": 7071 + }, + { + "epoch": 40.41142857142857, + "grad_norm": 87.8012466430664, + "learning_rate": 1.0653968253968254e-05, + "loss": 0.1891, + "step": 7072 + }, + { + "epoch": 40.417142857142856, + "grad_norm": 233.1424560546875, + "learning_rate": 1.0647619047619049e-05, + "loss": 0.1949, + "step": 7073 + }, + { + "epoch": 40.42285714285714, + "grad_norm": 56.26198196411133, + "learning_rate": 1.0641269841269843e-05, + "loss": 0.1869, + "step": 7074 + }, + { + "epoch": 40.42857142857143, + "grad_norm": 246.6047821044922, + "learning_rate": 1.0634920634920636e-05, + "loss": 0.1971, + "step": 7075 + }, + { + "epoch": 40.434285714285714, + "grad_norm": 36.35358428955078, + "learning_rate": 1.062857142857143e-05, + "loss": 0.2352, + "step": 7076 + }, + { + "epoch": 40.44, + "grad_norm": 17.073652267456055, + "learning_rate": 1.0622222222222223e-05, + "loss": 0.1667, + "step": 7077 + }, + { + "epoch": 40.44571428571429, + "grad_norm": 34.12038040161133, + "learning_rate": 1.0615873015873017e-05, + "loss": 0.1763, + "step": 7078 + }, + { + "epoch": 40.45142857142857, + "grad_norm": 40.721702575683594, + "learning_rate": 1.060952380952381e-05, + "loss": 0.1756, + "step": 7079 + }, + { + "epoch": 40.457142857142856, + "grad_norm": 66.41804504394531, + "learning_rate": 1.0603174603174604e-05, + "loss": 0.1986, + "step": 7080 + }, + { + "epoch": 40.462857142857146, + "grad_norm": 26.351877212524414, + "learning_rate": 1.0596825396825396e-05, + "loss": 0.1862, + "step": 7081 + }, + { + "epoch": 40.46857142857143, + "grad_norm": 37.494659423828125, + "learning_rate": 1.059047619047619e-05, + "loss": 0.1328, + "step": 7082 + }, + { + "epoch": 40.47428571428571, + "grad_norm": 51.77208709716797, + "learning_rate": 1.0584126984126985e-05, + "loss": 0.1816, + "step": 7083 + }, + { + "epoch": 40.48, + 
"grad_norm": 91.71420288085938, + "learning_rate": 1.0577777777777778e-05, + "loss": 0.2641, + "step": 7084 + }, + { + "epoch": 40.48571428571429, + "grad_norm": 47.01348114013672, + "learning_rate": 1.0571428571428572e-05, + "loss": 0.201, + "step": 7085 + }, + { + "epoch": 40.49142857142857, + "grad_norm": 24.24553108215332, + "learning_rate": 1.0565079365079365e-05, + "loss": 0.185, + "step": 7086 + }, + { + "epoch": 40.497142857142855, + "grad_norm": 24.89585304260254, + "learning_rate": 1.0558730158730159e-05, + "loss": 0.1371, + "step": 7087 + }, + { + "epoch": 40.502857142857145, + "grad_norm": 67.30992126464844, + "learning_rate": 1.0552380952380952e-05, + "loss": 0.1728, + "step": 7088 + }, + { + "epoch": 40.50857142857143, + "grad_norm": 295.7307434082031, + "learning_rate": 1.0546031746031746e-05, + "loss": 0.1639, + "step": 7089 + }, + { + "epoch": 40.51428571428571, + "grad_norm": 20.085918426513672, + "learning_rate": 1.053968253968254e-05, + "loss": 0.1722, + "step": 7090 + }, + { + "epoch": 40.52, + "grad_norm": 47.78133773803711, + "learning_rate": 1.0533333333333335e-05, + "loss": 0.15, + "step": 7091 + }, + { + "epoch": 40.52571428571429, + "grad_norm": 22.585948944091797, + "learning_rate": 1.0526984126984127e-05, + "loss": 0.1737, + "step": 7092 + }, + { + "epoch": 40.53142857142857, + "grad_norm": 20.893796920776367, + "learning_rate": 1.0520634920634922e-05, + "loss": 0.1234, + "step": 7093 + }, + { + "epoch": 40.537142857142854, + "grad_norm": 48.247596740722656, + "learning_rate": 1.0514285714285716e-05, + "loss": 0.1572, + "step": 7094 + }, + { + "epoch": 40.542857142857144, + "grad_norm": 25.091320037841797, + "learning_rate": 1.0507936507936509e-05, + "loss": 0.1978, + "step": 7095 + }, + { + "epoch": 40.54857142857143, + "grad_norm": 51.04119110107422, + "learning_rate": 1.0501587301587303e-05, + "loss": 0.1781, + "step": 7096 + }, + { + "epoch": 40.55428571428571, + "grad_norm": 42.68234634399414, + "learning_rate": 
1.0495238095238096e-05, + "loss": 0.2108, + "step": 7097 + }, + { + "epoch": 40.56, + "grad_norm": 118.75373077392578, + "learning_rate": 1.048888888888889e-05, + "loss": 0.1533, + "step": 7098 + }, + { + "epoch": 40.565714285714286, + "grad_norm": 68.9887924194336, + "learning_rate": 1.0482539682539683e-05, + "loss": 0.2354, + "step": 7099 + }, + { + "epoch": 40.57142857142857, + "grad_norm": 30.95083236694336, + "learning_rate": 1.0476190476190477e-05, + "loss": 0.239, + "step": 7100 + }, + { + "epoch": 40.57714285714286, + "grad_norm": 48.973960876464844, + "learning_rate": 1.046984126984127e-05, + "loss": 0.2646, + "step": 7101 + }, + { + "epoch": 40.582857142857144, + "grad_norm": 216.03866577148438, + "learning_rate": 1.0463492063492064e-05, + "loss": 0.2001, + "step": 7102 + }, + { + "epoch": 40.58857142857143, + "grad_norm": 51.790870666503906, + "learning_rate": 1.0457142857142856e-05, + "loss": 0.2352, + "step": 7103 + }, + { + "epoch": 40.59428571428572, + "grad_norm": 26.630159378051758, + "learning_rate": 1.045079365079365e-05, + "loss": 0.1987, + "step": 7104 + }, + { + "epoch": 40.6, + "grad_norm": 51.316497802734375, + "learning_rate": 1.0444444444444445e-05, + "loss": 0.1359, + "step": 7105 + }, + { + "epoch": 40.605714285714285, + "grad_norm": 53.45886993408203, + "learning_rate": 1.0438095238095238e-05, + "loss": 0.1916, + "step": 7106 + }, + { + "epoch": 40.61142857142857, + "grad_norm": 28.972932815551758, + "learning_rate": 1.0431746031746032e-05, + "loss": 0.2482, + "step": 7107 + }, + { + "epoch": 40.61714285714286, + "grad_norm": 112.40228271484375, + "learning_rate": 1.0425396825396826e-05, + "loss": 0.2062, + "step": 7108 + }, + { + "epoch": 40.62285714285714, + "grad_norm": 35.6661376953125, + "learning_rate": 1.041904761904762e-05, + "loss": 0.1567, + "step": 7109 + }, + { + "epoch": 40.628571428571426, + "grad_norm": 47.076473236083984, + "learning_rate": 1.0412698412698413e-05, + "loss": 0.1987, + "step": 7110 + }, + { + "epoch": 
40.63428571428572, + "grad_norm": 27.69927978515625, + "learning_rate": 1.0406349206349208e-05, + "loss": 0.1421, + "step": 7111 + }, + { + "epoch": 40.64, + "grad_norm": 41.67701721191406, + "learning_rate": 1.04e-05, + "loss": 0.1456, + "step": 7112 + }, + { + "epoch": 40.645714285714284, + "grad_norm": 168.35186767578125, + "learning_rate": 1.0393650793650795e-05, + "loss": 0.2464, + "step": 7113 + }, + { + "epoch": 40.651428571428575, + "grad_norm": 60.49980545043945, + "learning_rate": 1.0387301587301587e-05, + "loss": 0.1441, + "step": 7114 + }, + { + "epoch": 40.65714285714286, + "grad_norm": 59.31900405883789, + "learning_rate": 1.0380952380952382e-05, + "loss": 0.1682, + "step": 7115 + }, + { + "epoch": 40.66285714285714, + "grad_norm": 41.835601806640625, + "learning_rate": 1.0374603174603176e-05, + "loss": 0.1564, + "step": 7116 + }, + { + "epoch": 40.668571428571425, + "grad_norm": 51.27973937988281, + "learning_rate": 1.0368253968253969e-05, + "loss": 0.1359, + "step": 7117 + }, + { + "epoch": 40.674285714285716, + "grad_norm": 39.31769943237305, + "learning_rate": 1.0361904761904763e-05, + "loss": 0.168, + "step": 7118 + }, + { + "epoch": 40.68, + "grad_norm": 56.311710357666016, + "learning_rate": 1.0355555555555556e-05, + "loss": 0.1511, + "step": 7119 + }, + { + "epoch": 40.68571428571428, + "grad_norm": 49.366722106933594, + "learning_rate": 1.034920634920635e-05, + "loss": 0.1828, + "step": 7120 + }, + { + "epoch": 40.691428571428574, + "grad_norm": 32.207176208496094, + "learning_rate": 1.0342857142857143e-05, + "loss": 0.1443, + "step": 7121 + }, + { + "epoch": 40.69714285714286, + "grad_norm": 57.174530029296875, + "learning_rate": 1.0336507936507937e-05, + "loss": 0.2462, + "step": 7122 + }, + { + "epoch": 40.70285714285714, + "grad_norm": 33.00163650512695, + "learning_rate": 1.033015873015873e-05, + "loss": 0.1486, + "step": 7123 + }, + { + "epoch": 40.70857142857143, + "grad_norm": 54.939388275146484, + "learning_rate": 
1.0323809523809524e-05, + "loss": 0.1501, + "step": 7124 + }, + { + "epoch": 40.714285714285715, + "grad_norm": 47.95927429199219, + "learning_rate": 1.0317460317460318e-05, + "loss": 0.1341, + "step": 7125 + }, + { + "epoch": 40.72, + "grad_norm": 42.815513610839844, + "learning_rate": 1.031111111111111e-05, + "loss": 0.133, + "step": 7126 + }, + { + "epoch": 40.72571428571428, + "grad_norm": 41.889278411865234, + "learning_rate": 1.0304761904761905e-05, + "loss": 0.2395, + "step": 7127 + }, + { + "epoch": 40.73142857142857, + "grad_norm": 27.716449737548828, + "learning_rate": 1.02984126984127e-05, + "loss": 0.1674, + "step": 7128 + }, + { + "epoch": 40.73714285714286, + "grad_norm": 26.745899200439453, + "learning_rate": 1.0292063492063494e-05, + "loss": 0.151, + "step": 7129 + }, + { + "epoch": 40.74285714285714, + "grad_norm": 55.21826171875, + "learning_rate": 1.0285714285714286e-05, + "loss": 0.1403, + "step": 7130 + }, + { + "epoch": 40.74857142857143, + "grad_norm": 23.59458351135254, + "learning_rate": 1.027936507936508e-05, + "loss": 0.1576, + "step": 7131 + }, + { + "epoch": 40.754285714285714, + "grad_norm": 33.98942947387695, + "learning_rate": 1.0273015873015873e-05, + "loss": 0.2031, + "step": 7132 + }, + { + "epoch": 40.76, + "grad_norm": 57.35104751586914, + "learning_rate": 1.0266666666666668e-05, + "loss": 0.1822, + "step": 7133 + }, + { + "epoch": 40.76571428571429, + "grad_norm": 34.38922882080078, + "learning_rate": 1.026031746031746e-05, + "loss": 0.1983, + "step": 7134 + }, + { + "epoch": 40.77142857142857, + "grad_norm": 65.87013244628906, + "learning_rate": 1.0253968253968255e-05, + "loss": 0.2299, + "step": 7135 + }, + { + "epoch": 40.777142857142856, + "grad_norm": 25.383821487426758, + "learning_rate": 1.0247619047619047e-05, + "loss": 0.148, + "step": 7136 + }, + { + "epoch": 40.78285714285714, + "grad_norm": 181.95584106445312, + "learning_rate": 1.0241269841269842e-05, + "loss": 0.1647, + "step": 7137 + }, + { + "epoch": 
40.78857142857143, + "grad_norm": 83.68648529052734, + "learning_rate": 1.0234920634920634e-05, + "loss": 0.3434, + "step": 7138 + }, + { + "epoch": 40.794285714285714, + "grad_norm": 37.009952545166016, + "learning_rate": 1.0228571428571429e-05, + "loss": 0.2471, + "step": 7139 + }, + { + "epoch": 40.8, + "grad_norm": 25.347917556762695, + "learning_rate": 1.0222222222222223e-05, + "loss": 0.1191, + "step": 7140 + }, + { + "epoch": 40.80571428571429, + "grad_norm": 29.95460319519043, + "learning_rate": 1.0215873015873016e-05, + "loss": 0.1561, + "step": 7141 + }, + { + "epoch": 40.81142857142857, + "grad_norm": 62.166805267333984, + "learning_rate": 1.020952380952381e-05, + "loss": 0.1845, + "step": 7142 + }, + { + "epoch": 40.817142857142855, + "grad_norm": 29.537752151489258, + "learning_rate": 1.0203174603174603e-05, + "loss": 0.1622, + "step": 7143 + }, + { + "epoch": 40.822857142857146, + "grad_norm": 45.15113067626953, + "learning_rate": 1.0196825396825397e-05, + "loss": 0.1796, + "step": 7144 + }, + { + "epoch": 40.82857142857143, + "grad_norm": 58.65167999267578, + "learning_rate": 1.0190476190476191e-05, + "loss": 0.2089, + "step": 7145 + }, + { + "epoch": 40.83428571428571, + "grad_norm": 17.80611228942871, + "learning_rate": 1.0184126984126986e-05, + "loss": 0.1626, + "step": 7146 + }, + { + "epoch": 40.84, + "grad_norm": 37.232444763183594, + "learning_rate": 1.0177777777777778e-05, + "loss": 0.1372, + "step": 7147 + }, + { + "epoch": 40.84571428571429, + "grad_norm": 30.31142234802246, + "learning_rate": 1.0171428571428573e-05, + "loss": 0.1886, + "step": 7148 + }, + { + "epoch": 40.85142857142857, + "grad_norm": 47.255409240722656, + "learning_rate": 1.0165079365079365e-05, + "loss": 0.1589, + "step": 7149 + }, + { + "epoch": 40.857142857142854, + "grad_norm": 63.91340255737305, + "learning_rate": 1.015873015873016e-05, + "loss": 0.2037, + "step": 7150 + }, + { + "epoch": 40.862857142857145, + "grad_norm": 17.828096389770508, + "learning_rate": 
1.0152380952380954e-05, + "loss": 0.1569, + "step": 7151 + }, + { + "epoch": 40.86857142857143, + "grad_norm": 27.19562530517578, + "learning_rate": 1.0146031746031746e-05, + "loss": 0.2451, + "step": 7152 + }, + { + "epoch": 40.87428571428571, + "grad_norm": 43.03912353515625, + "learning_rate": 1.013968253968254e-05, + "loss": 0.1407, + "step": 7153 + }, + { + "epoch": 40.88, + "grad_norm": 33.057586669921875, + "learning_rate": 1.0133333333333333e-05, + "loss": 0.1286, + "step": 7154 + }, + { + "epoch": 40.885714285714286, + "grad_norm": 183.0701904296875, + "learning_rate": 1.0126984126984128e-05, + "loss": 0.1672, + "step": 7155 + }, + { + "epoch": 40.89142857142857, + "grad_norm": 42.93004608154297, + "learning_rate": 1.012063492063492e-05, + "loss": 0.1786, + "step": 7156 + }, + { + "epoch": 40.89714285714286, + "grad_norm": 991.3026123046875, + "learning_rate": 1.0114285714285715e-05, + "loss": 0.1998, + "step": 7157 + }, + { + "epoch": 40.902857142857144, + "grad_norm": 42.53242492675781, + "learning_rate": 1.0107936507936507e-05, + "loss": 0.1536, + "step": 7158 + }, + { + "epoch": 40.90857142857143, + "grad_norm": 32.176368713378906, + "learning_rate": 1.0101587301587302e-05, + "loss": 0.1624, + "step": 7159 + }, + { + "epoch": 40.91428571428571, + "grad_norm": 99.18122100830078, + "learning_rate": 1.0095238095238094e-05, + "loss": 0.1218, + "step": 7160 + }, + { + "epoch": 40.92, + "grad_norm": 44.048099517822266, + "learning_rate": 1.0088888888888889e-05, + "loss": 0.1807, + "step": 7161 + }, + { + "epoch": 40.925714285714285, + "grad_norm": 31.111604690551758, + "learning_rate": 1.0082539682539683e-05, + "loss": 0.1413, + "step": 7162 + }, + { + "epoch": 40.93142857142857, + "grad_norm": 63.4465217590332, + "learning_rate": 1.0076190476190477e-05, + "loss": 0.1993, + "step": 7163 + }, + { + "epoch": 40.93714285714286, + "grad_norm": 42.592750549316406, + "learning_rate": 1.0069841269841272e-05, + "loss": 0.2017, + "step": 7164 + }, + { + "epoch": 
40.94285714285714, + "grad_norm": 24.540151596069336, + "learning_rate": 1.0063492063492064e-05, + "loss": 0.1304, + "step": 7165 + }, + { + "epoch": 40.94857142857143, + "grad_norm": 72.01651000976562, + "learning_rate": 1.0057142857142859e-05, + "loss": 0.3004, + "step": 7166 + }, + { + "epoch": 40.95428571428572, + "grad_norm": 35.35070037841797, + "learning_rate": 1.0050793650793651e-05, + "loss": 0.1546, + "step": 7167 + }, + { + "epoch": 40.96, + "grad_norm": 44.21571350097656, + "learning_rate": 1.0044444444444446e-05, + "loss": 0.3235, + "step": 7168 + }, + { + "epoch": 40.965714285714284, + "grad_norm": 32.9054069519043, + "learning_rate": 1.0038095238095238e-05, + "loss": 0.153, + "step": 7169 + }, + { + "epoch": 40.97142857142857, + "grad_norm": 26.192230224609375, + "learning_rate": 1.0031746031746033e-05, + "loss": 0.1832, + "step": 7170 + }, + { + "epoch": 40.97714285714286, + "grad_norm": 46.06916427612305, + "learning_rate": 1.0025396825396825e-05, + "loss": 0.1509, + "step": 7171 + }, + { + "epoch": 40.98285714285714, + "grad_norm": 40.123329162597656, + "learning_rate": 1.001904761904762e-05, + "loss": 0.1606, + "step": 7172 + }, + { + "epoch": 40.988571428571426, + "grad_norm": 37.24240493774414, + "learning_rate": 1.0012698412698414e-05, + "loss": 0.1792, + "step": 7173 + }, + { + "epoch": 40.994285714285716, + "grad_norm": 50.49718475341797, + "learning_rate": 1.0006349206349206e-05, + "loss": 0.15, + "step": 7174 + }, + { + "epoch": 41.0, + "grad_norm": 25.841310501098633, + "learning_rate": 1e-05, + "loss": 0.1727, + "step": 7175 + }, + { + "epoch": 41.0, + "eval_classes": 0, + "eval_loss": 0.5730687379837036, + "eval_map": 0.9366, + "eval_map_50": 0.9716, + "eval_map_75": 0.9644, + "eval_map_large": 0.9366, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9366, + "eval_map_small": -1.0, + "eval_mar_1": 0.7889, + "eval_mar_10": 0.9762, + "eval_mar_100": 0.9775, + "eval_mar_100_per_class": 0.9775, + "eval_mar_large": 0.9775, + 
"eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.554, + "eval_samples_per_second": 21.691, + "eval_steps_per_second": 2.73, + "step": 7175 + }, + { + "epoch": 41.005714285714284, + "grad_norm": 38.025718688964844, + "learning_rate": 9.993650793650793e-06, + "loss": 0.1689, + "step": 7176 + }, + { + "epoch": 41.011428571428574, + "grad_norm": 110.06847381591797, + "learning_rate": 9.987301587301588e-06, + "loss": 0.2411, + "step": 7177 + }, + { + "epoch": 41.01714285714286, + "grad_norm": 68.49540710449219, + "learning_rate": 9.98095238095238e-06, + "loss": 0.1803, + "step": 7178 + }, + { + "epoch": 41.02285714285714, + "grad_norm": 22.572601318359375, + "learning_rate": 9.974603174603175e-06, + "loss": 0.151, + "step": 7179 + }, + { + "epoch": 41.02857142857143, + "grad_norm": 35.90503692626953, + "learning_rate": 9.968253968253969e-06, + "loss": 0.1583, + "step": 7180 + }, + { + "epoch": 41.034285714285716, + "grad_norm": 54.24342346191406, + "learning_rate": 9.961904761904763e-06, + "loss": 0.1196, + "step": 7181 + }, + { + "epoch": 41.04, + "grad_norm": 26.808170318603516, + "learning_rate": 9.955555555555556e-06, + "loss": 0.1653, + "step": 7182 + }, + { + "epoch": 41.04571428571428, + "grad_norm": 26.27927017211914, + "learning_rate": 9.94920634920635e-06, + "loss": 0.205, + "step": 7183 + }, + { + "epoch": 41.05142857142857, + "grad_norm": 31.863327026367188, + "learning_rate": 9.942857142857143e-06, + "loss": 0.215, + "step": 7184 + }, + { + "epoch": 41.05714285714286, + "grad_norm": 33.81623840332031, + "learning_rate": 9.936507936507937e-06, + "loss": 0.1212, + "step": 7185 + }, + { + "epoch": 41.06285714285714, + "grad_norm": 22.21336555480957, + "learning_rate": 9.930158730158732e-06, + "loss": 0.2077, + "step": 7186 + }, + { + "epoch": 41.06857142857143, + "grad_norm": 39.59670639038086, + "learning_rate": 9.923809523809524e-06, + "loss": 0.1624, + "step": 7187 + }, + { + "epoch": 41.074285714285715, + "grad_norm": 
28.05008888244629, + "learning_rate": 9.917460317460319e-06, + "loss": 0.1848, + "step": 7188 + }, + { + "epoch": 41.08, + "grad_norm": 22.544891357421875, + "learning_rate": 9.911111111111111e-06, + "loss": 0.2286, + "step": 7189 + }, + { + "epoch": 41.08571428571429, + "grad_norm": 33.423702239990234, + "learning_rate": 9.904761904761906e-06, + "loss": 0.1854, + "step": 7190 + }, + { + "epoch": 41.09142857142857, + "grad_norm": 45.96302795410156, + "learning_rate": 9.898412698412698e-06, + "loss": 0.1611, + "step": 7191 + }, + { + "epoch": 41.097142857142856, + "grad_norm": 21.249528884887695, + "learning_rate": 9.892063492063493e-06, + "loss": 0.1757, + "step": 7192 + }, + { + "epoch": 41.10285714285714, + "grad_norm": 55.92581558227539, + "learning_rate": 9.885714285714285e-06, + "loss": 0.4331, + "step": 7193 + }, + { + "epoch": 41.10857142857143, + "grad_norm": 158.06480407714844, + "learning_rate": 9.87936507936508e-06, + "loss": 0.1864, + "step": 7194 + }, + { + "epoch": 41.114285714285714, + "grad_norm": 73.18998718261719, + "learning_rate": 9.873015873015872e-06, + "loss": 0.1886, + "step": 7195 + }, + { + "epoch": 41.12, + "grad_norm": 39.14434051513672, + "learning_rate": 9.866666666666667e-06, + "loss": 0.1682, + "step": 7196 + }, + { + "epoch": 41.12571428571429, + "grad_norm": 169.47976684570312, + "learning_rate": 9.86031746031746e-06, + "loss": 0.1514, + "step": 7197 + }, + { + "epoch": 41.13142857142857, + "grad_norm": 55.46110153198242, + "learning_rate": 9.853968253968253e-06, + "loss": 0.1567, + "step": 7198 + }, + { + "epoch": 41.137142857142855, + "grad_norm": 23.1102294921875, + "learning_rate": 9.847619047619048e-06, + "loss": 0.1469, + "step": 7199 + }, + { + "epoch": 41.142857142857146, + "grad_norm": 65.3264389038086, + "learning_rate": 9.841269841269842e-06, + "loss": 0.2098, + "step": 7200 + }, + { + "epoch": 41.14857142857143, + "grad_norm": 18.760353088378906, + "learning_rate": 9.834920634920636e-06, + "loss": 0.1653, + "step": 7201 
+ }, + { + "epoch": 41.15428571428571, + "grad_norm": 55.80414581298828, + "learning_rate": 9.828571428571429e-06, + "loss": 0.1389, + "step": 7202 + }, + { + "epoch": 41.16, + "grad_norm": 39.084163665771484, + "learning_rate": 9.822222222222223e-06, + "loss": 0.1083, + "step": 7203 + }, + { + "epoch": 41.16571428571429, + "grad_norm": 29.897830963134766, + "learning_rate": 9.815873015873016e-06, + "loss": 0.183, + "step": 7204 + }, + { + "epoch": 41.17142857142857, + "grad_norm": 44.33466339111328, + "learning_rate": 9.80952380952381e-06, + "loss": 0.0939, + "step": 7205 + }, + { + "epoch": 41.177142857142854, + "grad_norm": 93.39254760742188, + "learning_rate": 9.803174603174603e-06, + "loss": 0.13, + "step": 7206 + }, + { + "epoch": 41.182857142857145, + "grad_norm": 133.07708740234375, + "learning_rate": 9.796825396825397e-06, + "loss": 0.2016, + "step": 7207 + }, + { + "epoch": 41.18857142857143, + "grad_norm": 25.82640838623047, + "learning_rate": 9.790476190476192e-06, + "loss": 0.1519, + "step": 7208 + }, + { + "epoch": 41.19428571428571, + "grad_norm": 32.13935089111328, + "learning_rate": 9.784126984126984e-06, + "loss": 0.1569, + "step": 7209 + }, + { + "epoch": 41.2, + "grad_norm": 28.959239959716797, + "learning_rate": 9.777777777777779e-06, + "loss": 0.151, + "step": 7210 + }, + { + "epoch": 41.205714285714286, + "grad_norm": 45.810340881347656, + "learning_rate": 9.771428571428571e-06, + "loss": 0.1436, + "step": 7211 + }, + { + "epoch": 41.21142857142857, + "grad_norm": 50.805850982666016, + "learning_rate": 9.765079365079366e-06, + "loss": 0.1672, + "step": 7212 + }, + { + "epoch": 41.21714285714286, + "grad_norm": 47.62202835083008, + "learning_rate": 9.758730158730158e-06, + "loss": 0.1782, + "step": 7213 + }, + { + "epoch": 41.222857142857144, + "grad_norm": 506.37432861328125, + "learning_rate": 9.752380952380953e-06, + "loss": 0.2537, + "step": 7214 + }, + { + "epoch": 41.22857142857143, + "grad_norm": 13.947271347045898, + "learning_rate": 
9.746031746031745e-06, + "loss": 0.1388, + "step": 7215 + }, + { + "epoch": 41.23428571428571, + "grad_norm": 60.3114013671875, + "learning_rate": 9.73968253968254e-06, + "loss": 0.1152, + "step": 7216 + }, + { + "epoch": 41.24, + "grad_norm": 24.50984001159668, + "learning_rate": 9.733333333333334e-06, + "loss": 0.1979, + "step": 7217 + }, + { + "epoch": 41.245714285714286, + "grad_norm": 27.732975006103516, + "learning_rate": 9.726984126984128e-06, + "loss": 0.1992, + "step": 7218 + }, + { + "epoch": 41.25142857142857, + "grad_norm": 43.578125, + "learning_rate": 9.720634920634923e-06, + "loss": 0.2019, + "step": 7219 + }, + { + "epoch": 41.25714285714286, + "grad_norm": 23.549551010131836, + "learning_rate": 9.714285714285715e-06, + "loss": 0.1809, + "step": 7220 + }, + { + "epoch": 41.26285714285714, + "grad_norm": 29.41553497314453, + "learning_rate": 9.70793650793651e-06, + "loss": 0.1406, + "step": 7221 + }, + { + "epoch": 41.26857142857143, + "grad_norm": 33.58580017089844, + "learning_rate": 9.701587301587302e-06, + "loss": 0.1632, + "step": 7222 + }, + { + "epoch": 41.27428571428572, + "grad_norm": 35.227149963378906, + "learning_rate": 9.695238095238096e-06, + "loss": 0.1733, + "step": 7223 + }, + { + "epoch": 41.28, + "grad_norm": 53.736473083496094, + "learning_rate": 9.688888888888889e-06, + "loss": 0.22, + "step": 7224 + }, + { + "epoch": 41.285714285714285, + "grad_norm": 110.43754577636719, + "learning_rate": 9.682539682539683e-06, + "loss": 0.1869, + "step": 7225 + }, + { + "epoch": 41.29142857142857, + "grad_norm": 33.39349365234375, + "learning_rate": 9.676190476190476e-06, + "loss": 0.2195, + "step": 7226 + }, + { + "epoch": 41.29714285714286, + "grad_norm": 50.573699951171875, + "learning_rate": 9.66984126984127e-06, + "loss": 0.1543, + "step": 7227 + }, + { + "epoch": 41.30285714285714, + "grad_norm": 87.84054565429688, + "learning_rate": 9.663492063492063e-06, + "loss": 0.2164, + "step": 7228 + }, + { + "epoch": 41.308571428571426, + 
"grad_norm": 2033.5458984375, + "learning_rate": 9.657142857142857e-06, + "loss": 0.1763, + "step": 7229 + }, + { + "epoch": 41.31428571428572, + "grad_norm": 49.382286071777344, + "learning_rate": 9.650793650793652e-06, + "loss": 0.1636, + "step": 7230 + }, + { + "epoch": 41.32, + "grad_norm": 32.18121337890625, + "learning_rate": 9.644444444444444e-06, + "loss": 0.168, + "step": 7231 + }, + { + "epoch": 41.325714285714284, + "grad_norm": 63.839229583740234, + "learning_rate": 9.638095238095239e-06, + "loss": 0.1657, + "step": 7232 + }, + { + "epoch": 41.331428571428575, + "grad_norm": 36.92330551147461, + "learning_rate": 9.631746031746031e-06, + "loss": 0.1305, + "step": 7233 + }, + { + "epoch": 41.33714285714286, + "grad_norm": 57.05158996582031, + "learning_rate": 9.625396825396826e-06, + "loss": 0.1297, + "step": 7234 + }, + { + "epoch": 41.34285714285714, + "grad_norm": 39.037010192871094, + "learning_rate": 9.61904761904762e-06, + "loss": 0.1542, + "step": 7235 + }, + { + "epoch": 41.348571428571425, + "grad_norm": 33.03523635864258, + "learning_rate": 9.612698412698414e-06, + "loss": 0.1577, + "step": 7236 + }, + { + "epoch": 41.354285714285716, + "grad_norm": 21.612871170043945, + "learning_rate": 9.606349206349207e-06, + "loss": 0.156, + "step": 7237 + }, + { + "epoch": 41.36, + "grad_norm": 37.1363639831543, + "learning_rate": 9.600000000000001e-06, + "loss": 0.1448, + "step": 7238 + }, + { + "epoch": 41.36571428571428, + "grad_norm": 72.37059783935547, + "learning_rate": 9.593650793650794e-06, + "loss": 0.1823, + "step": 7239 + }, + { + "epoch": 41.371428571428574, + "grad_norm": 33.95829391479492, + "learning_rate": 9.587301587301588e-06, + "loss": 0.1539, + "step": 7240 + }, + { + "epoch": 41.37714285714286, + "grad_norm": 32.29264450073242, + "learning_rate": 9.580952380952381e-06, + "loss": 0.223, + "step": 7241 + }, + { + "epoch": 41.38285714285714, + "grad_norm": 36.21721267700195, + "learning_rate": 9.574603174603175e-06, + "loss": 0.2386, + 
"step": 7242 + }, + { + "epoch": 41.38857142857143, + "grad_norm": 46.11216354370117, + "learning_rate": 9.56825396825397e-06, + "loss": 0.1555, + "step": 7243 + }, + { + "epoch": 41.394285714285715, + "grad_norm": 19.189931869506836, + "learning_rate": 9.561904761904762e-06, + "loss": 0.1856, + "step": 7244 + }, + { + "epoch": 41.4, + "grad_norm": 66.90583038330078, + "learning_rate": 9.555555555555556e-06, + "loss": 0.122, + "step": 7245 + }, + { + "epoch": 41.40571428571428, + "grad_norm": 32.81113815307617, + "learning_rate": 9.549206349206349e-06, + "loss": 0.3572, + "step": 7246 + }, + { + "epoch": 41.41142857142857, + "grad_norm": 35.498680114746094, + "learning_rate": 9.542857142857143e-06, + "loss": 0.2171, + "step": 7247 + }, + { + "epoch": 41.417142857142856, + "grad_norm": 76.73021697998047, + "learning_rate": 9.536507936507936e-06, + "loss": 0.1933, + "step": 7248 + }, + { + "epoch": 41.42285714285714, + "grad_norm": 47.33732223510742, + "learning_rate": 9.53015873015873e-06, + "loss": 0.1082, + "step": 7249 + }, + { + "epoch": 41.42857142857143, + "grad_norm": 61.91738510131836, + "learning_rate": 9.523809523809523e-06, + "loss": 0.1747, + "step": 7250 + }, + { + "epoch": 41.434285714285714, + "grad_norm": 35.25819778442383, + "learning_rate": 9.517460317460317e-06, + "loss": 0.1635, + "step": 7251 + }, + { + "epoch": 41.44, + "grad_norm": 26.504154205322266, + "learning_rate": 9.511111111111112e-06, + "loss": 0.1682, + "step": 7252 + }, + { + "epoch": 41.44571428571429, + "grad_norm": 64.45111846923828, + "learning_rate": 9.504761904761906e-06, + "loss": 0.2093, + "step": 7253 + }, + { + "epoch": 41.45142857142857, + "grad_norm": 35.32815933227539, + "learning_rate": 9.498412698412699e-06, + "loss": 0.1466, + "step": 7254 + }, + { + "epoch": 41.457142857142856, + "grad_norm": 29.58865737915039, + "learning_rate": 9.492063492063493e-06, + "loss": 0.163, + "step": 7255 + }, + { + "epoch": 41.462857142857146, + "grad_norm": 23.193740844726562, + 
"learning_rate": 9.485714285714287e-06, + "loss": 0.2038, + "step": 7256 + }, + { + "epoch": 41.46857142857143, + "grad_norm": 37.13410949707031, + "learning_rate": 9.47936507936508e-06, + "loss": 0.1304, + "step": 7257 + }, + { + "epoch": 41.47428571428571, + "grad_norm": 44.46181106567383, + "learning_rate": 9.473015873015874e-06, + "loss": 0.172, + "step": 7258 + }, + { + "epoch": 41.48, + "grad_norm": 29.92389678955078, + "learning_rate": 9.466666666666667e-06, + "loss": 0.1343, + "step": 7259 + }, + { + "epoch": 41.48571428571429, + "grad_norm": 64.4034194946289, + "learning_rate": 9.460317460317461e-06, + "loss": 0.2648, + "step": 7260 + }, + { + "epoch": 41.49142857142857, + "grad_norm": 26.941253662109375, + "learning_rate": 9.453968253968254e-06, + "loss": 0.1634, + "step": 7261 + }, + { + "epoch": 41.497142857142855, + "grad_norm": 49.134525299072266, + "learning_rate": 9.447619047619048e-06, + "loss": 0.1707, + "step": 7262 + }, + { + "epoch": 41.502857142857145, + "grad_norm": 341.13677978515625, + "learning_rate": 9.441269841269841e-06, + "loss": 0.1977, + "step": 7263 + }, + { + "epoch": 41.50857142857143, + "grad_norm": 53.64311981201172, + "learning_rate": 9.434920634920635e-06, + "loss": 0.1562, + "step": 7264 + }, + { + "epoch": 41.51428571428571, + "grad_norm": 18.503982543945312, + "learning_rate": 9.42857142857143e-06, + "loss": 0.1383, + "step": 7265 + }, + { + "epoch": 41.52, + "grad_norm": 39.2716178894043, + "learning_rate": 9.422222222222222e-06, + "loss": 0.1532, + "step": 7266 + }, + { + "epoch": 41.52571428571429, + "grad_norm": 26.437915802001953, + "learning_rate": 9.415873015873017e-06, + "loss": 0.2173, + "step": 7267 + }, + { + "epoch": 41.53142857142857, + "grad_norm": 22.738313674926758, + "learning_rate": 9.409523809523809e-06, + "loss": 0.1626, + "step": 7268 + }, + { + "epoch": 41.537142857142854, + "grad_norm": 81.11518859863281, + "learning_rate": 9.403174603174603e-06, + "loss": 0.1764, + "step": 7269 + }, + { + "epoch": 
41.542857142857144, + "grad_norm": 41.88934326171875, + "learning_rate": 9.396825396825396e-06, + "loss": 0.1465, + "step": 7270 + }, + { + "epoch": 41.54857142857143, + "grad_norm": 53.15003204345703, + "learning_rate": 9.39047619047619e-06, + "loss": 0.1899, + "step": 7271 + }, + { + "epoch": 41.55428571428571, + "grad_norm": 253.06153869628906, + "learning_rate": 9.384126984126985e-06, + "loss": 0.208, + "step": 7272 + }, + { + "epoch": 41.56, + "grad_norm": 23.035747528076172, + "learning_rate": 9.377777777777779e-06, + "loss": 0.1603, + "step": 7273 + }, + { + "epoch": 41.565714285714286, + "grad_norm": 63.308746337890625, + "learning_rate": 9.371428571428572e-06, + "loss": 0.1591, + "step": 7274 + }, + { + "epoch": 41.57142857142857, + "grad_norm": 91.1810302734375, + "learning_rate": 9.365079365079366e-06, + "loss": 0.144, + "step": 7275 + }, + { + "epoch": 41.57714285714286, + "grad_norm": 40.969486236572266, + "learning_rate": 9.35873015873016e-06, + "loss": 0.1754, + "step": 7276 + }, + { + "epoch": 41.582857142857144, + "grad_norm": 19.191484451293945, + "learning_rate": 9.352380952380953e-06, + "loss": 0.1415, + "step": 7277 + }, + { + "epoch": 41.58857142857143, + "grad_norm": 53.4935188293457, + "learning_rate": 9.346031746031747e-06, + "loss": 0.2566, + "step": 7278 + }, + { + "epoch": 41.59428571428572, + "grad_norm": 59.00881576538086, + "learning_rate": 9.33968253968254e-06, + "loss": 0.2594, + "step": 7279 + }, + { + "epoch": 41.6, + "grad_norm": 25.847272872924805, + "learning_rate": 9.333333333333334e-06, + "loss": 0.1878, + "step": 7280 + }, + { + "epoch": 41.605714285714285, + "grad_norm": 21.484376907348633, + "learning_rate": 9.326984126984127e-06, + "loss": 0.1395, + "step": 7281 + }, + { + "epoch": 41.61142857142857, + "grad_norm": 50.11928176879883, + "learning_rate": 9.320634920634921e-06, + "loss": 0.2086, + "step": 7282 + }, + { + "epoch": 41.61714285714286, + "grad_norm": 69.43877410888672, + "learning_rate": 9.314285714285714e-06, + 
"loss": 0.1617, + "step": 7283 + }, + { + "epoch": 41.62285714285714, + "grad_norm": 61.6375732421875, + "learning_rate": 9.307936507936508e-06, + "loss": 0.1607, + "step": 7284 + }, + { + "epoch": 41.628571428571426, + "grad_norm": 43.103084564208984, + "learning_rate": 9.301587301587301e-06, + "loss": 0.134, + "step": 7285 + }, + { + "epoch": 41.63428571428572, + "grad_norm": 46.29750061035156, + "learning_rate": 9.295238095238095e-06, + "loss": 0.2005, + "step": 7286 + }, + { + "epoch": 41.64, + "grad_norm": 33.02797317504883, + "learning_rate": 9.288888888888888e-06, + "loss": 0.1527, + "step": 7287 + }, + { + "epoch": 41.645714285714284, + "grad_norm": 73.48651123046875, + "learning_rate": 9.282539682539682e-06, + "loss": 0.1214, + "step": 7288 + }, + { + "epoch": 41.651428571428575, + "grad_norm": 46.53531265258789, + "learning_rate": 9.276190476190477e-06, + "loss": 0.1913, + "step": 7289 + }, + { + "epoch": 41.65714285714286, + "grad_norm": 55.551116943359375, + "learning_rate": 9.26984126984127e-06, + "loss": 0.1345, + "step": 7290 + }, + { + "epoch": 41.66285714285714, + "grad_norm": 72.10269927978516, + "learning_rate": 9.263492063492065e-06, + "loss": 0.1673, + "step": 7291 + }, + { + "epoch": 41.668571428571425, + "grad_norm": 47.2354850769043, + "learning_rate": 9.257142857142858e-06, + "loss": 0.2478, + "step": 7292 + }, + { + "epoch": 41.674285714285716, + "grad_norm": 27.38324546813965, + "learning_rate": 9.250793650793652e-06, + "loss": 0.1887, + "step": 7293 + }, + { + "epoch": 41.68, + "grad_norm": 70.26866912841797, + "learning_rate": 9.244444444444445e-06, + "loss": 0.2682, + "step": 7294 + }, + { + "epoch": 41.68571428571428, + "grad_norm": 54.18074417114258, + "learning_rate": 9.238095238095239e-06, + "loss": 0.3105, + "step": 7295 + }, + { + "epoch": 41.691428571428574, + "grad_norm": 162.82015991210938, + "learning_rate": 9.231746031746032e-06, + "loss": 0.1445, + "step": 7296 + }, + { + "epoch": 41.69714285714286, + "grad_norm": 
63.81920623779297, + "learning_rate": 9.225396825396826e-06, + "loss": 0.189, + "step": 7297 + }, + { + "epoch": 41.70285714285714, + "grad_norm": 32.952274322509766, + "learning_rate": 9.219047619047619e-06, + "loss": 0.172, + "step": 7298 + }, + { + "epoch": 41.70857142857143, + "grad_norm": 37.01651382446289, + "learning_rate": 9.212698412698413e-06, + "loss": 0.1575, + "step": 7299 + }, + { + "epoch": 41.714285714285715, + "grad_norm": 66.97454833984375, + "learning_rate": 9.206349206349207e-06, + "loss": 0.1425, + "step": 7300 + }, + { + "epoch": 41.72, + "grad_norm": 47.040809631347656, + "learning_rate": 9.2e-06, + "loss": 0.1384, + "step": 7301 + }, + { + "epoch": 41.72571428571428, + "grad_norm": 82.4026870727539, + "learning_rate": 9.193650793650794e-06, + "loss": 0.301, + "step": 7302 + }, + { + "epoch": 41.73142857142857, + "grad_norm": 39.067626953125, + "learning_rate": 9.187301587301587e-06, + "loss": 0.2164, + "step": 7303 + }, + { + "epoch": 41.73714285714286, + "grad_norm": 306.2405090332031, + "learning_rate": 9.180952380952381e-06, + "loss": 0.1345, + "step": 7304 + }, + { + "epoch": 41.74285714285714, + "grad_norm": 29.726070404052734, + "learning_rate": 9.174603174603174e-06, + "loss": 0.1596, + "step": 7305 + }, + { + "epoch": 41.74857142857143, + "grad_norm": 62.11240768432617, + "learning_rate": 9.168253968253968e-06, + "loss": 0.1669, + "step": 7306 + }, + { + "epoch": 41.754285714285714, + "grad_norm": 372.9456787109375, + "learning_rate": 9.161904761904763e-06, + "loss": 0.1711, + "step": 7307 + }, + { + "epoch": 41.76, + "grad_norm": 378.95269775390625, + "learning_rate": 9.155555555555557e-06, + "loss": 0.1879, + "step": 7308 + }, + { + "epoch": 41.76571428571429, + "grad_norm": 33.18974685668945, + "learning_rate": 9.14920634920635e-06, + "loss": 0.1627, + "step": 7309 + }, + { + "epoch": 41.77142857142857, + "grad_norm": 72.96440124511719, + "learning_rate": 9.142857142857144e-06, + "loss": 0.128, + "step": 7310 + }, + { + "epoch": 
41.777142857142856, + "grad_norm": 56.02112579345703, + "learning_rate": 9.136507936507938e-06, + "loss": 0.1993, + "step": 7311 + }, + { + "epoch": 41.78285714285714, + "grad_norm": 47.467979431152344, + "learning_rate": 9.130158730158731e-06, + "loss": 0.1318, + "step": 7312 + }, + { + "epoch": 41.78857142857143, + "grad_norm": 83.64862823486328, + "learning_rate": 9.123809523809525e-06, + "loss": 0.2166, + "step": 7313 + }, + { + "epoch": 41.794285714285714, + "grad_norm": 97.51394653320312, + "learning_rate": 9.117460317460318e-06, + "loss": 0.1289, + "step": 7314 + }, + { + "epoch": 41.8, + "grad_norm": 37.590824127197266, + "learning_rate": 9.111111111111112e-06, + "loss": 0.2469, + "step": 7315 + }, + { + "epoch": 41.80571428571429, + "grad_norm": 56.855812072753906, + "learning_rate": 9.104761904761905e-06, + "loss": 0.1568, + "step": 7316 + }, + { + "epoch": 41.81142857142857, + "grad_norm": 60.23370361328125, + "learning_rate": 9.098412698412699e-06, + "loss": 0.1729, + "step": 7317 + }, + { + "epoch": 41.817142857142855, + "grad_norm": 49.858306884765625, + "learning_rate": 9.092063492063492e-06, + "loss": 0.1847, + "step": 7318 + }, + { + "epoch": 41.822857142857146, + "grad_norm": 54.69880294799805, + "learning_rate": 9.085714285714286e-06, + "loss": 0.1988, + "step": 7319 + }, + { + "epoch": 41.82857142857143, + "grad_norm": 59.97076416015625, + "learning_rate": 9.079365079365079e-06, + "loss": 0.1602, + "step": 7320 + }, + { + "epoch": 41.83428571428571, + "grad_norm": 47.91924285888672, + "learning_rate": 9.073015873015873e-06, + "loss": 0.1752, + "step": 7321 + }, + { + "epoch": 41.84, + "grad_norm": 59.64024353027344, + "learning_rate": 9.066666666666667e-06, + "loss": 0.1819, + "step": 7322 + }, + { + "epoch": 41.84571428571429, + "grad_norm": 101.1203384399414, + "learning_rate": 9.06031746031746e-06, + "loss": 0.2232, + "step": 7323 + }, + { + "epoch": 41.85142857142857, + "grad_norm": 30.48805046081543, + "learning_rate": 
9.053968253968254e-06, + "loss": 0.1565, + "step": 7324 + }, + { + "epoch": 41.857142857142854, + "grad_norm": 74.9332504272461, + "learning_rate": 9.047619047619047e-06, + "loss": 0.149, + "step": 7325 + }, + { + "epoch": 41.862857142857145, + "grad_norm": 82.43065643310547, + "learning_rate": 9.041269841269841e-06, + "loss": 0.1471, + "step": 7326 + }, + { + "epoch": 41.86857142857143, + "grad_norm": 80.38092041015625, + "learning_rate": 9.034920634920636e-06, + "loss": 0.1401, + "step": 7327 + }, + { + "epoch": 41.87428571428571, + "grad_norm": 16.437274932861328, + "learning_rate": 9.02857142857143e-06, + "loss": 0.1996, + "step": 7328 + }, + { + "epoch": 41.88, + "grad_norm": 36.55046081542969, + "learning_rate": 9.022222222222223e-06, + "loss": 0.1489, + "step": 7329 + }, + { + "epoch": 41.885714285714286, + "grad_norm": 52.14812088012695, + "learning_rate": 9.015873015873017e-06, + "loss": 0.227, + "step": 7330 + }, + { + "epoch": 41.89142857142857, + "grad_norm": 28.186607360839844, + "learning_rate": 9.00952380952381e-06, + "loss": 0.1211, + "step": 7331 + }, + { + "epoch": 41.89714285714286, + "grad_norm": 51.75035095214844, + "learning_rate": 9.003174603174604e-06, + "loss": 0.2142, + "step": 7332 + }, + { + "epoch": 41.902857142857144, + "grad_norm": 67.82548522949219, + "learning_rate": 8.996825396825398e-06, + "loss": 0.1853, + "step": 7333 + }, + { + "epoch": 41.90857142857143, + "grad_norm": 45.53987503051758, + "learning_rate": 8.990476190476191e-06, + "loss": 0.1488, + "step": 7334 + }, + { + "epoch": 41.91428571428571, + "grad_norm": 29.2596435546875, + "learning_rate": 8.984126984126985e-06, + "loss": 0.1507, + "step": 7335 + }, + { + "epoch": 41.92, + "grad_norm": 56.77320861816406, + "learning_rate": 8.977777777777778e-06, + "loss": 0.1727, + "step": 7336 + }, + { + "epoch": 41.925714285714285, + "grad_norm": 41.84785461425781, + "learning_rate": 8.971428571428572e-06, + "loss": 0.1439, + "step": 7337 + }, + { + "epoch": 41.93142857142857, + 
"grad_norm": 63.32027053833008, + "learning_rate": 8.965079365079365e-06, + "loss": 0.1856, + "step": 7338 + }, + { + "epoch": 41.93714285714286, + "grad_norm": 639.3530883789062, + "learning_rate": 8.958730158730159e-06, + "loss": 0.1719, + "step": 7339 + }, + { + "epoch": 41.94285714285714, + "grad_norm": 55.293338775634766, + "learning_rate": 8.952380952380952e-06, + "loss": 0.1421, + "step": 7340 + }, + { + "epoch": 41.94857142857143, + "grad_norm": 31.818618774414062, + "learning_rate": 8.946031746031746e-06, + "loss": 0.176, + "step": 7341 + }, + { + "epoch": 41.95428571428572, + "grad_norm": 87.29776763916016, + "learning_rate": 8.939682539682539e-06, + "loss": 0.144, + "step": 7342 + }, + { + "epoch": 41.96, + "grad_norm": 16.483863830566406, + "learning_rate": 8.933333333333333e-06, + "loss": 0.1344, + "step": 7343 + }, + { + "epoch": 41.965714285714284, + "grad_norm": 48.55203628540039, + "learning_rate": 8.926984126984127e-06, + "loss": 0.1686, + "step": 7344 + }, + { + "epoch": 41.97142857142857, + "grad_norm": 33.186134338378906, + "learning_rate": 8.920634920634922e-06, + "loss": 0.1719, + "step": 7345 + }, + { + "epoch": 41.97714285714286, + "grad_norm": 66.01484680175781, + "learning_rate": 8.914285714285716e-06, + "loss": 0.1366, + "step": 7346 + }, + { + "epoch": 41.98285714285714, + "grad_norm": 48.887664794921875, + "learning_rate": 8.907936507936509e-06, + "loss": 0.1151, + "step": 7347 + }, + { + "epoch": 41.988571428571426, + "grad_norm": 68.40586853027344, + "learning_rate": 8.901587301587303e-06, + "loss": 0.15, + "step": 7348 + }, + { + "epoch": 41.994285714285716, + "grad_norm": 23.55936622619629, + "learning_rate": 8.895238095238096e-06, + "loss": 0.172, + "step": 7349 + }, + { + "epoch": 42.0, + "grad_norm": 72.13063049316406, + "learning_rate": 8.88888888888889e-06, + "loss": 0.1595, + "step": 7350 + }, + { + "epoch": 42.0, + "eval_classes": 0, + "eval_loss": 0.5726149082183838, + "eval_map": 0.9362, + "eval_map_50": 0.969, + 
"eval_map_75": 0.9629, + "eval_map_large": 0.9362, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9362, + "eval_map_small": -1.0, + "eval_mar_1": 0.787, + "eval_mar_10": 0.9743, + "eval_mar_100": 0.9749, + "eval_mar_100_per_class": 0.9749, + "eval_mar_large": 0.9749, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.2795, + "eval_samples_per_second": 22.139, + "eval_steps_per_second": 2.786, + "step": 7350 + }, + { + "epoch": 42.005714285714284, + "grad_norm": 58.79615020751953, + "learning_rate": 8.882539682539683e-06, + "loss": 0.1559, + "step": 7351 + }, + { + "epoch": 42.011428571428574, + "grad_norm": 56.658626556396484, + "learning_rate": 8.876190476190477e-06, + "loss": 0.1876, + "step": 7352 + }, + { + "epoch": 42.01714285714286, + "grad_norm": 24.444313049316406, + "learning_rate": 8.86984126984127e-06, + "loss": 0.1505, + "step": 7353 + }, + { + "epoch": 42.02285714285714, + "grad_norm": 83.33272552490234, + "learning_rate": 8.863492063492064e-06, + "loss": 0.1383, + "step": 7354 + }, + { + "epoch": 42.02857142857143, + "grad_norm": 38.414390563964844, + "learning_rate": 8.857142857142857e-06, + "loss": 0.1874, + "step": 7355 + }, + { + "epoch": 42.034285714285716, + "grad_norm": 108.53782653808594, + "learning_rate": 8.850793650793651e-06, + "loss": 0.2398, + "step": 7356 + }, + { + "epoch": 42.04, + "grad_norm": 49.074947357177734, + "learning_rate": 8.844444444444445e-06, + "loss": 0.1487, + "step": 7357 + }, + { + "epoch": 42.04571428571428, + "grad_norm": 29.031755447387695, + "learning_rate": 8.838095238095238e-06, + "loss": 0.1396, + "step": 7358 + }, + { + "epoch": 42.05142857142857, + "grad_norm": 42.39862823486328, + "learning_rate": 8.831746031746032e-06, + "loss": 0.2134, + "step": 7359 + }, + { + "epoch": 42.05714285714286, + "grad_norm": 14.854192733764648, + "learning_rate": 8.825396825396825e-06, + "loss": 0.0998, + "step": 7360 + }, + { + "epoch": 42.06285714285714, + "grad_norm": 25.197086334228516, + 
"learning_rate": 8.819047619047619e-06, + "loss": 0.233, + "step": 7361 + }, + { + "epoch": 42.06857142857143, + "grad_norm": 218.95265197753906, + "learning_rate": 8.812698412698413e-06, + "loss": 0.1632, + "step": 7362 + }, + { + "epoch": 42.074285714285715, + "grad_norm": 36.32129669189453, + "learning_rate": 8.806349206349208e-06, + "loss": 0.1277, + "step": 7363 + }, + { + "epoch": 42.08, + "grad_norm": 43.652706146240234, + "learning_rate": 8.8e-06, + "loss": 0.1239, + "step": 7364 + }, + { + "epoch": 42.08571428571429, + "grad_norm": 33.010154724121094, + "learning_rate": 8.793650793650795e-06, + "loss": 0.1407, + "step": 7365 + }, + { + "epoch": 42.09142857142857, + "grad_norm": 51.78741455078125, + "learning_rate": 8.787301587301587e-06, + "loss": 0.2169, + "step": 7366 + }, + { + "epoch": 42.097142857142856, + "grad_norm": 20.24148178100586, + "learning_rate": 8.780952380952382e-06, + "loss": 0.2252, + "step": 7367 + }, + { + "epoch": 42.10285714285714, + "grad_norm": 40.71898651123047, + "learning_rate": 8.774603174603176e-06, + "loss": 0.1663, + "step": 7368 + }, + { + "epoch": 42.10857142857143, + "grad_norm": 49.57981491088867, + "learning_rate": 8.768253968253969e-06, + "loss": 0.157, + "step": 7369 + }, + { + "epoch": 42.114285714285714, + "grad_norm": 39.95606231689453, + "learning_rate": 8.761904761904763e-06, + "loss": 0.1447, + "step": 7370 + }, + { + "epoch": 42.12, + "grad_norm": 21.526668548583984, + "learning_rate": 8.755555555555556e-06, + "loss": 0.1345, + "step": 7371 + }, + { + "epoch": 42.12571428571429, + "grad_norm": 17.036447525024414, + "learning_rate": 8.74920634920635e-06, + "loss": 0.1782, + "step": 7372 + }, + { + "epoch": 42.13142857142857, + "grad_norm": 33.06338882446289, + "learning_rate": 8.742857142857143e-06, + "loss": 0.1922, + "step": 7373 + }, + { + "epoch": 42.137142857142855, + "grad_norm": 32.230064392089844, + "learning_rate": 8.736507936507937e-06, + "loss": 0.1713, + "step": 7374 + }, + { + "epoch": 
42.142857142857146, + "grad_norm": 49.66074752807617, + "learning_rate": 8.73015873015873e-06, + "loss": 0.2553, + "step": 7375 + }, + { + "epoch": 42.14857142857143, + "grad_norm": 63.30400466918945, + "learning_rate": 8.723809523809524e-06, + "loss": 0.2243, + "step": 7376 + }, + { + "epoch": 42.15428571428571, + "grad_norm": 17.84560203552246, + "learning_rate": 8.717460317460317e-06, + "loss": 0.1485, + "step": 7377 + }, + { + "epoch": 42.16, + "grad_norm": 45.32656478881836, + "learning_rate": 8.711111111111111e-06, + "loss": 0.1368, + "step": 7378 + }, + { + "epoch": 42.16571428571429, + "grad_norm": 65.4549331665039, + "learning_rate": 8.704761904761905e-06, + "loss": 0.1309, + "step": 7379 + }, + { + "epoch": 42.17142857142857, + "grad_norm": 58.42035675048828, + "learning_rate": 8.6984126984127e-06, + "loss": 0.1501, + "step": 7380 + }, + { + "epoch": 42.177142857142854, + "grad_norm": 63.2991943359375, + "learning_rate": 8.692063492063492e-06, + "loss": 0.1632, + "step": 7381 + }, + { + "epoch": 42.182857142857145, + "grad_norm": 24.08550453186035, + "learning_rate": 8.685714285714287e-06, + "loss": 0.1295, + "step": 7382 + }, + { + "epoch": 42.18857142857143, + "grad_norm": 404.84649658203125, + "learning_rate": 8.679365079365081e-06, + "loss": 0.1835, + "step": 7383 + }, + { + "epoch": 42.19428571428571, + "grad_norm": 51.992088317871094, + "learning_rate": 8.673015873015873e-06, + "loss": 0.1745, + "step": 7384 + }, + { + "epoch": 42.2, + "grad_norm": 29.288646697998047, + "learning_rate": 8.666666666666668e-06, + "loss": 0.1602, + "step": 7385 + }, + { + "epoch": 42.205714285714286, + "grad_norm": 29.5358943939209, + "learning_rate": 8.66031746031746e-06, + "loss": 0.1556, + "step": 7386 + }, + { + "epoch": 42.21142857142857, + "grad_norm": 51.27584457397461, + "learning_rate": 8.653968253968255e-06, + "loss": 0.1505, + "step": 7387 + }, + { + "epoch": 42.21714285714286, + "grad_norm": 45.10905456542969, + "learning_rate": 8.647619047619047e-06, + 
"loss": 0.1207, + "step": 7388 + }, + { + "epoch": 42.222857142857144, + "grad_norm": 32.930137634277344, + "learning_rate": 8.641269841269842e-06, + "loss": 0.1865, + "step": 7389 + }, + { + "epoch": 42.22857142857143, + "grad_norm": 20.60719871520996, + "learning_rate": 8.634920634920636e-06, + "loss": 0.1643, + "step": 7390 + }, + { + "epoch": 42.23428571428571, + "grad_norm": 28.81972885131836, + "learning_rate": 8.628571428571429e-06, + "loss": 0.163, + "step": 7391 + }, + { + "epoch": 42.24, + "grad_norm": 65.38088989257812, + "learning_rate": 8.622222222222223e-06, + "loss": 0.1798, + "step": 7392 + }, + { + "epoch": 42.245714285714286, + "grad_norm": 90.9728775024414, + "learning_rate": 8.615873015873016e-06, + "loss": 0.1716, + "step": 7393 + }, + { + "epoch": 42.25142857142857, + "grad_norm": 1374.4593505859375, + "learning_rate": 8.60952380952381e-06, + "loss": 0.1409, + "step": 7394 + }, + { + "epoch": 42.25714285714286, + "grad_norm": 74.26192474365234, + "learning_rate": 8.603174603174603e-06, + "loss": 0.151, + "step": 7395 + }, + { + "epoch": 42.26285714285714, + "grad_norm": 34.685264587402344, + "learning_rate": 8.596825396825397e-06, + "loss": 0.1559, + "step": 7396 + }, + { + "epoch": 42.26857142857143, + "grad_norm": 54.60033416748047, + "learning_rate": 8.59047619047619e-06, + "loss": 0.1207, + "step": 7397 + }, + { + "epoch": 42.27428571428572, + "grad_norm": 33.01666259765625, + "learning_rate": 8.584126984126984e-06, + "loss": 0.2563, + "step": 7398 + }, + { + "epoch": 42.28, + "grad_norm": 78.32510375976562, + "learning_rate": 8.577777777777778e-06, + "loss": 0.1415, + "step": 7399 + }, + { + "epoch": 42.285714285714285, + "grad_norm": 29.386106491088867, + "learning_rate": 8.571428571428573e-06, + "loss": 0.1619, + "step": 7400 + }, + { + "epoch": 42.29142857142857, + "grad_norm": 19.554582595825195, + "learning_rate": 8.565079365079365e-06, + "loss": 0.1221, + "step": 7401 + }, + { + "epoch": 42.29714285714286, + "grad_norm": 
61.935787200927734, + "learning_rate": 8.55873015873016e-06, + "loss": 0.2028, + "step": 7402 + }, + { + "epoch": 42.30285714285714, + "grad_norm": 33.08884048461914, + "learning_rate": 8.552380952380954e-06, + "loss": 0.0932, + "step": 7403 + }, + { + "epoch": 42.308571428571426, + "grad_norm": 19.032835006713867, + "learning_rate": 8.546031746031747e-06, + "loss": 0.13, + "step": 7404 + }, + { + "epoch": 42.31428571428572, + "grad_norm": 28.662944793701172, + "learning_rate": 8.539682539682541e-06, + "loss": 0.1759, + "step": 7405 + }, + { + "epoch": 42.32, + "grad_norm": 30.545503616333008, + "learning_rate": 8.533333333333334e-06, + "loss": 0.1711, + "step": 7406 + }, + { + "epoch": 42.325714285714284, + "grad_norm": 97.189697265625, + "learning_rate": 8.526984126984128e-06, + "loss": 0.1324, + "step": 7407 + }, + { + "epoch": 42.331428571428575, + "grad_norm": 59.2839469909668, + "learning_rate": 8.52063492063492e-06, + "loss": 0.2409, + "step": 7408 + }, + { + "epoch": 42.33714285714286, + "grad_norm": 50.607032775878906, + "learning_rate": 8.514285714285715e-06, + "loss": 0.14, + "step": 7409 + }, + { + "epoch": 42.34285714285714, + "grad_norm": 49.94506072998047, + "learning_rate": 8.507936507936507e-06, + "loss": 0.1492, + "step": 7410 + }, + { + "epoch": 42.348571428571425, + "grad_norm": 24.435863494873047, + "learning_rate": 8.501587301587302e-06, + "loss": 0.4463, + "step": 7411 + }, + { + "epoch": 42.354285714285716, + "grad_norm": 46.557308197021484, + "learning_rate": 8.495238095238094e-06, + "loss": 0.1528, + "step": 7412 + }, + { + "epoch": 42.36, + "grad_norm": 57.091331481933594, + "learning_rate": 8.488888888888889e-06, + "loss": 0.1227, + "step": 7413 + }, + { + "epoch": 42.36571428571428, + "grad_norm": 38.18864059448242, + "learning_rate": 8.482539682539683e-06, + "loss": 0.2634, + "step": 7414 + }, + { + "epoch": 42.371428571428574, + "grad_norm": 25.98653793334961, + "learning_rate": 8.476190476190476e-06, + "loss": 0.2947, + "step": 7415 
+ }, + { + "epoch": 42.37714285714286, + "grad_norm": 23.898725509643555, + "learning_rate": 8.46984126984127e-06, + "loss": 0.1523, + "step": 7416 + }, + { + "epoch": 42.38285714285714, + "grad_norm": 19.86840057373047, + "learning_rate": 8.463492063492064e-06, + "loss": 0.1333, + "step": 7417 + }, + { + "epoch": 42.38857142857143, + "grad_norm": 33.621646881103516, + "learning_rate": 8.457142857142859e-06, + "loss": 0.243, + "step": 7418 + }, + { + "epoch": 42.394285714285715, + "grad_norm": 39.622520446777344, + "learning_rate": 8.450793650793651e-06, + "loss": 0.2453, + "step": 7419 + }, + { + "epoch": 42.4, + "grad_norm": 43.996707916259766, + "learning_rate": 8.444444444444446e-06, + "loss": 0.1304, + "step": 7420 + }, + { + "epoch": 42.40571428571428, + "grad_norm": 93.22525024414062, + "learning_rate": 8.438095238095238e-06, + "loss": 0.1889, + "step": 7421 + }, + { + "epoch": 42.41142857142857, + "grad_norm": 27.656633377075195, + "learning_rate": 8.431746031746033e-06, + "loss": 0.134, + "step": 7422 + }, + { + "epoch": 42.417142857142856, + "grad_norm": 118.73726654052734, + "learning_rate": 8.425396825396825e-06, + "loss": 0.1986, + "step": 7423 + }, + { + "epoch": 42.42285714285714, + "grad_norm": 47.353248596191406, + "learning_rate": 8.41904761904762e-06, + "loss": 0.1321, + "step": 7424 + }, + { + "epoch": 42.42857142857143, + "grad_norm": 26.87725830078125, + "learning_rate": 8.412698412698414e-06, + "loss": 0.1406, + "step": 7425 + }, + { + "epoch": 42.434285714285714, + "grad_norm": 124.5577392578125, + "learning_rate": 8.406349206349207e-06, + "loss": 0.2331, + "step": 7426 + }, + { + "epoch": 42.44, + "grad_norm": 20.424171447753906, + "learning_rate": 8.400000000000001e-06, + "loss": 0.1607, + "step": 7427 + }, + { + "epoch": 42.44571428571429, + "grad_norm": 82.92603302001953, + "learning_rate": 8.393650793650794e-06, + "loss": 0.1664, + "step": 7428 + }, + { + "epoch": 42.45142857142857, + "grad_norm": 53.30680847167969, + "learning_rate": 
8.387301587301588e-06, + "loss": 0.1705, + "step": 7429 + }, + { + "epoch": 42.457142857142856, + "grad_norm": 22.10956573486328, + "learning_rate": 8.38095238095238e-06, + "loss": 0.2412, + "step": 7430 + }, + { + "epoch": 42.462857142857146, + "grad_norm": 27.898704528808594, + "learning_rate": 8.374603174603175e-06, + "loss": 0.1431, + "step": 7431 + }, + { + "epoch": 42.46857142857143, + "grad_norm": 61.94043731689453, + "learning_rate": 8.368253968253967e-06, + "loss": 0.1794, + "step": 7432 + }, + { + "epoch": 42.47428571428571, + "grad_norm": 32.78137969970703, + "learning_rate": 8.361904761904762e-06, + "loss": 0.1466, + "step": 7433 + }, + { + "epoch": 42.48, + "grad_norm": 28.5189266204834, + "learning_rate": 8.355555555555556e-06, + "loss": 0.2008, + "step": 7434 + }, + { + "epoch": 42.48571428571429, + "grad_norm": 55.5509147644043, + "learning_rate": 8.34920634920635e-06, + "loss": 0.1297, + "step": 7435 + }, + { + "epoch": 42.49142857142857, + "grad_norm": 56.201786041259766, + "learning_rate": 8.342857142857143e-06, + "loss": 0.2611, + "step": 7436 + }, + { + "epoch": 42.497142857142855, + "grad_norm": 1000.422607421875, + "learning_rate": 8.336507936507937e-06, + "loss": 0.1611, + "step": 7437 + }, + { + "epoch": 42.502857142857145, + "grad_norm": 19.964914321899414, + "learning_rate": 8.330158730158732e-06, + "loss": 0.2158, + "step": 7438 + }, + { + "epoch": 42.50857142857143, + "grad_norm": 60.65313720703125, + "learning_rate": 8.323809523809524e-06, + "loss": 0.1371, + "step": 7439 + }, + { + "epoch": 42.51428571428571, + "grad_norm": 63.652687072753906, + "learning_rate": 8.317460317460319e-06, + "loss": 0.1351, + "step": 7440 + }, + { + "epoch": 42.52, + "grad_norm": 54.9022331237793, + "learning_rate": 8.311111111111111e-06, + "loss": 0.0954, + "step": 7441 + }, + { + "epoch": 42.52571428571429, + "grad_norm": 55.63421630859375, + "learning_rate": 8.304761904761906e-06, + "loss": 0.1193, + "step": 7442 + }, + { + "epoch": 42.53142857142857, + 
"grad_norm": 29.859519958496094, + "learning_rate": 8.298412698412698e-06, + "loss": 0.1315, + "step": 7443 + }, + { + "epoch": 42.537142857142854, + "grad_norm": 68.5718765258789, + "learning_rate": 8.292063492063493e-06, + "loss": 0.1668, + "step": 7444 + }, + { + "epoch": 42.542857142857144, + "grad_norm": 40.8301887512207, + "learning_rate": 8.285714285714285e-06, + "loss": 0.1709, + "step": 7445 + }, + { + "epoch": 42.54857142857143, + "grad_norm": 28.545330047607422, + "learning_rate": 8.27936507936508e-06, + "loss": 0.1146, + "step": 7446 + }, + { + "epoch": 42.55428571428571, + "grad_norm": 1060.134033203125, + "learning_rate": 8.273015873015872e-06, + "loss": 0.1951, + "step": 7447 + }, + { + "epoch": 42.56, + "grad_norm": 53.850128173828125, + "learning_rate": 8.266666666666667e-06, + "loss": 0.1573, + "step": 7448 + }, + { + "epoch": 42.565714285714286, + "grad_norm": 17.7730655670166, + "learning_rate": 8.260317460317461e-06, + "loss": 0.2601, + "step": 7449 + }, + { + "epoch": 42.57142857142857, + "grad_norm": 25.573711395263672, + "learning_rate": 8.253968253968254e-06, + "loss": 0.1189, + "step": 7450 + }, + { + "epoch": 42.57714285714286, + "grad_norm": 42.47023391723633, + "learning_rate": 8.247619047619048e-06, + "loss": 0.1348, + "step": 7451 + }, + { + "epoch": 42.582857142857144, + "grad_norm": 27.079898834228516, + "learning_rate": 8.241269841269842e-06, + "loss": 0.1767, + "step": 7452 + }, + { + "epoch": 42.58857142857143, + "grad_norm": 53.77452087402344, + "learning_rate": 8.234920634920635e-06, + "loss": 0.1532, + "step": 7453 + }, + { + "epoch": 42.59428571428572, + "grad_norm": 55.31523132324219, + "learning_rate": 8.22857142857143e-06, + "loss": 0.1667, + "step": 7454 + }, + { + "epoch": 42.6, + "grad_norm": 43.21916580200195, + "learning_rate": 8.222222222222223e-06, + "loss": 0.164, + "step": 7455 + }, + { + "epoch": 42.605714285714285, + "grad_norm": 51.73290252685547, + "learning_rate": 8.215873015873016e-06, + "loss": 0.1625, + 
"step": 7456 + }, + { + "epoch": 42.61142857142857, + "grad_norm": 50.38142776489258, + "learning_rate": 8.20952380952381e-06, + "loss": 0.1448, + "step": 7457 + }, + { + "epoch": 42.61714285714286, + "grad_norm": 27.53108024597168, + "learning_rate": 8.203174603174603e-06, + "loss": 0.1938, + "step": 7458 + }, + { + "epoch": 42.62285714285714, + "grad_norm": 48.98680877685547, + "learning_rate": 8.196825396825397e-06, + "loss": 0.1132, + "step": 7459 + }, + { + "epoch": 42.628571428571426, + "grad_norm": 25.339763641357422, + "learning_rate": 8.190476190476192e-06, + "loss": 0.12, + "step": 7460 + }, + { + "epoch": 42.63428571428572, + "grad_norm": 97.11475372314453, + "learning_rate": 8.184126984126984e-06, + "loss": 0.1563, + "step": 7461 + }, + { + "epoch": 42.64, + "grad_norm": 34.36635208129883, + "learning_rate": 8.177777777777779e-06, + "loss": 0.1327, + "step": 7462 + }, + { + "epoch": 42.645714285714284, + "grad_norm": 47.332855224609375, + "learning_rate": 8.171428571428571e-06, + "loss": 0.158, + "step": 7463 + }, + { + "epoch": 42.651428571428575, + "grad_norm": 22.13446617126465, + "learning_rate": 8.165079365079366e-06, + "loss": 0.1633, + "step": 7464 + }, + { + "epoch": 42.65714285714286, + "grad_norm": 81.29454040527344, + "learning_rate": 8.158730158730158e-06, + "loss": 0.1518, + "step": 7465 + }, + { + "epoch": 42.66285714285714, + "grad_norm": 19.024784088134766, + "learning_rate": 8.152380952380953e-06, + "loss": 0.1706, + "step": 7466 + }, + { + "epoch": 42.668571428571425, + "grad_norm": 93.32360076904297, + "learning_rate": 8.146031746031745e-06, + "loss": 0.172, + "step": 7467 + }, + { + "epoch": 42.674285714285716, + "grad_norm": 44.49494934082031, + "learning_rate": 8.13968253968254e-06, + "loss": 0.1559, + "step": 7468 + }, + { + "epoch": 42.68, + "grad_norm": 57.658836364746094, + "learning_rate": 8.133333333333332e-06, + "loss": 0.1283, + "step": 7469 + }, + { + "epoch": 42.68571428571428, + "grad_norm": 22.79932403564453, + 
"learning_rate": 8.126984126984127e-06, + "loss": 0.1191, + "step": 7470 + }, + { + "epoch": 42.691428571428574, + "grad_norm": 67.99079895019531, + "learning_rate": 8.120634920634921e-06, + "loss": 0.19, + "step": 7471 + }, + { + "epoch": 42.69714285714286, + "grad_norm": 45.546077728271484, + "learning_rate": 8.114285714285715e-06, + "loss": 0.1761, + "step": 7472 + }, + { + "epoch": 42.70285714285714, + "grad_norm": 26.195148468017578, + "learning_rate": 8.10793650793651e-06, + "loss": 0.1524, + "step": 7473 + }, + { + "epoch": 42.70857142857143, + "grad_norm": 52.820526123046875, + "learning_rate": 8.101587301587302e-06, + "loss": 0.1547, + "step": 7474 + }, + { + "epoch": 42.714285714285715, + "grad_norm": 33.6549072265625, + "learning_rate": 8.095238095238097e-06, + "loss": 0.1609, + "step": 7475 + }, + { + "epoch": 42.72, + "grad_norm": 29.644054412841797, + "learning_rate": 8.08888888888889e-06, + "loss": 0.1765, + "step": 7476 + }, + { + "epoch": 42.72571428571428, + "grad_norm": 29.143583297729492, + "learning_rate": 8.082539682539684e-06, + "loss": 0.1616, + "step": 7477 + }, + { + "epoch": 42.73142857142857, + "grad_norm": 54.07554626464844, + "learning_rate": 8.076190476190476e-06, + "loss": 0.1606, + "step": 7478 + }, + { + "epoch": 42.73714285714286, + "grad_norm": 59.4071159362793, + "learning_rate": 8.06984126984127e-06, + "loss": 0.2025, + "step": 7479 + }, + { + "epoch": 42.74285714285714, + "grad_norm": 44.09516143798828, + "learning_rate": 8.063492063492063e-06, + "loss": 0.1097, + "step": 7480 + }, + { + "epoch": 42.74857142857143, + "grad_norm": 32.09178161621094, + "learning_rate": 8.057142857142857e-06, + "loss": 0.1291, + "step": 7481 + }, + { + "epoch": 42.754285714285714, + "grad_norm": 28.902109146118164, + "learning_rate": 8.050793650793652e-06, + "loss": 0.2254, + "step": 7482 + }, + { + "epoch": 42.76, + "grad_norm": 27.518037796020508, + "learning_rate": 8.044444444444444e-06, + "loss": 0.124, + "step": 7483 + }, + { + "epoch": 
42.76571428571429, + "grad_norm": 457.9650573730469, + "learning_rate": 8.038095238095239e-06, + "loss": 0.2841, + "step": 7484 + }, + { + "epoch": 42.77142857142857, + "grad_norm": 46.90461730957031, + "learning_rate": 8.031746031746031e-06, + "loss": 0.1329, + "step": 7485 + }, + { + "epoch": 42.777142857142856, + "grad_norm": 31.557842254638672, + "learning_rate": 8.025396825396826e-06, + "loss": 0.2024, + "step": 7486 + }, + { + "epoch": 42.78285714285714, + "grad_norm": 40.40676498413086, + "learning_rate": 8.019047619047618e-06, + "loss": 0.1522, + "step": 7487 + }, + { + "epoch": 42.78857142857143, + "grad_norm": 52.519371032714844, + "learning_rate": 8.012698412698413e-06, + "loss": 0.1644, + "step": 7488 + }, + { + "epoch": 42.794285714285714, + "grad_norm": 22.291147232055664, + "learning_rate": 8.006349206349207e-06, + "loss": 0.118, + "step": 7489 + }, + { + "epoch": 42.8, + "grad_norm": 45.24946594238281, + "learning_rate": 8.000000000000001e-06, + "loss": 0.1665, + "step": 7490 + }, + { + "epoch": 42.80571428571429, + "grad_norm": 34.55263900756836, + "learning_rate": 7.993650793650794e-06, + "loss": 0.2683, + "step": 7491 + }, + { + "epoch": 42.81142857142857, + "grad_norm": 31.581363677978516, + "learning_rate": 7.987301587301588e-06, + "loss": 0.1601, + "step": 7492 + }, + { + "epoch": 42.817142857142855, + "grad_norm": 34.04935836791992, + "learning_rate": 7.980952380952383e-06, + "loss": 0.178, + "step": 7493 + }, + { + "epoch": 42.822857142857146, + "grad_norm": 84.48345947265625, + "learning_rate": 7.974603174603175e-06, + "loss": 0.1736, + "step": 7494 + }, + { + "epoch": 42.82857142857143, + "grad_norm": 22.20957374572754, + "learning_rate": 7.96825396825397e-06, + "loss": 0.1343, + "step": 7495 + }, + { + "epoch": 42.83428571428571, + "grad_norm": 31.793655395507812, + "learning_rate": 7.961904761904762e-06, + "loss": 0.1058, + "step": 7496 + }, + { + "epoch": 42.84, + "grad_norm": 20.395389556884766, + "learning_rate": 
7.955555555555557e-06, + "loss": 0.2209, + "step": 7497 + }, + { + "epoch": 42.84571428571429, + "grad_norm": 39.11921691894531, + "learning_rate": 7.94920634920635e-06, + "loss": 0.1347, + "step": 7498 + }, + { + "epoch": 42.85142857142857, + "grad_norm": 70.40147399902344, + "learning_rate": 7.942857142857144e-06, + "loss": 0.1641, + "step": 7499 + }, + { + "epoch": 42.857142857142854, + "grad_norm": 35.69590377807617, + "learning_rate": 7.936507936507936e-06, + "loss": 0.201, + "step": 7500 + }, + { + "epoch": 42.862857142857145, + "grad_norm": 48.583946228027344, + "learning_rate": 7.93015873015873e-06, + "loss": 0.1983, + "step": 7501 + }, + { + "epoch": 42.86857142857143, + "grad_norm": 39.1660270690918, + "learning_rate": 7.923809523809523e-06, + "loss": 0.1265, + "step": 7502 + }, + { + "epoch": 42.87428571428571, + "grad_norm": 49.16383361816406, + "learning_rate": 7.917460317460317e-06, + "loss": 0.1961, + "step": 7503 + }, + { + "epoch": 42.88, + "grad_norm": 28.290082931518555, + "learning_rate": 7.91111111111111e-06, + "loss": 0.1965, + "step": 7504 + }, + { + "epoch": 42.885714285714286, + "grad_norm": 80.19752502441406, + "learning_rate": 7.904761904761904e-06, + "loss": 0.2386, + "step": 7505 + }, + { + "epoch": 42.89142857142857, + "grad_norm": 25.667085647583008, + "learning_rate": 7.898412698412699e-06, + "loss": 0.1357, + "step": 7506 + }, + { + "epoch": 42.89714285714286, + "grad_norm": 79.53691101074219, + "learning_rate": 7.892063492063493e-06, + "loss": 0.2207, + "step": 7507 + }, + { + "epoch": 42.902857142857144, + "grad_norm": 59.32994842529297, + "learning_rate": 7.885714285714286e-06, + "loss": 0.1417, + "step": 7508 + }, + { + "epoch": 42.90857142857143, + "grad_norm": 21.014270782470703, + "learning_rate": 7.87936507936508e-06, + "loss": 0.1568, + "step": 7509 + }, + { + "epoch": 42.91428571428571, + "grad_norm": 60.28929901123047, + "learning_rate": 7.873015873015874e-06, + "loss": 0.1168, + "step": 7510 + }, + { + "epoch": 42.92, + 
"grad_norm": 32.30584716796875, + "learning_rate": 7.866666666666667e-06, + "loss": 0.2005, + "step": 7511 + }, + { + "epoch": 42.925714285714285, + "grad_norm": 24.222583770751953, + "learning_rate": 7.860317460317461e-06, + "loss": 0.1403, + "step": 7512 + }, + { + "epoch": 42.93142857142857, + "grad_norm": 42.4453239440918, + "learning_rate": 7.853968253968254e-06, + "loss": 0.1229, + "step": 7513 + }, + { + "epoch": 42.93714285714286, + "grad_norm": 37.953250885009766, + "learning_rate": 7.847619047619048e-06, + "loss": 0.1573, + "step": 7514 + }, + { + "epoch": 42.94285714285714, + "grad_norm": 856.2482299804688, + "learning_rate": 7.841269841269841e-06, + "loss": 0.1855, + "step": 7515 + }, + { + "epoch": 42.94857142857143, + "grad_norm": 55.68648910522461, + "learning_rate": 7.834920634920635e-06, + "loss": 0.1351, + "step": 7516 + }, + { + "epoch": 42.95428571428572, + "grad_norm": 41.787540435791016, + "learning_rate": 7.82857142857143e-06, + "loss": 0.1239, + "step": 7517 + }, + { + "epoch": 42.96, + "grad_norm": 53.466854095458984, + "learning_rate": 7.822222222222222e-06, + "loss": 0.2125, + "step": 7518 + }, + { + "epoch": 42.965714285714284, + "grad_norm": 203.714111328125, + "learning_rate": 7.815873015873017e-06, + "loss": 0.1814, + "step": 7519 + }, + { + "epoch": 42.97142857142857, + "grad_norm": 31.365480422973633, + "learning_rate": 7.80952380952381e-06, + "loss": 0.2021, + "step": 7520 + }, + { + "epoch": 42.97714285714286, + "grad_norm": 197.8916015625, + "learning_rate": 7.803174603174604e-06, + "loss": 0.214, + "step": 7521 + }, + { + "epoch": 42.98285714285714, + "grad_norm": 29.759140014648438, + "learning_rate": 7.796825396825396e-06, + "loss": 0.1622, + "step": 7522 + }, + { + "epoch": 42.988571428571426, + "grad_norm": 61.037593841552734, + "learning_rate": 7.79047619047619e-06, + "loss": 0.1531, + "step": 7523 + }, + { + "epoch": 42.994285714285716, + "grad_norm": 39.895286560058594, + "learning_rate": 7.784126984126985e-06, + "loss": 
0.1649, + "step": 7524 + }, + { + "epoch": 43.0, + "grad_norm": 46.61077880859375, + "learning_rate": 7.777777777777777e-06, + "loss": 0.1352, + "step": 7525 + }, + { + "epoch": 43.0, + "eval_classes": 0, + "eval_loss": 0.5749897956848145, + "eval_map": 0.9381, + "eval_map_50": 0.9699, + "eval_map_75": 0.9637, + "eval_map_large": 0.9382, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9381, + "eval_map_small": -1.0, + "eval_mar_1": 0.7892, + "eval_mar_10": 0.9759, + "eval_mar_100": 0.9765, + "eval_mar_100_per_class": 0.9765, + "eval_mar_large": 0.9765, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.3945, + "eval_samples_per_second": 21.949, + "eval_steps_per_second": 2.762, + "step": 7525 + }, + { + "epoch": 43.005714285714284, + "grad_norm": 82.03201293945312, + "learning_rate": 7.771428571428572e-06, + "loss": 0.1836, + "step": 7526 + }, + { + "epoch": 43.011428571428574, + "grad_norm": 435.36126708984375, + "learning_rate": 7.765079365079366e-06, + "loss": 0.2198, + "step": 7527 + }, + { + "epoch": 43.01714285714286, + "grad_norm": 42.7081413269043, + "learning_rate": 7.75873015873016e-06, + "loss": 0.1261, + "step": 7528 + }, + { + "epoch": 43.02285714285714, + "grad_norm": 46.990474700927734, + "learning_rate": 7.752380952380953e-06, + "loss": 0.1833, + "step": 7529 + }, + { + "epoch": 43.02857142857143, + "grad_norm": 39.83193588256836, + "learning_rate": 7.746031746031747e-06, + "loss": 0.123, + "step": 7530 + }, + { + "epoch": 43.034285714285716, + "grad_norm": 16.36966896057129, + "learning_rate": 7.73968253968254e-06, + "loss": 0.1705, + "step": 7531 + }, + { + "epoch": 43.04, + "grad_norm": 108.55115509033203, + "learning_rate": 7.733333333333334e-06, + "loss": 0.1844, + "step": 7532 + }, + { + "epoch": 43.04571428571428, + "grad_norm": 54.612186431884766, + "learning_rate": 7.726984126984127e-06, + "loss": 0.1496, + "step": 7533 + }, + { + "epoch": 43.05142857142857, + "grad_norm": 33.79128646850586, + "learning_rate": 
7.720634920634921e-06, + "loss": 0.1736, + "step": 7534 + }, + { + "epoch": 43.05714285714286, + "grad_norm": 374.0523986816406, + "learning_rate": 7.714285714285714e-06, + "loss": 0.1488, + "step": 7535 + }, + { + "epoch": 43.06285714285714, + "grad_norm": 83.07660675048828, + "learning_rate": 7.707936507936508e-06, + "loss": 0.1235, + "step": 7536 + }, + { + "epoch": 43.06857142857143, + "grad_norm": 62.87297821044922, + "learning_rate": 7.701587301587301e-06, + "loss": 0.135, + "step": 7537 + }, + { + "epoch": 43.074285714285715, + "grad_norm": 29.516284942626953, + "learning_rate": 7.695238095238095e-06, + "loss": 0.1292, + "step": 7538 + }, + { + "epoch": 43.08, + "grad_norm": 35.36422348022461, + "learning_rate": 7.68888888888889e-06, + "loss": 0.1709, + "step": 7539 + }, + { + "epoch": 43.08571428571429, + "grad_norm": 31.686317443847656, + "learning_rate": 7.682539682539682e-06, + "loss": 0.1376, + "step": 7540 + }, + { + "epoch": 43.09142857142857, + "grad_norm": 66.57743835449219, + "learning_rate": 7.676190476190477e-06, + "loss": 0.1701, + "step": 7541 + }, + { + "epoch": 43.097142857142856, + "grad_norm": 45.713653564453125, + "learning_rate": 7.66984126984127e-06, + "loss": 0.1109, + "step": 7542 + }, + { + "epoch": 43.10285714285714, + "grad_norm": 29.52507972717285, + "learning_rate": 7.663492063492064e-06, + "loss": 0.1376, + "step": 7543 + }, + { + "epoch": 43.10857142857143, + "grad_norm": 59.936851501464844, + "learning_rate": 7.657142857142858e-06, + "loss": 0.1464, + "step": 7544 + }, + { + "epoch": 43.114285714285714, + "grad_norm": 402.4036865234375, + "learning_rate": 7.650793650793652e-06, + "loss": 0.1702, + "step": 7545 + }, + { + "epoch": 43.12, + "grad_norm": 160.90231323242188, + "learning_rate": 7.644444444444445e-06, + "loss": 0.115, + "step": 7546 + }, + { + "epoch": 43.12571428571429, + "grad_norm": 40.402618408203125, + "learning_rate": 7.63809523809524e-06, + "loss": 0.1222, + "step": 7547 + }, + { + "epoch": 43.13142857142857, 
+ "grad_norm": 20.92841148376465, + "learning_rate": 7.631746031746032e-06, + "loss": 0.1456, + "step": 7548 + }, + { + "epoch": 43.137142857142855, + "grad_norm": 105.12482452392578, + "learning_rate": 7.625396825396826e-06, + "loss": 0.1148, + "step": 7549 + }, + { + "epoch": 43.142857142857146, + "grad_norm": 18.593313217163086, + "learning_rate": 7.6190476190476205e-06, + "loss": 0.1563, + "step": 7550 + }, + { + "epoch": 43.14857142857143, + "grad_norm": 42.624813079833984, + "learning_rate": 7.612698412698413e-06, + "loss": 0.1964, + "step": 7551 + }, + { + "epoch": 43.15428571428571, + "grad_norm": 31.275169372558594, + "learning_rate": 7.6063492063492074e-06, + "loss": 0.1141, + "step": 7552 + }, + { + "epoch": 43.16, + "grad_norm": 34.69234085083008, + "learning_rate": 7.6e-06, + "loss": 0.1878, + "step": 7553 + }, + { + "epoch": 43.16571428571429, + "grad_norm": 75.30792236328125, + "learning_rate": 7.593650793650794e-06, + "loss": 0.1781, + "step": 7554 + }, + { + "epoch": 43.17142857142857, + "grad_norm": 29.43678092956543, + "learning_rate": 7.587301587301587e-06, + "loss": 0.1592, + "step": 7555 + }, + { + "epoch": 43.177142857142854, + "grad_norm": 35.68794631958008, + "learning_rate": 7.580952380952381e-06, + "loss": 0.2717, + "step": 7556 + }, + { + "epoch": 43.182857142857145, + "grad_norm": 56.68461608886719, + "learning_rate": 7.574603174603175e-06, + "loss": 0.137, + "step": 7557 + }, + { + "epoch": 43.18857142857143, + "grad_norm": 134.8107147216797, + "learning_rate": 7.568253968253969e-06, + "loss": 0.1402, + "step": 7558 + }, + { + "epoch": 43.19428571428571, + "grad_norm": 91.69994354248047, + "learning_rate": 7.561904761904762e-06, + "loss": 0.2529, + "step": 7559 + }, + { + "epoch": 43.2, + "grad_norm": 47.072265625, + "learning_rate": 7.555555555555556e-06, + "loss": 0.1387, + "step": 7560 + }, + { + "epoch": 43.205714285714286, + "grad_norm": 25.122535705566406, + "learning_rate": 7.549206349206349e-06, + "loss": 0.1738, + "step": 7561 
+ }, + { + "epoch": 43.21142857142857, + "grad_norm": 26.446069717407227, + "learning_rate": 7.542857142857143e-06, + "loss": 0.144, + "step": 7562 + }, + { + "epoch": 43.21714285714286, + "grad_norm": 135.1419219970703, + "learning_rate": 7.5365079365079375e-06, + "loss": 0.2356, + "step": 7563 + }, + { + "epoch": 43.222857142857144, + "grad_norm": 19.344514846801758, + "learning_rate": 7.53015873015873e-06, + "loss": 0.1643, + "step": 7564 + }, + { + "epoch": 43.22857142857143, + "grad_norm": 20.15989875793457, + "learning_rate": 7.523809523809524e-06, + "loss": 0.1282, + "step": 7565 + }, + { + "epoch": 43.23428571428571, + "grad_norm": 22.729511260986328, + "learning_rate": 7.517460317460318e-06, + "loss": 0.1119, + "step": 7566 + }, + { + "epoch": 43.24, + "grad_norm": 50.45492172241211, + "learning_rate": 7.511111111111112e-06, + "loss": 0.1361, + "step": 7567 + }, + { + "epoch": 43.245714285714286, + "grad_norm": 28.559432983398438, + "learning_rate": 7.504761904761905e-06, + "loss": 0.1731, + "step": 7568 + }, + { + "epoch": 43.25142857142857, + "grad_norm": 286.8123474121094, + "learning_rate": 7.498412698412699e-06, + "loss": 0.1314, + "step": 7569 + }, + { + "epoch": 43.25714285714286, + "grad_norm": 33.98759841918945, + "learning_rate": 7.492063492063492e-06, + "loss": 0.1734, + "step": 7570 + }, + { + "epoch": 43.26285714285714, + "grad_norm": 21.450578689575195, + "learning_rate": 7.485714285714286e-06, + "loss": 0.1865, + "step": 7571 + }, + { + "epoch": 43.26857142857143, + "grad_norm": 11.982288360595703, + "learning_rate": 7.479365079365079e-06, + "loss": 0.2033, + "step": 7572 + }, + { + "epoch": 43.27428571428572, + "grad_norm": 38.925289154052734, + "learning_rate": 7.473015873015873e-06, + "loss": 0.1453, + "step": 7573 + }, + { + "epoch": 43.28, + "grad_norm": 22.259342193603516, + "learning_rate": 7.4666666666666675e-06, + "loss": 0.1331, + "step": 7574 + }, + { + "epoch": 43.285714285714285, + "grad_norm": 45.1170539855957, + 
"learning_rate": 7.460317460317461e-06, + "loss": 0.469, + "step": 7575 + }, + { + "epoch": 43.29142857142857, + "grad_norm": 90.38233947753906, + "learning_rate": 7.4539682539682544e-06, + "loss": 0.148, + "step": 7576 + }, + { + "epoch": 43.29714285714286, + "grad_norm": 31.033479690551758, + "learning_rate": 7.447619047619048e-06, + "loss": 0.1868, + "step": 7577 + }, + { + "epoch": 43.30285714285714, + "grad_norm": 89.41876983642578, + "learning_rate": 7.441269841269842e-06, + "loss": 0.1399, + "step": 7578 + }, + { + "epoch": 43.308571428571426, + "grad_norm": 66.96041107177734, + "learning_rate": 7.434920634920635e-06, + "loss": 0.1686, + "step": 7579 + }, + { + "epoch": 43.31428571428572, + "grad_norm": 70.08899688720703, + "learning_rate": 7.428571428571429e-06, + "loss": 0.1374, + "step": 7580 + }, + { + "epoch": 43.32, + "grad_norm": 18.745100021362305, + "learning_rate": 7.422222222222222e-06, + "loss": 0.1762, + "step": 7581 + }, + { + "epoch": 43.325714285714284, + "grad_norm": 54.25947570800781, + "learning_rate": 7.415873015873016e-06, + "loss": 0.139, + "step": 7582 + }, + { + "epoch": 43.331428571428575, + "grad_norm": 79.05216979980469, + "learning_rate": 7.40952380952381e-06, + "loss": 0.1897, + "step": 7583 + }, + { + "epoch": 43.33714285714286, + "grad_norm": 31.019147872924805, + "learning_rate": 7.403174603174603e-06, + "loss": 0.1648, + "step": 7584 + }, + { + "epoch": 43.34285714285714, + "grad_norm": 16.248050689697266, + "learning_rate": 7.3968253968253975e-06, + "loss": 0.1553, + "step": 7585 + }, + { + "epoch": 43.348571428571425, + "grad_norm": 37.0955924987793, + "learning_rate": 7.390476190476191e-06, + "loss": 0.1316, + "step": 7586 + }, + { + "epoch": 43.354285714285716, + "grad_norm": 22.13149070739746, + "learning_rate": 7.384126984126985e-06, + "loss": 0.2298, + "step": 7587 + }, + { + "epoch": 43.36, + "grad_norm": 34.06486892700195, + "learning_rate": 7.377777777777778e-06, + "loss": 0.1287, + "step": 7588 + }, + { + "epoch": 
43.36571428571428, + "grad_norm": 77.93970489501953, + "learning_rate": 7.371428571428572e-06, + "loss": 0.1197, + "step": 7589 + }, + { + "epoch": 43.371428571428574, + "grad_norm": 62.190792083740234, + "learning_rate": 7.365079365079365e-06, + "loss": 0.1972, + "step": 7590 + }, + { + "epoch": 43.37714285714286, + "grad_norm": 42.25458526611328, + "learning_rate": 7.358730158730159e-06, + "loss": 0.1302, + "step": 7591 + }, + { + "epoch": 43.38285714285714, + "grad_norm": 27.4211483001709, + "learning_rate": 7.352380952380952e-06, + "loss": 0.1605, + "step": 7592 + }, + { + "epoch": 43.38857142857143, + "grad_norm": 35.7080078125, + "learning_rate": 7.346031746031746e-06, + "loss": 0.1723, + "step": 7593 + }, + { + "epoch": 43.394285714285715, + "grad_norm": 19.61418914794922, + "learning_rate": 7.33968253968254e-06, + "loss": 0.332, + "step": 7594 + }, + { + "epoch": 43.4, + "grad_norm": 44.07203674316406, + "learning_rate": 7.333333333333334e-06, + "loss": 0.1577, + "step": 7595 + }, + { + "epoch": 43.40571428571428, + "grad_norm": 24.050046920776367, + "learning_rate": 7.326984126984128e-06, + "loss": 0.1353, + "step": 7596 + }, + { + "epoch": 43.41142857142857, + "grad_norm": 36.659420013427734, + "learning_rate": 7.320634920634921e-06, + "loss": 0.1791, + "step": 7597 + }, + { + "epoch": 43.417142857142856, + "grad_norm": 30.74590492248535, + "learning_rate": 7.314285714285715e-06, + "loss": 0.1289, + "step": 7598 + }, + { + "epoch": 43.42285714285714, + "grad_norm": 54.270450592041016, + "learning_rate": 7.307936507936508e-06, + "loss": 0.1525, + "step": 7599 + }, + { + "epoch": 43.42857142857143, + "grad_norm": 13.909317970275879, + "learning_rate": 7.301587301587302e-06, + "loss": 0.1565, + "step": 7600 + }, + { + "epoch": 43.434285714285714, + "grad_norm": 39.99986267089844, + "learning_rate": 7.295238095238095e-06, + "loss": 0.1583, + "step": 7601 + }, + { + "epoch": 43.44, + "grad_norm": 76.60659790039062, + "learning_rate": 7.288888888888889e-06, + 
"loss": 0.1394, + "step": 7602 + }, + { + "epoch": 43.44571428571429, + "grad_norm": 68.71869659423828, + "learning_rate": 7.282539682539683e-06, + "loss": 0.1694, + "step": 7603 + }, + { + "epoch": 43.45142857142857, + "grad_norm": 240.68544006347656, + "learning_rate": 7.276190476190477e-06, + "loss": 0.1812, + "step": 7604 + }, + { + "epoch": 43.457142857142856, + "grad_norm": 27.34787940979004, + "learning_rate": 7.26984126984127e-06, + "loss": 0.1393, + "step": 7605 + }, + { + "epoch": 43.462857142857146, + "grad_norm": 37.935115814208984, + "learning_rate": 7.263492063492064e-06, + "loss": 0.1654, + "step": 7606 + }, + { + "epoch": 43.46857142857143, + "grad_norm": 44.87675857543945, + "learning_rate": 7.257142857142857e-06, + "loss": 0.1219, + "step": 7607 + }, + { + "epoch": 43.47428571428571, + "grad_norm": 23.068836212158203, + "learning_rate": 7.250793650793651e-06, + "loss": 0.1369, + "step": 7608 + }, + { + "epoch": 43.48, + "grad_norm": 65.95645141601562, + "learning_rate": 7.244444444444445e-06, + "loss": 0.1619, + "step": 7609 + }, + { + "epoch": 43.48571428571429, + "grad_norm": 66.43006134033203, + "learning_rate": 7.238095238095238e-06, + "loss": 0.1209, + "step": 7610 + }, + { + "epoch": 43.49142857142857, + "grad_norm": 47.76883316040039, + "learning_rate": 7.231746031746032e-06, + "loss": 0.1227, + "step": 7611 + }, + { + "epoch": 43.497142857142855, + "grad_norm": 47.75768280029297, + "learning_rate": 7.225396825396826e-06, + "loss": 0.1494, + "step": 7612 + }, + { + "epoch": 43.502857142857145, + "grad_norm": 26.707481384277344, + "learning_rate": 7.21904761904762e-06, + "loss": 0.1576, + "step": 7613 + }, + { + "epoch": 43.50857142857143, + "grad_norm": 49.4616813659668, + "learning_rate": 7.212698412698413e-06, + "loss": 0.173, + "step": 7614 + }, + { + "epoch": 43.51428571428571, + "grad_norm": 39.75261688232422, + "learning_rate": 7.206349206349207e-06, + "loss": 0.17, + "step": 7615 + }, + { + "epoch": 43.52, + "grad_norm": 
19.144189834594727, + "learning_rate": 7.2e-06, + "loss": 0.1685, + "step": 7616 + }, + { + "epoch": 43.52571428571429, + "grad_norm": 61.75029754638672, + "learning_rate": 7.193650793650794e-06, + "loss": 0.1447, + "step": 7617 + }, + { + "epoch": 43.53142857142857, + "grad_norm": 16.760744094848633, + "learning_rate": 7.187301587301587e-06, + "loss": 0.1048, + "step": 7618 + }, + { + "epoch": 43.537142857142854, + "grad_norm": 18.882497787475586, + "learning_rate": 7.180952380952381e-06, + "loss": 0.197, + "step": 7619 + }, + { + "epoch": 43.542857142857144, + "grad_norm": 38.44529724121094, + "learning_rate": 7.174603174603175e-06, + "loss": 0.1801, + "step": 7620 + }, + { + "epoch": 43.54857142857143, + "grad_norm": 114.03311157226562, + "learning_rate": 7.168253968253969e-06, + "loss": 0.146, + "step": 7621 + }, + { + "epoch": 43.55428571428571, + "grad_norm": 90.95785522460938, + "learning_rate": 7.161904761904763e-06, + "loss": 0.1437, + "step": 7622 + }, + { + "epoch": 43.56, + "grad_norm": 30.148263931274414, + "learning_rate": 7.155555555555556e-06, + "loss": 0.1549, + "step": 7623 + }, + { + "epoch": 43.565714285714286, + "grad_norm": 56.53114318847656, + "learning_rate": 7.14920634920635e-06, + "loss": 0.2126, + "step": 7624 + }, + { + "epoch": 43.57142857142857, + "grad_norm": 86.12628173828125, + "learning_rate": 7.142857142857143e-06, + "loss": 0.1507, + "step": 7625 + }, + { + "epoch": 43.57714285714286, + "grad_norm": 26.091625213623047, + "learning_rate": 7.136507936507937e-06, + "loss": 0.1571, + "step": 7626 + }, + { + "epoch": 43.582857142857144, + "grad_norm": 30.037315368652344, + "learning_rate": 7.13015873015873e-06, + "loss": 0.1186, + "step": 7627 + }, + { + "epoch": 43.58857142857143, + "grad_norm": 33.91983413696289, + "learning_rate": 7.123809523809524e-06, + "loss": 0.1662, + "step": 7628 + }, + { + "epoch": 43.59428571428572, + "grad_norm": 58.2686653137207, + "learning_rate": 7.1174603174603175e-06, + "loss": 0.1438, + "step": 7629 
+ }, + { + "epoch": 43.6, + "grad_norm": 24.259056091308594, + "learning_rate": 7.111111111111112e-06, + "loss": 0.1747, + "step": 7630 + }, + { + "epoch": 43.605714285714285, + "grad_norm": 38.988319396972656, + "learning_rate": 7.104761904761905e-06, + "loss": 0.3159, + "step": 7631 + }, + { + "epoch": 43.61142857142857, + "grad_norm": 39.77925491333008, + "learning_rate": 7.098412698412699e-06, + "loss": 0.1578, + "step": 7632 + }, + { + "epoch": 43.61714285714286, + "grad_norm": 29.86747932434082, + "learning_rate": 7.092063492063493e-06, + "loss": 0.1524, + "step": 7633 + }, + { + "epoch": 43.62285714285714, + "grad_norm": 34.30352020263672, + "learning_rate": 7.085714285714286e-06, + "loss": 0.1373, + "step": 7634 + }, + { + "epoch": 43.628571428571426, + "grad_norm": 21.58470916748047, + "learning_rate": 7.07936507936508e-06, + "loss": 0.1218, + "step": 7635 + }, + { + "epoch": 43.63428571428572, + "grad_norm": 50.943058013916016, + "learning_rate": 7.073015873015873e-06, + "loss": 0.1302, + "step": 7636 + }, + { + "epoch": 43.64, + "grad_norm": 21.63199234008789, + "learning_rate": 7.066666666666667e-06, + "loss": 0.1617, + "step": 7637 + }, + { + "epoch": 43.645714285714284, + "grad_norm": 20.79852867126465, + "learning_rate": 7.0603174603174605e-06, + "loss": 0.1723, + "step": 7638 + }, + { + "epoch": 43.651428571428575, + "grad_norm": 30.47551155090332, + "learning_rate": 7.053968253968255e-06, + "loss": 0.144, + "step": 7639 + }, + { + "epoch": 43.65714285714286, + "grad_norm": 16.609371185302734, + "learning_rate": 7.0476190476190475e-06, + "loss": 0.2066, + "step": 7640 + }, + { + "epoch": 43.66285714285714, + "grad_norm": 47.40229415893555, + "learning_rate": 7.041269841269842e-06, + "loss": 0.1788, + "step": 7641 + }, + { + "epoch": 43.668571428571425, + "grad_norm": 32.97322463989258, + "learning_rate": 7.034920634920636e-06, + "loss": 0.1435, + "step": 7642 + }, + { + "epoch": 43.674285714285716, + "grad_norm": 36.80225372314453, + 
"learning_rate": 7.028571428571429e-06, + "loss": 0.145, + "step": 7643 + }, + { + "epoch": 43.68, + "grad_norm": 27.588375091552734, + "learning_rate": 7.022222222222223e-06, + "loss": 0.1489, + "step": 7644 + }, + { + "epoch": 43.68571428571428, + "grad_norm": 49.63401794433594, + "learning_rate": 7.015873015873016e-06, + "loss": 0.1575, + "step": 7645 + }, + { + "epoch": 43.691428571428574, + "grad_norm": 49.40351104736328, + "learning_rate": 7.00952380952381e-06, + "loss": 0.1875, + "step": 7646 + }, + { + "epoch": 43.69714285714286, + "grad_norm": 43.086910247802734, + "learning_rate": 7.003174603174604e-06, + "loss": 0.1205, + "step": 7647 + }, + { + "epoch": 43.70285714285714, + "grad_norm": 29.192461013793945, + "learning_rate": 6.996825396825397e-06, + "loss": 0.1581, + "step": 7648 + }, + { + "epoch": 43.70857142857143, + "grad_norm": 42.49656295776367, + "learning_rate": 6.9904761904761905e-06, + "loss": 0.1362, + "step": 7649 + }, + { + "epoch": 43.714285714285715, + "grad_norm": 46.579830169677734, + "learning_rate": 6.984126984126985e-06, + "loss": 0.1742, + "step": 7650 + }, + { + "epoch": 43.72, + "grad_norm": 960.8983154296875, + "learning_rate": 6.9777777777777775e-06, + "loss": 0.1848, + "step": 7651 + }, + { + "epoch": 43.72571428571428, + "grad_norm": 70.21526336669922, + "learning_rate": 6.971428571428572e-06, + "loss": 0.1467, + "step": 7652 + }, + { + "epoch": 43.73142857142857, + "grad_norm": 13.356405258178711, + "learning_rate": 6.965079365079366e-06, + "loss": 0.2087, + "step": 7653 + }, + { + "epoch": 43.73714285714286, + "grad_norm": 18.8299503326416, + "learning_rate": 6.958730158730159e-06, + "loss": 0.1509, + "step": 7654 + }, + { + "epoch": 43.74285714285714, + "grad_norm": 48.8301887512207, + "learning_rate": 6.952380952380953e-06, + "loss": 0.1093, + "step": 7655 + }, + { + "epoch": 43.74857142857143, + "grad_norm": 33.69208908081055, + "learning_rate": 6.946031746031746e-06, + "loss": 0.1604, + "step": 7656 + }, + { + "epoch": 
43.754285714285714, + "grad_norm": 39.09320068359375, + "learning_rate": 6.93968253968254e-06, + "loss": 0.1591, + "step": 7657 + }, + { + "epoch": 43.76, + "grad_norm": 54.30573654174805, + "learning_rate": 6.933333333333334e-06, + "loss": 0.1459, + "step": 7658 + }, + { + "epoch": 43.76571428571429, + "grad_norm": 43.04143142700195, + "learning_rate": 6.926984126984128e-06, + "loss": 0.1916, + "step": 7659 + }, + { + "epoch": 43.77142857142857, + "grad_norm": 69.95057678222656, + "learning_rate": 6.9206349206349206e-06, + "loss": 0.1872, + "step": 7660 + }, + { + "epoch": 43.777142857142856, + "grad_norm": 32.16206741333008, + "learning_rate": 6.914285714285715e-06, + "loss": 0.1827, + "step": 7661 + }, + { + "epoch": 43.78285714285714, + "grad_norm": 94.184814453125, + "learning_rate": 6.9079365079365075e-06, + "loss": 0.1363, + "step": 7662 + }, + { + "epoch": 43.78857142857143, + "grad_norm": 28.984830856323242, + "learning_rate": 6.901587301587302e-06, + "loss": 0.1286, + "step": 7663 + }, + { + "epoch": 43.794285714285714, + "grad_norm": 59.8381462097168, + "learning_rate": 6.8952380952380945e-06, + "loss": 0.1945, + "step": 7664 + }, + { + "epoch": 43.8, + "grad_norm": 54.81769943237305, + "learning_rate": 6.888888888888889e-06, + "loss": 0.1399, + "step": 7665 + }, + { + "epoch": 43.80571428571429, + "grad_norm": 25.788875579833984, + "learning_rate": 6.882539682539683e-06, + "loss": 0.2116, + "step": 7666 + }, + { + "epoch": 43.81142857142857, + "grad_norm": 51.8724365234375, + "learning_rate": 6.876190476190477e-06, + "loss": 0.1393, + "step": 7667 + }, + { + "epoch": 43.817142857142855, + "grad_norm": 21.43828582763672, + "learning_rate": 6.869841269841271e-06, + "loss": 0.155, + "step": 7668 + }, + { + "epoch": 43.822857142857146, + "grad_norm": 41.8471794128418, + "learning_rate": 6.863492063492064e-06, + "loss": 0.1177, + "step": 7669 + }, + { + "epoch": 43.82857142857143, + "grad_norm": 51.57056427001953, + "learning_rate": 6.857142857142858e-06, + 
"loss": 0.1241, + "step": 7670 + }, + { + "epoch": 43.83428571428571, + "grad_norm": 46.511741638183594, + "learning_rate": 6.8507936507936506e-06, + "loss": 0.1043, + "step": 7671 + }, + { + "epoch": 43.84, + "grad_norm": 88.49575805664062, + "learning_rate": 6.844444444444445e-06, + "loss": 0.1316, + "step": 7672 + }, + { + "epoch": 43.84571428571429, + "grad_norm": 36.16876220703125, + "learning_rate": 6.8380952380952375e-06, + "loss": 0.1327, + "step": 7673 + }, + { + "epoch": 43.85142857142857, + "grad_norm": 50.89331817626953, + "learning_rate": 6.831746031746032e-06, + "loss": 0.2681, + "step": 7674 + }, + { + "epoch": 43.857142857142854, + "grad_norm": 44.32616424560547, + "learning_rate": 6.825396825396825e-06, + "loss": 0.1589, + "step": 7675 + }, + { + "epoch": 43.862857142857145, + "grad_norm": 45.69709396362305, + "learning_rate": 6.81904761904762e-06, + "loss": 0.1618, + "step": 7676 + }, + { + "epoch": 43.86857142857143, + "grad_norm": 831.8772583007812, + "learning_rate": 6.812698412698414e-06, + "loss": 0.2403, + "step": 7677 + }, + { + "epoch": 43.87428571428571, + "grad_norm": 50.53889465332031, + "learning_rate": 6.806349206349207e-06, + "loss": 0.1664, + "step": 7678 + }, + { + "epoch": 43.88, + "grad_norm": 33.22833251953125, + "learning_rate": 6.800000000000001e-06, + "loss": 0.126, + "step": 7679 + }, + { + "epoch": 43.885714285714286, + "grad_norm": 50.8762321472168, + "learning_rate": 6.793650793650794e-06, + "loss": 0.1478, + "step": 7680 + }, + { + "epoch": 43.89142857142857, + "grad_norm": 57.65444564819336, + "learning_rate": 6.787301587301588e-06, + "loss": 0.1661, + "step": 7681 + }, + { + "epoch": 43.89714285714286, + "grad_norm": 64.80712890625, + "learning_rate": 6.7809523809523806e-06, + "loss": 0.1689, + "step": 7682 + }, + { + "epoch": 43.902857142857144, + "grad_norm": 43.664512634277344, + "learning_rate": 6.774603174603175e-06, + "loss": 0.2021, + "step": 7683 + }, + { + "epoch": 43.90857142857143, + "grad_norm": 
65.74862670898438, + "learning_rate": 6.768253968253968e-06, + "loss": 0.1362, + "step": 7684 + }, + { + "epoch": 43.91428571428571, + "grad_norm": 34.35681915283203, + "learning_rate": 6.761904761904763e-06, + "loss": 0.1945, + "step": 7685 + }, + { + "epoch": 43.92, + "grad_norm": 34.14244079589844, + "learning_rate": 6.755555555555555e-06, + "loss": 0.0913, + "step": 7686 + }, + { + "epoch": 43.925714285714285, + "grad_norm": 42.42035675048828, + "learning_rate": 6.74920634920635e-06, + "loss": 0.1462, + "step": 7687 + }, + { + "epoch": 43.93142857142857, + "grad_norm": 74.27864837646484, + "learning_rate": 6.742857142857144e-06, + "loss": 0.2258, + "step": 7688 + }, + { + "epoch": 43.93714285714286, + "grad_norm": 64.61466217041016, + "learning_rate": 6.736507936507937e-06, + "loss": 0.1289, + "step": 7689 + }, + { + "epoch": 43.94285714285714, + "grad_norm": 50.40987014770508, + "learning_rate": 6.730158730158731e-06, + "loss": 0.1687, + "step": 7690 + }, + { + "epoch": 43.94857142857143, + "grad_norm": 110.01002502441406, + "learning_rate": 6.723809523809524e-06, + "loss": 0.1591, + "step": 7691 + }, + { + "epoch": 43.95428571428572, + "grad_norm": 58.86008071899414, + "learning_rate": 6.717460317460318e-06, + "loss": 0.1716, + "step": 7692 + }, + { + "epoch": 43.96, + "grad_norm": 54.54946517944336, + "learning_rate": 6.711111111111111e-06, + "loss": 0.1859, + "step": 7693 + }, + { + "epoch": 43.965714285714284, + "grad_norm": 209.70298767089844, + "learning_rate": 6.704761904761906e-06, + "loss": 0.1432, + "step": 7694 + }, + { + "epoch": 43.97142857142857, + "grad_norm": 33.95160675048828, + "learning_rate": 6.698412698412698e-06, + "loss": 0.1285, + "step": 7695 + }, + { + "epoch": 43.97714285714286, + "grad_norm": 32.52368927001953, + "learning_rate": 6.692063492063493e-06, + "loss": 0.155, + "step": 7696 + }, + { + "epoch": 43.98285714285714, + "grad_norm": 53.18463134765625, + "learning_rate": 6.685714285714285e-06, + "loss": 0.2814, + "step": 7697 + 
}, + { + "epoch": 43.988571428571426, + "grad_norm": 494.98291015625, + "learning_rate": 6.67936507936508e-06, + "loss": 0.1706, + "step": 7698 + }, + { + "epoch": 43.994285714285716, + "grad_norm": 43.096839904785156, + "learning_rate": 6.673015873015874e-06, + "loss": 0.1665, + "step": 7699 + }, + { + "epoch": 44.0, + "grad_norm": 48.978614807128906, + "learning_rate": 6.666666666666667e-06, + "loss": 0.1927, + "step": 7700 + }, + { + "epoch": 44.0, + "eval_classes": 0, + "eval_loss": 0.5757384300231934, + "eval_map": 0.9387, + "eval_map_50": 0.9684, + "eval_map_75": 0.9649, + "eval_map_large": 0.9387, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9387, + "eval_map_small": -1.0, + "eval_mar_1": 0.7902, + "eval_mar_10": 0.9749, + "eval_mar_100": 0.9749, + "eval_mar_100_per_class": 0.9749, + "eval_mar_large": 0.9749, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.311, + "eval_samples_per_second": 22.087, + "eval_steps_per_second": 2.78, + "step": 7700 + }, + { + "epoch": 44.005714285714284, + "grad_norm": 59.333335876464844, + "learning_rate": 6.660317460317461e-06, + "loss": 0.1075, + "step": 7701 + }, + { + "epoch": 44.011428571428574, + "grad_norm": 34.29730987548828, + "learning_rate": 6.6539682539682545e-06, + "loss": 0.1525, + "step": 7702 + }, + { + "epoch": 44.01714285714286, + "grad_norm": 286.7693786621094, + "learning_rate": 6.647619047619048e-06, + "loss": 0.1714, + "step": 7703 + }, + { + "epoch": 44.02285714285714, + "grad_norm": 65.73849487304688, + "learning_rate": 6.6412698412698414e-06, + "loss": 0.1158, + "step": 7704 + }, + { + "epoch": 44.02857142857143, + "grad_norm": 36.001380920410156, + "learning_rate": 6.634920634920636e-06, + "loss": 0.2047, + "step": 7705 + }, + { + "epoch": 44.034285714285716, + "grad_norm": 41.00141143798828, + "learning_rate": 6.628571428571428e-06, + "loss": 0.1346, + "step": 7706 + }, + { + "epoch": 44.04, + "grad_norm": 39.80067825317383, + "learning_rate": 6.622222222222223e-06, + 
"loss": 0.1475, + "step": 7707 + }, + { + "epoch": 44.04571428571428, + "grad_norm": 31.52075958251953, + "learning_rate": 6.615873015873015e-06, + "loss": 0.1168, + "step": 7708 + }, + { + "epoch": 44.05142857142857, + "grad_norm": 45.78147506713867, + "learning_rate": 6.60952380952381e-06, + "loss": 0.1976, + "step": 7709 + }, + { + "epoch": 44.05714285714286, + "grad_norm": 38.48584747314453, + "learning_rate": 6.603174603174604e-06, + "loss": 0.2292, + "step": 7710 + }, + { + "epoch": 44.06285714285714, + "grad_norm": 48.30115509033203, + "learning_rate": 6.596825396825397e-06, + "loss": 0.1574, + "step": 7711 + }, + { + "epoch": 44.06857142857143, + "grad_norm": 34.71587371826172, + "learning_rate": 6.590476190476191e-06, + "loss": 0.1413, + "step": 7712 + }, + { + "epoch": 44.074285714285715, + "grad_norm": 26.006423950195312, + "learning_rate": 6.5841269841269845e-06, + "loss": 0.1238, + "step": 7713 + }, + { + "epoch": 44.08, + "grad_norm": 58.09585952758789, + "learning_rate": 6.577777777777779e-06, + "loss": 0.1434, + "step": 7714 + }, + { + "epoch": 44.08571428571429, + "grad_norm": 53.749900817871094, + "learning_rate": 6.5714285714285714e-06, + "loss": 0.2007, + "step": 7715 + }, + { + "epoch": 44.09142857142857, + "grad_norm": 64.02667999267578, + "learning_rate": 6.565079365079366e-06, + "loss": 0.1173, + "step": 7716 + }, + { + "epoch": 44.097142857142856, + "grad_norm": 19.437864303588867, + "learning_rate": 6.558730158730158e-06, + "loss": 0.1861, + "step": 7717 + }, + { + "epoch": 44.10285714285714, + "grad_norm": 41.478515625, + "learning_rate": 6.552380952380953e-06, + "loss": 0.1524, + "step": 7718 + }, + { + "epoch": 44.10857142857143, + "grad_norm": 24.171451568603516, + "learning_rate": 6.546031746031746e-06, + "loss": 0.1573, + "step": 7719 + }, + { + "epoch": 44.114285714285714, + "grad_norm": 24.408952713012695, + "learning_rate": 6.53968253968254e-06, + "loss": 0.118, + "step": 7720 + }, + { + "epoch": 44.12, + "grad_norm": 
48.23006820678711, + "learning_rate": 6.533333333333333e-06, + "loss": 0.1784, + "step": 7721 + }, + { + "epoch": 44.12571428571429, + "grad_norm": 33.18339157104492, + "learning_rate": 6.5269841269841275e-06, + "loss": 0.1202, + "step": 7722 + }, + { + "epoch": 44.13142857142857, + "grad_norm": 47.897403717041016, + "learning_rate": 6.520634920634922e-06, + "loss": 0.0878, + "step": 7723 + }, + { + "epoch": 44.137142857142855, + "grad_norm": 66.20220184326172, + "learning_rate": 6.5142857142857145e-06, + "loss": 0.1532, + "step": 7724 + }, + { + "epoch": 44.142857142857146, + "grad_norm": 22.750667572021484, + "learning_rate": 6.507936507936509e-06, + "loss": 0.1685, + "step": 7725 + }, + { + "epoch": 44.14857142857143, + "grad_norm": 27.192466735839844, + "learning_rate": 6.5015873015873014e-06, + "loss": 0.1222, + "step": 7726 + }, + { + "epoch": 44.15428571428571, + "grad_norm": 37.51421356201172, + "learning_rate": 6.495238095238096e-06, + "loss": 0.1223, + "step": 7727 + }, + { + "epoch": 44.16, + "grad_norm": 76.6365737915039, + "learning_rate": 6.488888888888888e-06, + "loss": 0.1615, + "step": 7728 + }, + { + "epoch": 44.16571428571429, + "grad_norm": 64.56356811523438, + "learning_rate": 6.482539682539683e-06, + "loss": 0.1891, + "step": 7729 + }, + { + "epoch": 44.17142857142857, + "grad_norm": 66.12844848632812, + "learning_rate": 6.476190476190476e-06, + "loss": 0.1378, + "step": 7730 + }, + { + "epoch": 44.177142857142854, + "grad_norm": 56.333126068115234, + "learning_rate": 6.4698412698412706e-06, + "loss": 0.1402, + "step": 7731 + }, + { + "epoch": 44.182857142857145, + "grad_norm": 64.17578887939453, + "learning_rate": 6.463492063492063e-06, + "loss": 0.1244, + "step": 7732 + }, + { + "epoch": 44.18857142857143, + "grad_norm": 50.45033645629883, + "learning_rate": 6.4571428571428575e-06, + "loss": 0.1571, + "step": 7733 + }, + { + "epoch": 44.19428571428571, + "grad_norm": 64.60138702392578, + "learning_rate": 6.450793650793652e-06, + "loss": 
0.1621, + "step": 7734 + }, + { + "epoch": 44.2, + "grad_norm": 59.69667434692383, + "learning_rate": 6.4444444444444445e-06, + "loss": 0.2149, + "step": 7735 + }, + { + "epoch": 44.205714285714286, + "grad_norm": 52.08457946777344, + "learning_rate": 6.438095238095239e-06, + "loss": 0.0926, + "step": 7736 + }, + { + "epoch": 44.21142857142857, + "grad_norm": 95.57523345947266, + "learning_rate": 6.4317460317460314e-06, + "loss": 0.1676, + "step": 7737 + }, + { + "epoch": 44.21714285714286, + "grad_norm": 32.85334777832031, + "learning_rate": 6.425396825396826e-06, + "loss": 0.1463, + "step": 7738 + }, + { + "epoch": 44.222857142857144, + "grad_norm": 39.23460006713867, + "learning_rate": 6.419047619047619e-06, + "loss": 0.1318, + "step": 7739 + }, + { + "epoch": 44.22857142857143, + "grad_norm": 51.70003890991211, + "learning_rate": 6.412698412698414e-06, + "loss": 0.1424, + "step": 7740 + }, + { + "epoch": 44.23428571428571, + "grad_norm": 85.29315185546875, + "learning_rate": 6.406349206349206e-06, + "loss": 0.1678, + "step": 7741 + }, + { + "epoch": 44.24, + "grad_norm": 109.13520812988281, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.1376, + "step": 7742 + }, + { + "epoch": 44.245714285714286, + "grad_norm": 24.532550811767578, + "learning_rate": 6.393650793650793e-06, + "loss": 0.1545, + "step": 7743 + }, + { + "epoch": 44.25142857142857, + "grad_norm": 671.1575317382812, + "learning_rate": 6.3873015873015875e-06, + "loss": 0.1926, + "step": 7744 + }, + { + "epoch": 44.25714285714286, + "grad_norm": 44.302879333496094, + "learning_rate": 6.380952380952382e-06, + "loss": 0.1135, + "step": 7745 + }, + { + "epoch": 44.26285714285714, + "grad_norm": 55.46490478515625, + "learning_rate": 6.3746031746031745e-06, + "loss": 0.1392, + "step": 7746 + }, + { + "epoch": 44.26857142857143, + "grad_norm": 51.76401901245117, + "learning_rate": 6.368253968253969e-06, + "loss": 0.1314, + "step": 7747 + }, + { + "epoch": 44.27428571428572, + "grad_norm": 
49.445411682128906, + "learning_rate": 6.361904761904762e-06, + "loss": 0.1592, + "step": 7748 + }, + { + "epoch": 44.28, + "grad_norm": 37.76740264892578, + "learning_rate": 6.355555555555557e-06, + "loss": 0.0979, + "step": 7749 + }, + { + "epoch": 44.285714285714285, + "grad_norm": 40.30945587158203, + "learning_rate": 6.349206349206349e-06, + "loss": 0.0989, + "step": 7750 + }, + { + "epoch": 44.29142857142857, + "grad_norm": 35.8820686340332, + "learning_rate": 6.342857142857144e-06, + "loss": 0.1374, + "step": 7751 + }, + { + "epoch": 44.29714285714286, + "grad_norm": 59.061431884765625, + "learning_rate": 6.336507936507936e-06, + "loss": 0.1363, + "step": 7752 + }, + { + "epoch": 44.30285714285714, + "grad_norm": 126.11488342285156, + "learning_rate": 6.3301587301587306e-06, + "loss": 0.154, + "step": 7753 + }, + { + "epoch": 44.308571428571426, + "grad_norm": 29.980083465576172, + "learning_rate": 6.323809523809523e-06, + "loss": 0.1223, + "step": 7754 + }, + { + "epoch": 44.31428571428572, + "grad_norm": 129.24725341796875, + "learning_rate": 6.3174603174603175e-06, + "loss": 0.1297, + "step": 7755 + }, + { + "epoch": 44.32, + "grad_norm": 47.06840133666992, + "learning_rate": 6.311111111111112e-06, + "loss": 0.1736, + "step": 7756 + }, + { + "epoch": 44.325714285714284, + "grad_norm": 760.0050048828125, + "learning_rate": 6.304761904761905e-06, + "loss": 0.1587, + "step": 7757 + }, + { + "epoch": 44.331428571428575, + "grad_norm": 34.820648193359375, + "learning_rate": 6.2984126984127e-06, + "loss": 0.1292, + "step": 7758 + }, + { + "epoch": 44.33714285714286, + "grad_norm": 29.063579559326172, + "learning_rate": 6.292063492063492e-06, + "loss": 0.1625, + "step": 7759 + }, + { + "epoch": 44.34285714285714, + "grad_norm": 432.6778564453125, + "learning_rate": 6.285714285714287e-06, + "loss": 0.1781, + "step": 7760 + }, + { + "epoch": 44.348571428571425, + "grad_norm": 25.651865005493164, + "learning_rate": 6.279365079365079e-06, + "loss": 0.126, + "step": 
7761 + }, + { + "epoch": 44.354285714285716, + "grad_norm": 34.330894470214844, + "learning_rate": 6.273015873015874e-06, + "loss": 0.1425, + "step": 7762 + }, + { + "epoch": 44.36, + "grad_norm": 860.991943359375, + "learning_rate": 6.266666666666666e-06, + "loss": 0.2029, + "step": 7763 + }, + { + "epoch": 44.36571428571428, + "grad_norm": 33.004478454589844, + "learning_rate": 6.2603174603174606e-06, + "loss": 0.1603, + "step": 7764 + }, + { + "epoch": 44.371428571428574, + "grad_norm": 35.191619873046875, + "learning_rate": 6.253968253968254e-06, + "loss": 0.1652, + "step": 7765 + }, + { + "epoch": 44.37714285714286, + "grad_norm": 34.987613677978516, + "learning_rate": 6.247619047619048e-06, + "loss": 0.1469, + "step": 7766 + }, + { + "epoch": 44.38285714285714, + "grad_norm": 48.29430389404297, + "learning_rate": 6.241269841269842e-06, + "loss": 0.0925, + "step": 7767 + }, + { + "epoch": 44.38857142857143, + "grad_norm": 44.30611801147461, + "learning_rate": 6.234920634920635e-06, + "loss": 0.1699, + "step": 7768 + }, + { + "epoch": 44.394285714285715, + "grad_norm": 37.219600677490234, + "learning_rate": 6.228571428571429e-06, + "loss": 0.1443, + "step": 7769 + }, + { + "epoch": 44.4, + "grad_norm": 54.558162689208984, + "learning_rate": 6.222222222222222e-06, + "loss": 0.1487, + "step": 7770 + }, + { + "epoch": 44.40571428571428, + "grad_norm": 96.38822937011719, + "learning_rate": 6.215873015873016e-06, + "loss": 0.1501, + "step": 7771 + }, + { + "epoch": 44.41142857142857, + "grad_norm": 20.549890518188477, + "learning_rate": 6.209523809523809e-06, + "loss": 0.1393, + "step": 7772 + }, + { + "epoch": 44.417142857142856, + "grad_norm": 22.02899742126465, + "learning_rate": 6.203174603174604e-06, + "loss": 0.1303, + "step": 7773 + }, + { + "epoch": 44.42285714285714, + "grad_norm": 82.55475616455078, + "learning_rate": 6.196825396825397e-06, + "loss": 0.149, + "step": 7774 + }, + { + "epoch": 44.42857142857143, + "grad_norm": 47.80804443359375, + 
"learning_rate": 6.190476190476191e-06, + "loss": 0.1799, + "step": 7775 + }, + { + "epoch": 44.434285714285714, + "grad_norm": 75.9185562133789, + "learning_rate": 6.184126984126985e-06, + "loss": 0.193, + "step": 7776 + }, + { + "epoch": 44.44, + "grad_norm": 45.21895217895508, + "learning_rate": 6.177777777777778e-06, + "loss": 0.1714, + "step": 7777 + }, + { + "epoch": 44.44571428571429, + "grad_norm": 23.10559844970703, + "learning_rate": 6.171428571428572e-06, + "loss": 0.1295, + "step": 7778 + }, + { + "epoch": 44.45142857142857, + "grad_norm": 55.595394134521484, + "learning_rate": 6.165079365079365e-06, + "loss": 0.1614, + "step": 7779 + }, + { + "epoch": 44.457142857142856, + "grad_norm": 47.207794189453125, + "learning_rate": 6.158730158730159e-06, + "loss": 0.1374, + "step": 7780 + }, + { + "epoch": 44.462857142857146, + "grad_norm": 34.826934814453125, + "learning_rate": 6.152380952380952e-06, + "loss": 0.1263, + "step": 7781 + }, + { + "epoch": 44.46857142857143, + "grad_norm": 163.04493713378906, + "learning_rate": 6.146031746031746e-06, + "loss": 0.1609, + "step": 7782 + }, + { + "epoch": 44.47428571428571, + "grad_norm": 67.82376861572266, + "learning_rate": 6.139682539682539e-06, + "loss": 0.1474, + "step": 7783 + }, + { + "epoch": 44.48, + "grad_norm": 37.07332992553711, + "learning_rate": 6.133333333333334e-06, + "loss": 0.1435, + "step": 7784 + }, + { + "epoch": 44.48571428571429, + "grad_norm": 54.88054275512695, + "learning_rate": 6.126984126984128e-06, + "loss": 0.1417, + "step": 7785 + }, + { + "epoch": 44.49142857142857, + "grad_norm": 29.38848114013672, + "learning_rate": 6.1206349206349214e-06, + "loss": 0.1092, + "step": 7786 + }, + { + "epoch": 44.497142857142855, + "grad_norm": 63.567604064941406, + "learning_rate": 6.114285714285715e-06, + "loss": 0.1559, + "step": 7787 + }, + { + "epoch": 44.502857142857145, + "grad_norm": 53.42649841308594, + "learning_rate": 6.107936507936508e-06, + "loss": 0.1857, + "step": 7788 + }, + { + 
"epoch": 44.50857142857143, + "grad_norm": 20.135610580444336, + "learning_rate": 6.101587301587302e-06, + "loss": 0.1098, + "step": 7789 + }, + { + "epoch": 44.51428571428571, + "grad_norm": 49.138065338134766, + "learning_rate": 6.095238095238095e-06, + "loss": 0.2962, + "step": 7790 + }, + { + "epoch": 44.52, + "grad_norm": 50.56275939941406, + "learning_rate": 6.088888888888889e-06, + "loss": 0.1249, + "step": 7791 + }, + { + "epoch": 44.52571428571429, + "grad_norm": 538.6000366210938, + "learning_rate": 6.082539682539682e-06, + "loss": 0.1384, + "step": 7792 + }, + { + "epoch": 44.53142857142857, + "grad_norm": 32.58999252319336, + "learning_rate": 6.076190476190477e-06, + "loss": 0.1256, + "step": 7793 + }, + { + "epoch": 44.537142857142854, + "grad_norm": 28.279216766357422, + "learning_rate": 6.06984126984127e-06, + "loss": 0.1275, + "step": 7794 + }, + { + "epoch": 44.542857142857144, + "grad_norm": 50.32058334350586, + "learning_rate": 6.063492063492064e-06, + "loss": 0.2252, + "step": 7795 + }, + { + "epoch": 44.54857142857143, + "grad_norm": 32.84303665161133, + "learning_rate": 6.057142857142858e-06, + "loss": 0.1643, + "step": 7796 + }, + { + "epoch": 44.55428571428571, + "grad_norm": 43.972381591796875, + "learning_rate": 6.0507936507936514e-06, + "loss": 0.1207, + "step": 7797 + }, + { + "epoch": 44.56, + "grad_norm": 31.350746154785156, + "learning_rate": 6.044444444444445e-06, + "loss": 0.1728, + "step": 7798 + }, + { + "epoch": 44.565714285714286, + "grad_norm": 57.16665267944336, + "learning_rate": 6.038095238095238e-06, + "loss": 0.0836, + "step": 7799 + }, + { + "epoch": 44.57142857142857, + "grad_norm": 24.479278564453125, + "learning_rate": 6.031746031746032e-06, + "loss": 0.122, + "step": 7800 + }, + { + "epoch": 44.57714285714286, + "grad_norm": 55.26851272583008, + "learning_rate": 6.025396825396825e-06, + "loss": 0.0994, + "step": 7801 + }, + { + "epoch": 44.582857142857144, + "grad_norm": 121.71125793457031, + "learning_rate": 
6.01904761904762e-06, + "loss": 0.199, + "step": 7802 + }, + { + "epoch": 44.58857142857143, + "grad_norm": 133.99131774902344, + "learning_rate": 6.012698412698413e-06, + "loss": 0.1916, + "step": 7803 + }, + { + "epoch": 44.59428571428572, + "grad_norm": 44.67299270629883, + "learning_rate": 6.006349206349207e-06, + "loss": 0.2354, + "step": 7804 + }, + { + "epoch": 44.6, + "grad_norm": 90.81427001953125, + "learning_rate": 6e-06, + "loss": 0.1429, + "step": 7805 + }, + { + "epoch": 44.605714285714285, + "grad_norm": 26.58412742614746, + "learning_rate": 5.993650793650794e-06, + "loss": 0.137, + "step": 7806 + }, + { + "epoch": 44.61142857142857, + "grad_norm": 74.78943634033203, + "learning_rate": 5.987301587301587e-06, + "loss": 0.4208, + "step": 7807 + }, + { + "epoch": 44.61714285714286, + "grad_norm": 57.04520797729492, + "learning_rate": 5.9809523809523814e-06, + "loss": 0.1065, + "step": 7808 + }, + { + "epoch": 44.62285714285714, + "grad_norm": 34.23002243041992, + "learning_rate": 5.974603174603175e-06, + "loss": 0.1434, + "step": 7809 + }, + { + "epoch": 44.628571428571426, + "grad_norm": 2633.763671875, + "learning_rate": 5.968253968253968e-06, + "loss": 0.411, + "step": 7810 + }, + { + "epoch": 44.63428571428572, + "grad_norm": 53.742576599121094, + "learning_rate": 5.961904761904762e-06, + "loss": 0.13, + "step": 7811 + }, + { + "epoch": 44.64, + "grad_norm": 21.34613609313965, + "learning_rate": 5.955555555555556e-06, + "loss": 0.1278, + "step": 7812 + }, + { + "epoch": 44.645714285714284, + "grad_norm": 67.92548370361328, + "learning_rate": 5.94920634920635e-06, + "loss": 0.1736, + "step": 7813 + }, + { + "epoch": 44.651428571428575, + "grad_norm": 29.171289443969727, + "learning_rate": 5.942857142857143e-06, + "loss": 0.1587, + "step": 7814 + }, + { + "epoch": 44.65714285714286, + "grad_norm": 35.45895767211914, + "learning_rate": 5.936507936507937e-06, + "loss": 0.1653, + "step": 7815 + }, + { + "epoch": 44.66285714285714, + "grad_norm": 
43.47918701171875, + "learning_rate": 5.93015873015873e-06, + "loss": 0.1434, + "step": 7816 + }, + { + "epoch": 44.668571428571425, + "grad_norm": 58.679508209228516, + "learning_rate": 5.923809523809524e-06, + "loss": 0.1682, + "step": 7817 + }, + { + "epoch": 44.674285714285716, + "grad_norm": 191.1954803466797, + "learning_rate": 5.917460317460317e-06, + "loss": 0.1325, + "step": 7818 + }, + { + "epoch": 44.68, + "grad_norm": 50.023590087890625, + "learning_rate": 5.9111111111111115e-06, + "loss": 0.1521, + "step": 7819 + }, + { + "epoch": 44.68571428571428, + "grad_norm": 51.93128967285156, + "learning_rate": 5.904761904761905e-06, + "loss": 0.2369, + "step": 7820 + }, + { + "epoch": 44.691428571428574, + "grad_norm": 39.430572509765625, + "learning_rate": 5.898412698412699e-06, + "loss": 0.1177, + "step": 7821 + }, + { + "epoch": 44.69714285714286, + "grad_norm": 13.830597877502441, + "learning_rate": 5.892063492063493e-06, + "loss": 0.1274, + "step": 7822 + }, + { + "epoch": 44.70285714285714, + "grad_norm": 76.25123596191406, + "learning_rate": 5.885714285714286e-06, + "loss": 0.1033, + "step": 7823 + }, + { + "epoch": 44.70857142857143, + "grad_norm": 60.84410095214844, + "learning_rate": 5.87936507936508e-06, + "loss": 0.1232, + "step": 7824 + }, + { + "epoch": 44.714285714285715, + "grad_norm": 41.37836456298828, + "learning_rate": 5.873015873015873e-06, + "loss": 0.1203, + "step": 7825 + }, + { + "epoch": 44.72, + "grad_norm": 33.750022888183594, + "learning_rate": 5.866666666666667e-06, + "loss": 0.1386, + "step": 7826 + }, + { + "epoch": 44.72571428571428, + "grad_norm": 33.331016540527344, + "learning_rate": 5.86031746031746e-06, + "loss": 0.1498, + "step": 7827 + }, + { + "epoch": 44.73142857142857, + "grad_norm": 41.63890075683594, + "learning_rate": 5.853968253968254e-06, + "loss": 0.2001, + "step": 7828 + }, + { + "epoch": 44.73714285714286, + "grad_norm": 29.633655548095703, + "learning_rate": 5.847619047619048e-06, + "loss": 0.1549, + "step": 
7829 + }, + { + "epoch": 44.74285714285714, + "grad_norm": 31.78554344177246, + "learning_rate": 5.841269841269842e-06, + "loss": 0.192, + "step": 7830 + }, + { + "epoch": 44.74857142857143, + "grad_norm": 38.54855728149414, + "learning_rate": 5.834920634920636e-06, + "loss": 0.1658, + "step": 7831 + }, + { + "epoch": 44.754285714285714, + "grad_norm": 25.076963424682617, + "learning_rate": 5.828571428571429e-06, + "loss": 0.1816, + "step": 7832 + }, + { + "epoch": 44.76, + "grad_norm": 37.5577278137207, + "learning_rate": 5.822222222222223e-06, + "loss": 0.1466, + "step": 7833 + }, + { + "epoch": 44.76571428571429, + "grad_norm": 51.69167709350586, + "learning_rate": 5.815873015873016e-06, + "loss": 0.2645, + "step": 7834 + }, + { + "epoch": 44.77142857142857, + "grad_norm": 50.27036666870117, + "learning_rate": 5.80952380952381e-06, + "loss": 0.1469, + "step": 7835 + }, + { + "epoch": 44.777142857142856, + "grad_norm": 26.678077697753906, + "learning_rate": 5.803174603174603e-06, + "loss": 0.1616, + "step": 7836 + }, + { + "epoch": 44.78285714285714, + "grad_norm": 249.02993774414062, + "learning_rate": 5.796825396825397e-06, + "loss": 0.1815, + "step": 7837 + }, + { + "epoch": 44.78857142857143, + "grad_norm": 35.703819274902344, + "learning_rate": 5.790476190476191e-06, + "loss": 0.1801, + "step": 7838 + }, + { + "epoch": 44.794285714285714, + "grad_norm": 63.47907638549805, + "learning_rate": 5.7841269841269845e-06, + "loss": 0.142, + "step": 7839 + }, + { + "epoch": 44.8, + "grad_norm": 56.610721588134766, + "learning_rate": 5.777777777777778e-06, + "loss": 0.1931, + "step": 7840 + }, + { + "epoch": 44.80571428571429, + "grad_norm": 41.78334426879883, + "learning_rate": 5.7714285714285715e-06, + "loss": 0.1049, + "step": 7841 + }, + { + "epoch": 44.81142857142857, + "grad_norm": 42.41416549682617, + "learning_rate": 5.765079365079366e-06, + "loss": 0.1297, + "step": 7842 + }, + { + "epoch": 44.817142857142855, + "grad_norm": 21.613910675048828, + 
"learning_rate": 5.758730158730159e-06, + "loss": 0.1222, + "step": 7843 + }, + { + "epoch": 44.822857142857146, + "grad_norm": 54.29707336425781, + "learning_rate": 5.752380952380953e-06, + "loss": 0.1181, + "step": 7844 + }, + { + "epoch": 44.82857142857143, + "grad_norm": 25.819608688354492, + "learning_rate": 5.746031746031746e-06, + "loss": 0.1345, + "step": 7845 + }, + { + "epoch": 44.83428571428571, + "grad_norm": 39.601863861083984, + "learning_rate": 5.73968253968254e-06, + "loss": 0.1373, + "step": 7846 + }, + { + "epoch": 44.84, + "grad_norm": 25.690563201904297, + "learning_rate": 5.733333333333333e-06, + "loss": 0.2132, + "step": 7847 + }, + { + "epoch": 44.84571428571429, + "grad_norm": 66.87892150878906, + "learning_rate": 5.7269841269841275e-06, + "loss": 0.1445, + "step": 7848 + }, + { + "epoch": 44.85142857142857, + "grad_norm": 50.268497467041016, + "learning_rate": 5.720634920634921e-06, + "loss": 0.1438, + "step": 7849 + }, + { + "epoch": 44.857142857142854, + "grad_norm": 26.253446578979492, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.1119, + "step": 7850 + }, + { + "epoch": 44.862857142857145, + "grad_norm": 59.0138053894043, + "learning_rate": 5.707936507936508e-06, + "loss": 0.1161, + "step": 7851 + }, + { + "epoch": 44.86857142857143, + "grad_norm": 68.45675659179688, + "learning_rate": 5.7015873015873015e-06, + "loss": 0.1035, + "step": 7852 + }, + { + "epoch": 44.87428571428571, + "grad_norm": 46.866146087646484, + "learning_rate": 5.695238095238096e-06, + "loss": 0.1345, + "step": 7853 + }, + { + "epoch": 44.88, + "grad_norm": 59.4177131652832, + "learning_rate": 5.688888888888889e-06, + "loss": 0.1827, + "step": 7854 + }, + { + "epoch": 44.885714285714286, + "grad_norm": 79.4287338256836, + "learning_rate": 5.682539682539683e-06, + "loss": 0.1751, + "step": 7855 + }, + { + "epoch": 44.89142857142857, + "grad_norm": 44.99930953979492, + "learning_rate": 5.676190476190476e-06, + "loss": 0.1545, + "step": 7856 + }, + { + 
"epoch": 44.89714285714286, + "grad_norm": 38.18852233886719, + "learning_rate": 5.669841269841271e-06, + "loss": 0.1851, + "step": 7857 + }, + { + "epoch": 44.902857142857144, + "grad_norm": 81.95789337158203, + "learning_rate": 5.663492063492064e-06, + "loss": 0.1157, + "step": 7858 + }, + { + "epoch": 44.90857142857143, + "grad_norm": 33.37058639526367, + "learning_rate": 5.6571428571428576e-06, + "loss": 0.1635, + "step": 7859 + }, + { + "epoch": 44.91428571428571, + "grad_norm": 81.13784790039062, + "learning_rate": 5.650793650793651e-06, + "loss": 0.174, + "step": 7860 + }, + { + "epoch": 44.92, + "grad_norm": 85.00211334228516, + "learning_rate": 5.6444444444444445e-06, + "loss": 0.1532, + "step": 7861 + }, + { + "epoch": 44.925714285714285, + "grad_norm": 49.13711166381836, + "learning_rate": 5.638095238095238e-06, + "loss": 0.1283, + "step": 7862 + }, + { + "epoch": 44.93142857142857, + "grad_norm": 41.752376556396484, + "learning_rate": 5.6317460317460315e-06, + "loss": 0.1513, + "step": 7863 + }, + { + "epoch": 44.93714285714286, + "grad_norm": 32.89028549194336, + "learning_rate": 5.625396825396825e-06, + "loss": 0.1995, + "step": 7864 + }, + { + "epoch": 44.94285714285714, + "grad_norm": 117.55921936035156, + "learning_rate": 5.619047619047619e-06, + "loss": 0.1514, + "step": 7865 + }, + { + "epoch": 44.94857142857143, + "grad_norm": 69.728271484375, + "learning_rate": 5.612698412698414e-06, + "loss": 0.1303, + "step": 7866 + }, + { + "epoch": 44.95428571428572, + "grad_norm": 726.4796142578125, + "learning_rate": 5.606349206349207e-06, + "loss": 0.1795, + "step": 7867 + }, + { + "epoch": 44.96, + "grad_norm": 54.38838195800781, + "learning_rate": 5.600000000000001e-06, + "loss": 0.136, + "step": 7868 + }, + { + "epoch": 44.965714285714284, + "grad_norm": 21.06503677368164, + "learning_rate": 5.593650793650794e-06, + "loss": 0.1315, + "step": 7869 + }, + { + "epoch": 44.97142857142857, + "grad_norm": 50.8365478515625, + "learning_rate": 
5.5873015873015876e-06, + "loss": 0.1109, + "step": 7870 + }, + { + "epoch": 44.97714285714286, + "grad_norm": 65.22562408447266, + "learning_rate": 5.580952380952381e-06, + "loss": 0.2545, + "step": 7871 + }, + { + "epoch": 44.98285714285714, + "grad_norm": 26.463062286376953, + "learning_rate": 5.5746031746031745e-06, + "loss": 0.1487, + "step": 7872 + }, + { + "epoch": 44.988571428571426, + "grad_norm": 23.692731857299805, + "learning_rate": 5.568253968253968e-06, + "loss": 0.1816, + "step": 7873 + }, + { + "epoch": 44.994285714285716, + "grad_norm": 109.07501983642578, + "learning_rate": 5.561904761904762e-06, + "loss": 0.1082, + "step": 7874 + }, + { + "epoch": 45.0, + "grad_norm": 27.450218200683594, + "learning_rate": 5.555555555555556e-06, + "loss": 0.1496, + "step": 7875 + }, + { + "epoch": 45.0, + "eval_classes": 0, + "eval_loss": 0.5875609517097473, + "eval_map": 0.9372, + "eval_map_50": 0.9697, + "eval_map_75": 0.9638, + "eval_map_large": 0.9373, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9372, + "eval_map_small": -1.0, + "eval_mar_1": 0.7952, + "eval_mar_10": 0.9759, + "eval_mar_100": 0.9759, + "eval_mar_100_per_class": 0.9759, + "eval_mar_large": 0.9759, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.7437, + "eval_samples_per_second": 21.392, + "eval_steps_per_second": 2.692, + "step": 7875 + }, + { + "epoch": 45.005714285714284, + "grad_norm": 59.07111358642578, + "learning_rate": 5.54920634920635e-06, + "loss": 0.0999, + "step": 7876 + }, + { + "epoch": 45.011428571428574, + "grad_norm": 28.458738327026367, + "learning_rate": 5.542857142857144e-06, + "loss": 0.1813, + "step": 7877 + }, + { + "epoch": 45.01714285714286, + "grad_norm": 43.924617767333984, + "learning_rate": 5.536507936507937e-06, + "loss": 0.1198, + "step": 7878 + }, + { + "epoch": 45.02285714285714, + "grad_norm": 42.84949493408203, + "learning_rate": 5.530158730158731e-06, + "loss": 0.1692, + "step": 7879 + }, + { + "epoch": 45.02857142857143, + 
"grad_norm": 68.04060363769531, + "learning_rate": 5.523809523809524e-06, + "loss": 0.1177, + "step": 7880 + }, + { + "epoch": 45.034285714285716, + "grad_norm": 122.39205169677734, + "learning_rate": 5.5174603174603176e-06, + "loss": 0.1593, + "step": 7881 + }, + { + "epoch": 45.04, + "grad_norm": 58.70307540893555, + "learning_rate": 5.511111111111111e-06, + "loss": 0.2077, + "step": 7882 + }, + { + "epoch": 45.04571428571428, + "grad_norm": 72.9378433227539, + "learning_rate": 5.5047619047619045e-06, + "loss": 0.12, + "step": 7883 + }, + { + "epoch": 45.05142857142857, + "grad_norm": 60.564754486083984, + "learning_rate": 5.498412698412699e-06, + "loss": 0.1211, + "step": 7884 + }, + { + "epoch": 45.05714285714286, + "grad_norm": 42.58056640625, + "learning_rate": 5.492063492063492e-06, + "loss": 0.1291, + "step": 7885 + }, + { + "epoch": 45.06285714285714, + "grad_norm": 43.20802688598633, + "learning_rate": 5.485714285714286e-06, + "loss": 0.1323, + "step": 7886 + }, + { + "epoch": 45.06857142857143, + "grad_norm": 35.570735931396484, + "learning_rate": 5.479365079365079e-06, + "loss": 0.1062, + "step": 7887 + }, + { + "epoch": 45.074285714285715, + "grad_norm": 35.60871505737305, + "learning_rate": 5.473015873015874e-06, + "loss": 0.1661, + "step": 7888 + }, + { + "epoch": 45.08, + "grad_norm": 26.704849243164062, + "learning_rate": 5.466666666666667e-06, + "loss": 0.1529, + "step": 7889 + }, + { + "epoch": 45.08571428571429, + "grad_norm": 53.47730255126953, + "learning_rate": 5.460317460317461e-06, + "loss": 0.2025, + "step": 7890 + }, + { + "epoch": 45.09142857142857, + "grad_norm": 70.42041015625, + "learning_rate": 5.453968253968254e-06, + "loss": 0.3081, + "step": 7891 + }, + { + "epoch": 45.097142857142856, + "grad_norm": 92.92615509033203, + "learning_rate": 5.4476190476190476e-06, + "loss": 0.1179, + "step": 7892 + }, + { + "epoch": 45.10285714285714, + "grad_norm": 71.29002380371094, + "learning_rate": 5.441269841269842e-06, + "loss": 0.1377, + 
"step": 7893 + }, + { + "epoch": 45.10857142857143, + "grad_norm": 59.49152374267578, + "learning_rate": 5.434920634920635e-06, + "loss": 0.2838, + "step": 7894 + }, + { + "epoch": 45.114285714285714, + "grad_norm": 37.16068649291992, + "learning_rate": 5.428571428571429e-06, + "loss": 0.1124, + "step": 7895 + }, + { + "epoch": 45.12, + "grad_norm": 41.775028228759766, + "learning_rate": 5.422222222222222e-06, + "loss": 0.1479, + "step": 7896 + }, + { + "epoch": 45.12571428571429, + "grad_norm": 85.41061401367188, + "learning_rate": 5.415873015873016e-06, + "loss": 0.1679, + "step": 7897 + }, + { + "epoch": 45.13142857142857, + "grad_norm": 44.06031799316406, + "learning_rate": 5.409523809523809e-06, + "loss": 0.156, + "step": 7898 + }, + { + "epoch": 45.137142857142855, + "grad_norm": 39.47423553466797, + "learning_rate": 5.403174603174604e-06, + "loss": 0.1432, + "step": 7899 + }, + { + "epoch": 45.142857142857146, + "grad_norm": 28.359256744384766, + "learning_rate": 5.396825396825397e-06, + "loss": 0.1443, + "step": 7900 + }, + { + "epoch": 45.14857142857143, + "grad_norm": 34.1742057800293, + "learning_rate": 5.390476190476191e-06, + "loss": 0.1378, + "step": 7901 + }, + { + "epoch": 45.15428571428571, + "grad_norm": 33.425750732421875, + "learning_rate": 5.384126984126984e-06, + "loss": 0.1248, + "step": 7902 + }, + { + "epoch": 45.16, + "grad_norm": 44.18424987792969, + "learning_rate": 5.3777777777777784e-06, + "loss": 0.172, + "step": 7903 + }, + { + "epoch": 45.16571428571429, + "grad_norm": 44.47559356689453, + "learning_rate": 5.371428571428572e-06, + "loss": 0.1656, + "step": 7904 + }, + { + "epoch": 45.17142857142857, + "grad_norm": 44.56504440307617, + "learning_rate": 5.365079365079365e-06, + "loss": 0.1147, + "step": 7905 + }, + { + "epoch": 45.177142857142854, + "grad_norm": 28.468868255615234, + "learning_rate": 5.358730158730159e-06, + "loss": 0.1694, + "step": 7906 + }, + { + "epoch": 45.182857142857145, + "grad_norm": 37.43157196044922, + 
"learning_rate": 5.352380952380952e-06, + "loss": 0.1554, + "step": 7907 + }, + { + "epoch": 45.18857142857143, + "grad_norm": 40.826416015625, + "learning_rate": 5.346031746031746e-06, + "loss": 0.1427, + "step": 7908 + }, + { + "epoch": 45.19428571428571, + "grad_norm": 44.846988677978516, + "learning_rate": 5.339682539682539e-06, + "loss": 0.1875, + "step": 7909 + }, + { + "epoch": 45.2, + "grad_norm": 43.36846923828125, + "learning_rate": 5.333333333333334e-06, + "loss": 0.1556, + "step": 7910 + }, + { + "epoch": 45.205714285714286, + "grad_norm": 23.776344299316406, + "learning_rate": 5.326984126984127e-06, + "loss": 0.1688, + "step": 7911 + }, + { + "epoch": 45.21142857142857, + "grad_norm": 32.264644622802734, + "learning_rate": 5.3206349206349215e-06, + "loss": 0.1537, + "step": 7912 + }, + { + "epoch": 45.21714285714286, + "grad_norm": 73.19031524658203, + "learning_rate": 5.314285714285715e-06, + "loss": 0.145, + "step": 7913 + }, + { + "epoch": 45.222857142857144, + "grad_norm": 59.46937942504883, + "learning_rate": 5.3079365079365084e-06, + "loss": 0.1153, + "step": 7914 + }, + { + "epoch": 45.22857142857143, + "grad_norm": 17.42439079284668, + "learning_rate": 5.301587301587302e-06, + "loss": 0.0885, + "step": 7915 + }, + { + "epoch": 45.23428571428571, + "grad_norm": 57.91249465942383, + "learning_rate": 5.295238095238095e-06, + "loss": 0.1386, + "step": 7916 + }, + { + "epoch": 45.24, + "grad_norm": 48.12847900390625, + "learning_rate": 5.288888888888889e-06, + "loss": 0.1431, + "step": 7917 + }, + { + "epoch": 45.245714285714286, + "grad_norm": 1868.663818359375, + "learning_rate": 5.282539682539682e-06, + "loss": 0.1713, + "step": 7918 + }, + { + "epoch": 45.25142857142857, + "grad_norm": 40.173118591308594, + "learning_rate": 5.276190476190476e-06, + "loss": 0.2278, + "step": 7919 + }, + { + "epoch": 45.25714285714286, + "grad_norm": 87.19586944580078, + "learning_rate": 5.26984126984127e-06, + "loss": 0.1878, + "step": 7920 + }, + { + "epoch": 
45.26285714285714, + "grad_norm": 68.53565216064453, + "learning_rate": 5.263492063492064e-06, + "loss": 0.1328, + "step": 7921 + }, + { + "epoch": 45.26857142857143, + "grad_norm": 47.07212829589844, + "learning_rate": 5.257142857142858e-06, + "loss": 0.2188, + "step": 7922 + }, + { + "epoch": 45.27428571428572, + "grad_norm": 65.88520812988281, + "learning_rate": 5.2507936507936515e-06, + "loss": 0.1713, + "step": 7923 + }, + { + "epoch": 45.28, + "grad_norm": 1658.98828125, + "learning_rate": 5.244444444444445e-06, + "loss": 0.2176, + "step": 7924 + }, + { + "epoch": 45.285714285714285, + "grad_norm": 30.800500869750977, + "learning_rate": 5.2380952380952384e-06, + "loss": 0.1687, + "step": 7925 + }, + { + "epoch": 45.29142857142857, + "grad_norm": 718.1648559570312, + "learning_rate": 5.231746031746032e-06, + "loss": 0.2337, + "step": 7926 + }, + { + "epoch": 45.29714285714286, + "grad_norm": 77.49954223632812, + "learning_rate": 5.225396825396825e-06, + "loss": 0.1318, + "step": 7927 + }, + { + "epoch": 45.30285714285714, + "grad_norm": 52.57571029663086, + "learning_rate": 5.219047619047619e-06, + "loss": 0.18, + "step": 7928 + }, + { + "epoch": 45.308571428571426, + "grad_norm": 35.31848907470703, + "learning_rate": 5.212698412698413e-06, + "loss": 0.1682, + "step": 7929 + }, + { + "epoch": 45.31428571428572, + "grad_norm": 44.66339111328125, + "learning_rate": 5.206349206349207e-06, + "loss": 0.1446, + "step": 7930 + }, + { + "epoch": 45.32, + "grad_norm": 19.15682601928711, + "learning_rate": 5.2e-06, + "loss": 0.1189, + "step": 7931 + }, + { + "epoch": 45.325714285714284, + "grad_norm": 27.01017189025879, + "learning_rate": 5.193650793650794e-06, + "loss": 0.1335, + "step": 7932 + }, + { + "epoch": 45.331428571428575, + "grad_norm": 51.86173629760742, + "learning_rate": 5.187301587301588e-06, + "loss": 0.1628, + "step": 7933 + }, + { + "epoch": 45.33714285714286, + "grad_norm": 36.805477142333984, + "learning_rate": 5.1809523809523815e-06, + "loss": 
0.1129, + "step": 7934 + }, + { + "epoch": 45.34285714285714, + "grad_norm": 41.201351165771484, + "learning_rate": 5.174603174603175e-06, + "loss": 0.1683, + "step": 7935 + }, + { + "epoch": 45.348571428571425, + "grad_norm": 72.04317474365234, + "learning_rate": 5.1682539682539685e-06, + "loss": 0.1878, + "step": 7936 + }, + { + "epoch": 45.354285714285716, + "grad_norm": 53.820682525634766, + "learning_rate": 5.161904761904762e-06, + "loss": 0.1308, + "step": 7937 + }, + { + "epoch": 45.36, + "grad_norm": 68.02818298339844, + "learning_rate": 5.155555555555555e-06, + "loss": 0.2302, + "step": 7938 + }, + { + "epoch": 45.36571428571428, + "grad_norm": 34.54709243774414, + "learning_rate": 5.14920634920635e-06, + "loss": 0.1217, + "step": 7939 + }, + { + "epoch": 45.371428571428574, + "grad_norm": 57.59125900268555, + "learning_rate": 5.142857142857143e-06, + "loss": 0.1571, + "step": 7940 + }, + { + "epoch": 45.37714285714286, + "grad_norm": 20.881120681762695, + "learning_rate": 5.136507936507937e-06, + "loss": 0.1987, + "step": 7941 + }, + { + "epoch": 45.38285714285714, + "grad_norm": 633.7258911132812, + "learning_rate": 5.13015873015873e-06, + "loss": 0.2172, + "step": 7942 + }, + { + "epoch": 45.38857142857143, + "grad_norm": 57.42353057861328, + "learning_rate": 5.123809523809524e-06, + "loss": 0.1497, + "step": 7943 + }, + { + "epoch": 45.394285714285715, + "grad_norm": 29.66927719116211, + "learning_rate": 5.117460317460317e-06, + "loss": 0.1191, + "step": 7944 + }, + { + "epoch": 45.4, + "grad_norm": 37.49243927001953, + "learning_rate": 5.1111111111111115e-06, + "loss": 0.151, + "step": 7945 + }, + { + "epoch": 45.40571428571428, + "grad_norm": 66.22801971435547, + "learning_rate": 5.104761904761905e-06, + "loss": 0.1189, + "step": 7946 + }, + { + "epoch": 45.41142857142857, + "grad_norm": 61.25311279296875, + "learning_rate": 5.0984126984126985e-06, + "loss": 0.1553, + "step": 7947 + }, + { + "epoch": 45.417142857142856, + "grad_norm": 
20.99935531616211, + "learning_rate": 5.092063492063493e-06, + "loss": 0.1017, + "step": 7948 + }, + { + "epoch": 45.42285714285714, + "grad_norm": 342.1048583984375, + "learning_rate": 5.085714285714286e-06, + "loss": 0.117, + "step": 7949 + }, + { + "epoch": 45.42857142857143, + "grad_norm": 29.054729461669922, + "learning_rate": 5.07936507936508e-06, + "loss": 0.1472, + "step": 7950 + }, + { + "epoch": 45.434285714285714, + "grad_norm": 46.416114807128906, + "learning_rate": 5.073015873015873e-06, + "loss": 0.1255, + "step": 7951 + }, + { + "epoch": 45.44, + "grad_norm": 37.09671401977539, + "learning_rate": 5.066666666666667e-06, + "loss": 0.1401, + "step": 7952 + }, + { + "epoch": 45.44571428571429, + "grad_norm": 55.9088020324707, + "learning_rate": 5.06031746031746e-06, + "loss": 0.1078, + "step": 7953 + }, + { + "epoch": 45.45142857142857, + "grad_norm": 27.684017181396484, + "learning_rate": 5.053968253968254e-06, + "loss": 0.1642, + "step": 7954 + }, + { + "epoch": 45.457142857142856, + "grad_norm": 56.756187438964844, + "learning_rate": 5.047619047619047e-06, + "loss": 0.1364, + "step": 7955 + }, + { + "epoch": 45.462857142857146, + "grad_norm": 32.92528533935547, + "learning_rate": 5.0412698412698415e-06, + "loss": 0.1332, + "step": 7956 + }, + { + "epoch": 45.46857142857143, + "grad_norm": 80.59513092041016, + "learning_rate": 5.034920634920636e-06, + "loss": 0.1283, + "step": 7957 + }, + { + "epoch": 45.47428571428571, + "grad_norm": 39.07324981689453, + "learning_rate": 5.028571428571429e-06, + "loss": 0.1259, + "step": 7958 + }, + { + "epoch": 45.48, + "grad_norm": 45.028076171875, + "learning_rate": 5.022222222222223e-06, + "loss": 0.1298, + "step": 7959 + }, + { + "epoch": 45.48571428571429, + "grad_norm": 42.70392608642578, + "learning_rate": 5.015873015873016e-06, + "loss": 0.1357, + "step": 7960 + }, + { + "epoch": 45.49142857142857, + "grad_norm": 29.76717758178711, + "learning_rate": 5.00952380952381e-06, + "loss": 0.1171, + "step": 7961 + }, 
+ { + "epoch": 45.497142857142855, + "grad_norm": 68.0356216430664, + "learning_rate": 5.003174603174603e-06, + "loss": 0.1502, + "step": 7962 + }, + { + "epoch": 45.502857142857145, + "grad_norm": 79.7303237915039, + "learning_rate": 4.996825396825397e-06, + "loss": 0.1387, + "step": 7963 + }, + { + "epoch": 45.50857142857143, + "grad_norm": 34.87709045410156, + "learning_rate": 4.99047619047619e-06, + "loss": 0.1394, + "step": 7964 + }, + { + "epoch": 45.51428571428571, + "grad_norm": 58.27235412597656, + "learning_rate": 4.9841269841269845e-06, + "loss": 0.1435, + "step": 7965 + }, + { + "epoch": 45.52, + "grad_norm": 34.056663513183594, + "learning_rate": 4.977777777777778e-06, + "loss": 0.1526, + "step": 7966 + }, + { + "epoch": 45.52571428571429, + "grad_norm": 35.867698669433594, + "learning_rate": 4.9714285714285715e-06, + "loss": 0.0987, + "step": 7967 + }, + { + "epoch": 45.53142857142857, + "grad_norm": 92.24200439453125, + "learning_rate": 4.965079365079366e-06, + "loss": 0.1502, + "step": 7968 + }, + { + "epoch": 45.537142857142854, + "grad_norm": 229.3932342529297, + "learning_rate": 4.958730158730159e-06, + "loss": 0.1199, + "step": 7969 + }, + { + "epoch": 45.542857142857144, + "grad_norm": 38.768646240234375, + "learning_rate": 4.952380952380953e-06, + "loss": 0.1456, + "step": 7970 + }, + { + "epoch": 45.54857142857143, + "grad_norm": 166.4169464111328, + "learning_rate": 4.946031746031746e-06, + "loss": 0.3404, + "step": 7971 + }, + { + "epoch": 45.55428571428571, + "grad_norm": 22.784006118774414, + "learning_rate": 4.93968253968254e-06, + "loss": 0.1605, + "step": 7972 + }, + { + "epoch": 45.56, + "grad_norm": 88.45488739013672, + "learning_rate": 4.933333333333333e-06, + "loss": 0.1392, + "step": 7973 + }, + { + "epoch": 45.565714285714286, + "grad_norm": 20.583641052246094, + "learning_rate": 4.926984126984127e-06, + "loss": 0.12, + "step": 7974 + }, + { + "epoch": 45.57142857142857, + "grad_norm": 28.72245979309082, + "learning_rate": 
4.920634920634921e-06, + "loss": 0.1228, + "step": 7975 + }, + { + "epoch": 45.57714285714286, + "grad_norm": 49.244537353515625, + "learning_rate": 4.9142857142857145e-06, + "loss": 0.133, + "step": 7976 + }, + { + "epoch": 45.582857142857144, + "grad_norm": 64.7433090209961, + "learning_rate": 4.907936507936508e-06, + "loss": 0.1481, + "step": 7977 + }, + { + "epoch": 45.58857142857143, + "grad_norm": 43.253334045410156, + "learning_rate": 4.9015873015873015e-06, + "loss": 0.1384, + "step": 7978 + }, + { + "epoch": 45.59428571428572, + "grad_norm": 102.45452880859375, + "learning_rate": 4.895238095238096e-06, + "loss": 0.1451, + "step": 7979 + }, + { + "epoch": 45.6, + "grad_norm": 59.437744140625, + "learning_rate": 4.888888888888889e-06, + "loss": 0.1486, + "step": 7980 + }, + { + "epoch": 45.605714285714285, + "grad_norm": 50.85014724731445, + "learning_rate": 4.882539682539683e-06, + "loss": 0.1787, + "step": 7981 + }, + { + "epoch": 45.61142857142857, + "grad_norm": 95.12709045410156, + "learning_rate": 4.876190476190476e-06, + "loss": 0.1944, + "step": 7982 + }, + { + "epoch": 45.61714285714286, + "grad_norm": 44.42409133911133, + "learning_rate": 4.86984126984127e-06, + "loss": 0.1506, + "step": 7983 + }, + { + "epoch": 45.62285714285714, + "grad_norm": 27.613187789916992, + "learning_rate": 4.863492063492064e-06, + "loss": 0.1313, + "step": 7984 + }, + { + "epoch": 45.628571428571426, + "grad_norm": 37.380916595458984, + "learning_rate": 4.857142857142858e-06, + "loss": 0.1489, + "step": 7985 + }, + { + "epoch": 45.63428571428572, + "grad_norm": 18.24190902709961, + "learning_rate": 4.850793650793651e-06, + "loss": 0.1606, + "step": 7986 + }, + { + "epoch": 45.64, + "grad_norm": 21.4625244140625, + "learning_rate": 4.8444444444444446e-06, + "loss": 0.1006, + "step": 7987 + }, + { + "epoch": 45.645714285714284, + "grad_norm": 25.19818687438965, + "learning_rate": 4.838095238095238e-06, + "loss": 0.1763, + "step": 7988 + }, + { + "epoch": 
45.651428571428575, + "grad_norm": 42.4224853515625, + "learning_rate": 4.8317460317460315e-06, + "loss": 0.1362, + "step": 7989 + }, + { + "epoch": 45.65714285714286, + "grad_norm": 54.029212951660156, + "learning_rate": 4.825396825396826e-06, + "loss": 0.1629, + "step": 7990 + }, + { + "epoch": 45.66285714285714, + "grad_norm": 38.289039611816406, + "learning_rate": 4.819047619047619e-06, + "loss": 0.0991, + "step": 7991 + }, + { + "epoch": 45.668571428571425, + "grad_norm": 30.55815887451172, + "learning_rate": 4.812698412698413e-06, + "loss": 0.1261, + "step": 7992 + }, + { + "epoch": 45.674285714285716, + "grad_norm": 52.03242874145508, + "learning_rate": 4.806349206349207e-06, + "loss": 0.1151, + "step": 7993 + }, + { + "epoch": 45.68, + "grad_norm": 54.05915832519531, + "learning_rate": 4.800000000000001e-06, + "loss": 0.1356, + "step": 7994 + }, + { + "epoch": 45.68571428571428, + "grad_norm": 59.59641647338867, + "learning_rate": 4.793650793650794e-06, + "loss": 0.1261, + "step": 7995 + }, + { + "epoch": 45.691428571428574, + "grad_norm": 42.24554443359375, + "learning_rate": 4.787301587301588e-06, + "loss": 0.2133, + "step": 7996 + }, + { + "epoch": 45.69714285714286, + "grad_norm": 32.48039627075195, + "learning_rate": 4.780952380952381e-06, + "loss": 0.1592, + "step": 7997 + }, + { + "epoch": 45.70285714285714, + "grad_norm": 66.41365051269531, + "learning_rate": 4.7746031746031746e-06, + "loss": 0.1625, + "step": 7998 + }, + { + "epoch": 45.70857142857143, + "grad_norm": 23.149810791015625, + "learning_rate": 4.768253968253968e-06, + "loss": 0.231, + "step": 7999 + }, + { + "epoch": 45.714285714285715, + "grad_norm": 75.46231842041016, + "learning_rate": 4.7619047619047615e-06, + "loss": 0.174, + "step": 8000 + }, + { + "epoch": 45.72, + "grad_norm": 32.4194450378418, + "learning_rate": 4.755555555555556e-06, + "loss": 0.1508, + "step": 8001 + }, + { + "epoch": 45.72571428571428, + "grad_norm": 46.35731506347656, + "learning_rate": 
4.749206349206349e-06, + "loss": 0.286, + "step": 8002 + }, + { + "epoch": 45.73142857142857, + "grad_norm": 44.204750061035156, + "learning_rate": 4.742857142857144e-06, + "loss": 0.145, + "step": 8003 + }, + { + "epoch": 45.73714285714286, + "grad_norm": 20.596189498901367, + "learning_rate": 4.736507936507937e-06, + "loss": 0.1717, + "step": 8004 + }, + { + "epoch": 45.74285714285714, + "grad_norm": 52.89987564086914, + "learning_rate": 4.730158730158731e-06, + "loss": 0.1003, + "step": 8005 + }, + { + "epoch": 45.74857142857143, + "grad_norm": 47.98521041870117, + "learning_rate": 4.723809523809524e-06, + "loss": 0.1458, + "step": 8006 + }, + { + "epoch": 45.754285714285714, + "grad_norm": 58.42574691772461, + "learning_rate": 4.717460317460318e-06, + "loss": 0.1715, + "step": 8007 + }, + { + "epoch": 45.76, + "grad_norm": 23.98272132873535, + "learning_rate": 4.711111111111111e-06, + "loss": 0.1402, + "step": 8008 + }, + { + "epoch": 45.76571428571429, + "grad_norm": 30.55145835876465, + "learning_rate": 4.7047619047619046e-06, + "loss": 0.1579, + "step": 8009 + }, + { + "epoch": 45.77142857142857, + "grad_norm": 56.62355422973633, + "learning_rate": 4.698412698412698e-06, + "loss": 0.1431, + "step": 8010 + }, + { + "epoch": 45.777142857142856, + "grad_norm": 23.12961769104004, + "learning_rate": 4.692063492063492e-06, + "loss": 0.1337, + "step": 8011 + }, + { + "epoch": 45.78285714285714, + "grad_norm": 67.6146240234375, + "learning_rate": 4.685714285714286e-06, + "loss": 0.1221, + "step": 8012 + }, + { + "epoch": 45.78857142857143, + "grad_norm": 59.8460693359375, + "learning_rate": 4.67936507936508e-06, + "loss": 0.148, + "step": 8013 + }, + { + "epoch": 45.794285714285714, + "grad_norm": 40.833213806152344, + "learning_rate": 4.673015873015874e-06, + "loss": 0.0933, + "step": 8014 + }, + { + "epoch": 45.8, + "grad_norm": 40.59428787231445, + "learning_rate": 4.666666666666667e-06, + "loss": 0.1348, + "step": 8015 + }, + { + "epoch": 45.80571428571429, + 
"grad_norm": 46.43875503540039, + "learning_rate": 4.660317460317461e-06, + "loss": 0.1362, + "step": 8016 + }, + { + "epoch": 45.81142857142857, + "grad_norm": 29.034568786621094, + "learning_rate": 4.653968253968254e-06, + "loss": 0.1268, + "step": 8017 + }, + { + "epoch": 45.817142857142855, + "grad_norm": 46.89924240112305, + "learning_rate": 4.647619047619048e-06, + "loss": 0.1295, + "step": 8018 + }, + { + "epoch": 45.822857142857146, + "grad_norm": 30.950572967529297, + "learning_rate": 4.641269841269841e-06, + "loss": 0.1392, + "step": 8019 + }, + { + "epoch": 45.82857142857143, + "grad_norm": 40.0648307800293, + "learning_rate": 4.634920634920635e-06, + "loss": 0.1877, + "step": 8020 + }, + { + "epoch": 45.83428571428571, + "grad_norm": 72.41403198242188, + "learning_rate": 4.628571428571429e-06, + "loss": 0.167, + "step": 8021 + }, + { + "epoch": 45.84, + "grad_norm": 66.96370697021484, + "learning_rate": 4.622222222222222e-06, + "loss": 0.1308, + "step": 8022 + }, + { + "epoch": 45.84571428571429, + "grad_norm": 53.45942306518555, + "learning_rate": 4.615873015873016e-06, + "loss": 0.1318, + "step": 8023 + }, + { + "epoch": 45.85142857142857, + "grad_norm": 150.67694091796875, + "learning_rate": 4.609523809523809e-06, + "loss": 0.1171, + "step": 8024 + }, + { + "epoch": 45.857142857142854, + "grad_norm": 52.64244079589844, + "learning_rate": 4.603174603174604e-06, + "loss": 0.1788, + "step": 8025 + }, + { + "epoch": 45.862857142857145, + "grad_norm": 25.661348342895508, + "learning_rate": 4.596825396825397e-06, + "loss": 0.1838, + "step": 8026 + }, + { + "epoch": 45.86857142857143, + "grad_norm": 33.09591293334961, + "learning_rate": 4.590476190476191e-06, + "loss": 0.1409, + "step": 8027 + }, + { + "epoch": 45.87428571428571, + "grad_norm": 27.691877365112305, + "learning_rate": 4.584126984126984e-06, + "loss": 0.1439, + "step": 8028 + }, + { + "epoch": 45.88, + "grad_norm": 18.00971031188965, + "learning_rate": 4.5777777777777785e-06, + "loss": 0.1229, 
+ "step": 8029 + }, + { + "epoch": 45.885714285714286, + "grad_norm": 56.43489074707031, + "learning_rate": 4.571428571428572e-06, + "loss": 0.1047, + "step": 8030 + }, + { + "epoch": 45.89142857142857, + "grad_norm": 33.09745788574219, + "learning_rate": 4.5650793650793654e-06, + "loss": 0.121, + "step": 8031 + }, + { + "epoch": 45.89714285714286, + "grad_norm": 30.577470779418945, + "learning_rate": 4.558730158730159e-06, + "loss": 0.1106, + "step": 8032 + }, + { + "epoch": 45.902857142857144, + "grad_norm": 28.69456672668457, + "learning_rate": 4.552380952380952e-06, + "loss": 0.1294, + "step": 8033 + }, + { + "epoch": 45.90857142857143, + "grad_norm": 50.44547653198242, + "learning_rate": 4.546031746031746e-06, + "loss": 0.1363, + "step": 8034 + }, + { + "epoch": 45.91428571428571, + "grad_norm": 677.958984375, + "learning_rate": 4.539682539682539e-06, + "loss": 0.1529, + "step": 8035 + }, + { + "epoch": 45.92, + "grad_norm": 49.4383430480957, + "learning_rate": 4.533333333333334e-06, + "loss": 0.1313, + "step": 8036 + }, + { + "epoch": 45.925714285714285, + "grad_norm": 129.2937469482422, + "learning_rate": 4.526984126984127e-06, + "loss": 0.1538, + "step": 8037 + }, + { + "epoch": 45.93142857142857, + "grad_norm": 42.8577766418457, + "learning_rate": 4.520634920634921e-06, + "loss": 0.1205, + "step": 8038 + }, + { + "epoch": 45.93714285714286, + "grad_norm": 48.1625862121582, + "learning_rate": 4.514285714285715e-06, + "loss": 0.1368, + "step": 8039 + }, + { + "epoch": 45.94285714285714, + "grad_norm": 56.93679428100586, + "learning_rate": 4.5079365079365085e-06, + "loss": 0.1108, + "step": 8040 + }, + { + "epoch": 45.94857142857143, + "grad_norm": 28.19448471069336, + "learning_rate": 4.501587301587302e-06, + "loss": 0.2028, + "step": 8041 + }, + { + "epoch": 45.95428571428572, + "grad_norm": 53.25984191894531, + "learning_rate": 4.4952380952380954e-06, + "loss": 0.1149, + "step": 8042 + }, + { + "epoch": 45.96, + "grad_norm": 32.59288024902344, + 
"learning_rate": 4.488888888888889e-06, + "loss": 0.1506, + "step": 8043 + }, + { + "epoch": 45.965714285714284, + "grad_norm": 31.4505558013916, + "learning_rate": 4.482539682539682e-06, + "loss": 0.1363, + "step": 8044 + }, + { + "epoch": 45.97142857142857, + "grad_norm": 30.830820083618164, + "learning_rate": 4.476190476190476e-06, + "loss": 0.1905, + "step": 8045 + }, + { + "epoch": 45.97714285714286, + "grad_norm": 44.29206848144531, + "learning_rate": 4.469841269841269e-06, + "loss": 0.1376, + "step": 8046 + }, + { + "epoch": 45.98285714285714, + "grad_norm": 727.3399658203125, + "learning_rate": 4.463492063492064e-06, + "loss": 0.1943, + "step": 8047 + }, + { + "epoch": 45.988571428571426, + "grad_norm": 26.93597984313965, + "learning_rate": 4.457142857142858e-06, + "loss": 0.1025, + "step": 8048 + }, + { + "epoch": 45.994285714285716, + "grad_norm": 54.799049377441406, + "learning_rate": 4.4507936507936515e-06, + "loss": 0.1231, + "step": 8049 + }, + { + "epoch": 46.0, + "grad_norm": 323.103515625, + "learning_rate": 4.444444444444445e-06, + "loss": 0.1579, + "step": 8050 + }, + { + "epoch": 46.0, + "eval_classes": 0, + "eval_loss": 0.5804963111877441, + "eval_map": 0.9393, + "eval_map_50": 0.971, + "eval_map_75": 0.9643, + "eval_map_large": 0.9393, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9393, + "eval_map_small": -1.0, + "eval_mar_1": 0.7914, + "eval_mar_10": 0.9752, + "eval_mar_100": 0.9765, + "eval_mar_100_per_class": 0.9765, + "eval_mar_large": 0.9765, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 12.9868, + "eval_samples_per_second": 22.638, + "eval_steps_per_second": 2.849, + "step": 8050 + }, + { + "epoch": 46.005714285714284, + "grad_norm": 38.558109283447266, + "learning_rate": 4.4380952380952385e-06, + "loss": 0.1036, + "step": 8051 + }, + { + "epoch": 46.011428571428574, + "grad_norm": 33.96249771118164, + "learning_rate": 4.431746031746032e-06, + "loss": 0.139, + "step": 8052 + }, + { + "epoch": 
46.01714285714286, + "grad_norm": 72.55626678466797, + "learning_rate": 4.4253968253968254e-06, + "loss": 0.1293, + "step": 8053 + }, + { + "epoch": 46.02285714285714, + "grad_norm": 40.11007308959961, + "learning_rate": 4.419047619047619e-06, + "loss": 0.1389, + "step": 8054 + }, + { + "epoch": 46.02857142857143, + "grad_norm": 45.6063346862793, + "learning_rate": 4.412698412698412e-06, + "loss": 0.1658, + "step": 8055 + }, + { + "epoch": 46.034285714285716, + "grad_norm": 160.0494384765625, + "learning_rate": 4.406349206349207e-06, + "loss": 0.198, + "step": 8056 + }, + { + "epoch": 46.04, + "grad_norm": 59.35651779174805, + "learning_rate": 4.4e-06, + "loss": 0.1533, + "step": 8057 + }, + { + "epoch": 46.04571428571428, + "grad_norm": 60.67522048950195, + "learning_rate": 4.393650793650794e-06, + "loss": 0.1191, + "step": 8058 + }, + { + "epoch": 46.05142857142857, + "grad_norm": 36.21040725708008, + "learning_rate": 4.387301587301588e-06, + "loss": 0.1049, + "step": 8059 + }, + { + "epoch": 46.05714285714286, + "grad_norm": 25.05276107788086, + "learning_rate": 4.3809523809523815e-06, + "loss": 0.142, + "step": 8060 + }, + { + "epoch": 46.06285714285714, + "grad_norm": 37.21646499633789, + "learning_rate": 4.374603174603175e-06, + "loss": 0.1007, + "step": 8061 + }, + { + "epoch": 46.06857142857143, + "grad_norm": 92.95510864257812, + "learning_rate": 4.3682539682539685e-06, + "loss": 0.1653, + "step": 8062 + }, + { + "epoch": 46.074285714285715, + "grad_norm": 45.78368377685547, + "learning_rate": 4.361904761904762e-06, + "loss": 0.0935, + "step": 8063 + }, + { + "epoch": 46.08, + "grad_norm": 78.9537582397461, + "learning_rate": 4.3555555555555555e-06, + "loss": 0.1634, + "step": 8064 + }, + { + "epoch": 46.08571428571429, + "grad_norm": 47.754878997802734, + "learning_rate": 4.34920634920635e-06, + "loss": 0.1306, + "step": 8065 + }, + { + "epoch": 46.09142857142857, + "grad_norm": 38.64006805419922, + "learning_rate": 4.342857142857143e-06, + "loss": 
0.1382, + "step": 8066 + }, + { + "epoch": 46.097142857142856, + "grad_norm": 47.222618103027344, + "learning_rate": 4.336507936507937e-06, + "loss": 0.1524, + "step": 8067 + }, + { + "epoch": 46.10285714285714, + "grad_norm": 28.96839714050293, + "learning_rate": 4.33015873015873e-06, + "loss": 0.138, + "step": 8068 + }, + { + "epoch": 46.10857142857143, + "grad_norm": 28.85514259338379, + "learning_rate": 4.323809523809524e-06, + "loss": 0.1345, + "step": 8069 + }, + { + "epoch": 46.114285714285714, + "grad_norm": 120.97235107421875, + "learning_rate": 4.317460317460318e-06, + "loss": 0.2, + "step": 8070 + }, + { + "epoch": 46.12, + "grad_norm": 45.22092056274414, + "learning_rate": 4.3111111111111115e-06, + "loss": 0.1386, + "step": 8071 + }, + { + "epoch": 46.12571428571429, + "grad_norm": 17.343658447265625, + "learning_rate": 4.304761904761905e-06, + "loss": 0.1392, + "step": 8072 + }, + { + "epoch": 46.13142857142857, + "grad_norm": 55.66539001464844, + "learning_rate": 4.2984126984126985e-06, + "loss": 0.1834, + "step": 8073 + }, + { + "epoch": 46.137142857142855, + "grad_norm": 65.15888977050781, + "learning_rate": 4.292063492063492e-06, + "loss": 0.1463, + "step": 8074 + }, + { + "epoch": 46.142857142857146, + "grad_norm": 41.014129638671875, + "learning_rate": 4.285714285714286e-06, + "loss": 0.1523, + "step": 8075 + }, + { + "epoch": 46.14857142857143, + "grad_norm": 30.696699142456055, + "learning_rate": 4.27936507936508e-06, + "loss": 0.1527, + "step": 8076 + }, + { + "epoch": 46.15428571428571, + "grad_norm": 35.69332504272461, + "learning_rate": 4.273015873015873e-06, + "loss": 0.1674, + "step": 8077 + }, + { + "epoch": 46.16, + "grad_norm": 35.99774932861328, + "learning_rate": 4.266666666666667e-06, + "loss": 0.1502, + "step": 8078 + }, + { + "epoch": 46.16571428571429, + "grad_norm": 106.03459167480469, + "learning_rate": 4.26031746031746e-06, + "loss": 0.116, + "step": 8079 + }, + { + "epoch": 46.17142857142857, + "grad_norm": 39.56972885131836, 
+ "learning_rate": 4.253968253968254e-06, + "loss": 0.4655, + "step": 8080 + }, + { + "epoch": 46.177142857142854, + "grad_norm": 17.103309631347656, + "learning_rate": 4.247619047619047e-06, + "loss": 0.1246, + "step": 8081 + }, + { + "epoch": 46.182857142857145, + "grad_norm": 23.25718879699707, + "learning_rate": 4.2412698412698415e-06, + "loss": 0.1346, + "step": 8082 + }, + { + "epoch": 46.18857142857143, + "grad_norm": 2234.8271484375, + "learning_rate": 4.234920634920635e-06, + "loss": 0.2929, + "step": 8083 + }, + { + "epoch": 46.19428571428571, + "grad_norm": 123.91931915283203, + "learning_rate": 4.228571428571429e-06, + "loss": 0.1155, + "step": 8084 + }, + { + "epoch": 46.2, + "grad_norm": 22.70152473449707, + "learning_rate": 4.222222222222223e-06, + "loss": 0.132, + "step": 8085 + }, + { + "epoch": 46.205714285714286, + "grad_norm": 35.118003845214844, + "learning_rate": 4.215873015873016e-06, + "loss": 0.13, + "step": 8086 + }, + { + "epoch": 46.21142857142857, + "grad_norm": 38.66529083251953, + "learning_rate": 4.20952380952381e-06, + "loss": 0.1394, + "step": 8087 + }, + { + "epoch": 46.21714285714286, + "grad_norm": 45.83235168457031, + "learning_rate": 4.203174603174603e-06, + "loss": 0.0785, + "step": 8088 + }, + { + "epoch": 46.222857142857144, + "grad_norm": 28.21929359436035, + "learning_rate": 4.196825396825397e-06, + "loss": 0.1272, + "step": 8089 + }, + { + "epoch": 46.22857142857143, + "grad_norm": 27.014911651611328, + "learning_rate": 4.19047619047619e-06, + "loss": 0.1115, + "step": 8090 + }, + { + "epoch": 46.23428571428571, + "grad_norm": 70.28314208984375, + "learning_rate": 4.184126984126984e-06, + "loss": 0.1855, + "step": 8091 + }, + { + "epoch": 46.24, + "grad_norm": 97.12055206298828, + "learning_rate": 4.177777777777778e-06, + "loss": 0.2051, + "step": 8092 + }, + { + "epoch": 46.245714285714286, + "grad_norm": 92.34823608398438, + "learning_rate": 4.1714285714285715e-06, + "loss": 0.1157, + "step": 8093 + }, + { + "epoch": 
46.25142857142857, + "grad_norm": 30.1092586517334, + "learning_rate": 4.165079365079366e-06, + "loss": 0.1308, + "step": 8094 + }, + { + "epoch": 46.25714285714286, + "grad_norm": 60.77410888671875, + "learning_rate": 4.158730158730159e-06, + "loss": 0.1948, + "step": 8095 + }, + { + "epoch": 46.26285714285714, + "grad_norm": 54.656578063964844, + "learning_rate": 4.152380952380953e-06, + "loss": 0.1284, + "step": 8096 + }, + { + "epoch": 46.26857142857143, + "grad_norm": 18.921110153198242, + "learning_rate": 4.146031746031746e-06, + "loss": 0.1085, + "step": 8097 + }, + { + "epoch": 46.27428571428572, + "grad_norm": 32.94758605957031, + "learning_rate": 4.13968253968254e-06, + "loss": 0.1655, + "step": 8098 + }, + { + "epoch": 46.28, + "grad_norm": 52.38801193237305, + "learning_rate": 4.133333333333333e-06, + "loss": 0.0871, + "step": 8099 + }, + { + "epoch": 46.285714285714285, + "grad_norm": 60.48457336425781, + "learning_rate": 4.126984126984127e-06, + "loss": 0.1785, + "step": 8100 + }, + { + "epoch": 46.29142857142857, + "grad_norm": 21.47099494934082, + "learning_rate": 4.120634920634921e-06, + "loss": 0.093, + "step": 8101 + }, + { + "epoch": 46.29714285714286, + "grad_norm": 47.40420150756836, + "learning_rate": 4.114285714285715e-06, + "loss": 0.1423, + "step": 8102 + }, + { + "epoch": 46.30285714285714, + "grad_norm": 27.537425994873047, + "learning_rate": 4.107936507936508e-06, + "loss": 0.125, + "step": 8103 + }, + { + "epoch": 46.308571428571426, + "grad_norm": 25.858352661132812, + "learning_rate": 4.1015873015873015e-06, + "loss": 0.1388, + "step": 8104 + }, + { + "epoch": 46.31428571428572, + "grad_norm": 37.65393829345703, + "learning_rate": 4.095238095238096e-06, + "loss": 0.1413, + "step": 8105 + }, + { + "epoch": 46.32, + "grad_norm": 34.300838470458984, + "learning_rate": 4.088888888888889e-06, + "loss": 0.1718, + "step": 8106 + }, + { + "epoch": 46.325714285714284, + "grad_norm": 225.12020874023438, + "learning_rate": 
4.082539682539683e-06, + "loss": 0.1327, + "step": 8107 + }, + { + "epoch": 46.331428571428575, + "grad_norm": 91.56395721435547, + "learning_rate": 4.076190476190476e-06, + "loss": 0.1801, + "step": 8108 + }, + { + "epoch": 46.33714285714286, + "grad_norm": 146.67095947265625, + "learning_rate": 4.06984126984127e-06, + "loss": 0.1194, + "step": 8109 + }, + { + "epoch": 46.34285714285714, + "grad_norm": 27.21466827392578, + "learning_rate": 4.063492063492063e-06, + "loss": 0.1009, + "step": 8110 + }, + { + "epoch": 46.348571428571425, + "grad_norm": 57.456424713134766, + "learning_rate": 4.057142857142858e-06, + "loss": 0.1041, + "step": 8111 + }, + { + "epoch": 46.354285714285716, + "grad_norm": 51.558509826660156, + "learning_rate": 4.050793650793651e-06, + "loss": 0.1208, + "step": 8112 + }, + { + "epoch": 46.36, + "grad_norm": 72.62211608886719, + "learning_rate": 4.044444444444445e-06, + "loss": 0.2069, + "step": 8113 + }, + { + "epoch": 46.36571428571428, + "grad_norm": 385.8822326660156, + "learning_rate": 4.038095238095238e-06, + "loss": 0.1599, + "step": 8114 + }, + { + "epoch": 46.371428571428574, + "grad_norm": 30.5274715423584, + "learning_rate": 4.0317460317460316e-06, + "loss": 0.1327, + "step": 8115 + }, + { + "epoch": 46.37714285714286, + "grad_norm": 40.12117385864258, + "learning_rate": 4.025396825396826e-06, + "loss": 0.2773, + "step": 8116 + }, + { + "epoch": 46.38285714285714, + "grad_norm": 33.24130630493164, + "learning_rate": 4.019047619047619e-06, + "loss": 0.1292, + "step": 8117 + }, + { + "epoch": 46.38857142857143, + "grad_norm": 26.06099510192871, + "learning_rate": 4.012698412698413e-06, + "loss": 0.0963, + "step": 8118 + }, + { + "epoch": 46.394285714285715, + "grad_norm": 25.47860336303711, + "learning_rate": 4.006349206349206e-06, + "loss": 0.1537, + "step": 8119 + }, + { + "epoch": 46.4, + "grad_norm": 14.787314414978027, + "learning_rate": 4.000000000000001e-06, + "loss": 0.1415, + "step": 8120 + }, + { + "epoch": 
46.40571428571428, + "grad_norm": 71.64945220947266, + "learning_rate": 3.993650793650794e-06, + "loss": 0.1475, + "step": 8121 + }, + { + "epoch": 46.41142857142857, + "grad_norm": 45.85343933105469, + "learning_rate": 3.987301587301588e-06, + "loss": 0.1218, + "step": 8122 + }, + { + "epoch": 46.417142857142856, + "grad_norm": 22.669567108154297, + "learning_rate": 3.980952380952381e-06, + "loss": 0.1365, + "step": 8123 + }, + { + "epoch": 46.42285714285714, + "grad_norm": 61.678863525390625, + "learning_rate": 3.974603174603175e-06, + "loss": 0.2096, + "step": 8124 + }, + { + "epoch": 46.42857142857143, + "grad_norm": 40.377811431884766, + "learning_rate": 3.968253968253968e-06, + "loss": 0.1392, + "step": 8125 + }, + { + "epoch": 46.434285714285714, + "grad_norm": 76.2131576538086, + "learning_rate": 3.9619047619047616e-06, + "loss": 0.1054, + "step": 8126 + }, + { + "epoch": 46.44, + "grad_norm": 52.06477737426758, + "learning_rate": 3.955555555555555e-06, + "loss": 0.1174, + "step": 8127 + }, + { + "epoch": 46.44571428571429, + "grad_norm": 111.36573028564453, + "learning_rate": 3.949206349206349e-06, + "loss": 0.1364, + "step": 8128 + }, + { + "epoch": 46.45142857142857, + "grad_norm": 35.40325164794922, + "learning_rate": 3.942857142857143e-06, + "loss": 0.1621, + "step": 8129 + }, + { + "epoch": 46.457142857142856, + "grad_norm": 25.186216354370117, + "learning_rate": 3.936507936507937e-06, + "loss": 0.1325, + "step": 8130 + }, + { + "epoch": 46.462857142857146, + "grad_norm": 38.83059310913086, + "learning_rate": 3.930158730158731e-06, + "loss": 0.1178, + "step": 8131 + }, + { + "epoch": 46.46857142857143, + "grad_norm": 29.10702896118164, + "learning_rate": 3.923809523809524e-06, + "loss": 0.0836, + "step": 8132 + }, + { + "epoch": 46.47428571428571, + "grad_norm": 23.07155990600586, + "learning_rate": 3.917460317460318e-06, + "loss": 0.1087, + "step": 8133 + }, + { + "epoch": 46.48, + "grad_norm": 60.923797607421875, + "learning_rate": 
3.911111111111111e-06, + "loss": 0.1187, + "step": 8134 + }, + { + "epoch": 46.48571428571429, + "grad_norm": 652.712158203125, + "learning_rate": 3.904761904761905e-06, + "loss": 0.1137, + "step": 8135 + }, + { + "epoch": 46.49142857142857, + "grad_norm": 55.697235107421875, + "learning_rate": 3.898412698412698e-06, + "loss": 0.1279, + "step": 8136 + }, + { + "epoch": 46.497142857142855, + "grad_norm": 26.30288314819336, + "learning_rate": 3.892063492063492e-06, + "loss": 0.1884, + "step": 8137 + }, + { + "epoch": 46.502857142857145, + "grad_norm": 70.62115478515625, + "learning_rate": 3.885714285714286e-06, + "loss": 0.1151, + "step": 8138 + }, + { + "epoch": 46.50857142857143, + "grad_norm": 30.58101463317871, + "learning_rate": 3.87936507936508e-06, + "loss": 0.1007, + "step": 8139 + }, + { + "epoch": 46.51428571428571, + "grad_norm": 24.60630989074707, + "learning_rate": 3.873015873015874e-06, + "loss": 0.1614, + "step": 8140 + }, + { + "epoch": 46.52, + "grad_norm": 1553.7259521484375, + "learning_rate": 3.866666666666667e-06, + "loss": 0.1584, + "step": 8141 + }, + { + "epoch": 46.52571428571429, + "grad_norm": 42.48031997680664, + "learning_rate": 3.860317460317461e-06, + "loss": 0.123, + "step": 8142 + }, + { + "epoch": 46.53142857142857, + "grad_norm": 49.869937896728516, + "learning_rate": 3.853968253968254e-06, + "loss": 0.112, + "step": 8143 + }, + { + "epoch": 46.537142857142854, + "grad_norm": 23.666994094848633, + "learning_rate": 3.847619047619048e-06, + "loss": 0.1292, + "step": 8144 + }, + { + "epoch": 46.542857142857144, + "grad_norm": 27.957948684692383, + "learning_rate": 3.841269841269841e-06, + "loss": 0.1092, + "step": 8145 + }, + { + "epoch": 46.54857142857143, + "grad_norm": 45.88031768798828, + "learning_rate": 3.834920634920635e-06, + "loss": 0.142, + "step": 8146 + }, + { + "epoch": 46.55428571428571, + "grad_norm": 52.62200927734375, + "learning_rate": 3.828571428571429e-06, + "loss": 0.1337, + "step": 8147 + }, + { + "epoch": 46.56, 
+ "grad_norm": 24.335599899291992, + "learning_rate": 3.8222222222222224e-06, + "loss": 0.1494, + "step": 8148 + }, + { + "epoch": 46.565714285714286, + "grad_norm": 43.13810729980469, + "learning_rate": 3.815873015873016e-06, + "loss": 0.1263, + "step": 8149 + }, + { + "epoch": 46.57142857142857, + "grad_norm": 24.405460357666016, + "learning_rate": 3.8095238095238102e-06, + "loss": 0.15, + "step": 8150 + }, + { + "epoch": 46.57714285714286, + "grad_norm": 54.83547592163086, + "learning_rate": 3.8031746031746037e-06, + "loss": 0.1236, + "step": 8151 + }, + { + "epoch": 46.582857142857144, + "grad_norm": 52.128448486328125, + "learning_rate": 3.796825396825397e-06, + "loss": 0.1105, + "step": 8152 + }, + { + "epoch": 46.58857142857143, + "grad_norm": 14.59903621673584, + "learning_rate": 3.7904761904761907e-06, + "loss": 0.1386, + "step": 8153 + }, + { + "epoch": 46.59428571428572, + "grad_norm": 39.54267883300781, + "learning_rate": 3.7841269841269846e-06, + "loss": 0.1793, + "step": 8154 + }, + { + "epoch": 46.6, + "grad_norm": 41.793216705322266, + "learning_rate": 3.777777777777778e-06, + "loss": 0.1349, + "step": 8155 + }, + { + "epoch": 46.605714285714285, + "grad_norm": 44.470699310302734, + "learning_rate": 3.7714285714285716e-06, + "loss": 0.1634, + "step": 8156 + }, + { + "epoch": 46.61142857142857, + "grad_norm": 21.93581199645996, + "learning_rate": 3.765079365079365e-06, + "loss": 0.1589, + "step": 8157 + }, + { + "epoch": 46.61714285714286, + "grad_norm": 69.88360595703125, + "learning_rate": 3.758730158730159e-06, + "loss": 0.2723, + "step": 8158 + }, + { + "epoch": 46.62285714285714, + "grad_norm": 21.550399780273438, + "learning_rate": 3.7523809523809524e-06, + "loss": 0.1487, + "step": 8159 + }, + { + "epoch": 46.628571428571426, + "grad_norm": 65.6437759399414, + "learning_rate": 3.746031746031746e-06, + "loss": 0.123, + "step": 8160 + }, + { + "epoch": 46.63428571428572, + "grad_norm": 13.96087646484375, + "learning_rate": 
3.7396825396825394e-06, + "loss": 0.1618, + "step": 8161 + }, + { + "epoch": 46.64, + "grad_norm": 31.26278305053711, + "learning_rate": 3.7333333333333337e-06, + "loss": 0.1606, + "step": 8162 + }, + { + "epoch": 46.645714285714284, + "grad_norm": 44.1922721862793, + "learning_rate": 3.7269841269841272e-06, + "loss": 0.1782, + "step": 8163 + }, + { + "epoch": 46.651428571428575, + "grad_norm": 55.542381286621094, + "learning_rate": 3.720634920634921e-06, + "loss": 0.1617, + "step": 8164 + }, + { + "epoch": 46.65714285714286, + "grad_norm": 26.718368530273438, + "learning_rate": 3.7142857142857146e-06, + "loss": 0.118, + "step": 8165 + }, + { + "epoch": 46.66285714285714, + "grad_norm": 26.83867835998535, + "learning_rate": 3.707936507936508e-06, + "loss": 0.1379, + "step": 8166 + }, + { + "epoch": 46.668571428571425, + "grad_norm": 72.89448547363281, + "learning_rate": 3.7015873015873016e-06, + "loss": 0.1193, + "step": 8167 + }, + { + "epoch": 46.674285714285716, + "grad_norm": 30.765777587890625, + "learning_rate": 3.6952380952380955e-06, + "loss": 0.1249, + "step": 8168 + }, + { + "epoch": 46.68, + "grad_norm": 23.857946395874023, + "learning_rate": 3.688888888888889e-06, + "loss": 0.1176, + "step": 8169 + }, + { + "epoch": 46.68571428571428, + "grad_norm": 54.70825958251953, + "learning_rate": 3.6825396825396824e-06, + "loss": 0.2107, + "step": 8170 + }, + { + "epoch": 46.691428571428574, + "grad_norm": 36.463069915771484, + "learning_rate": 3.676190476190476e-06, + "loss": 0.0937, + "step": 8171 + }, + { + "epoch": 46.69714285714286, + "grad_norm": 24.24658203125, + "learning_rate": 3.66984126984127e-06, + "loss": 0.1428, + "step": 8172 + }, + { + "epoch": 46.70285714285714, + "grad_norm": 32.16752243041992, + "learning_rate": 3.663492063492064e-06, + "loss": 0.1049, + "step": 8173 + }, + { + "epoch": 46.70857142857143, + "grad_norm": 21.37236976623535, + "learning_rate": 3.6571428571428576e-06, + "loss": 0.1869, + "step": 8174 + }, + { + "epoch": 
46.714285714285715, + "grad_norm": 50.4166145324707, + "learning_rate": 3.650793650793651e-06, + "loss": 0.1869, + "step": 8175 + }, + { + "epoch": 46.72, + "grad_norm": 1364.17138671875, + "learning_rate": 3.6444444444444446e-06, + "loss": 0.2007, + "step": 8176 + }, + { + "epoch": 46.72571428571428, + "grad_norm": 53.10227584838867, + "learning_rate": 3.6380952380952385e-06, + "loss": 0.136, + "step": 8177 + }, + { + "epoch": 46.73142857142857, + "grad_norm": 27.524919509887695, + "learning_rate": 3.631746031746032e-06, + "loss": 0.1336, + "step": 8178 + }, + { + "epoch": 46.73714285714286, + "grad_norm": 35.467166900634766, + "learning_rate": 3.6253968253968255e-06, + "loss": 0.133, + "step": 8179 + }, + { + "epoch": 46.74285714285714, + "grad_norm": 83.05083465576172, + "learning_rate": 3.619047619047619e-06, + "loss": 0.1201, + "step": 8180 + }, + { + "epoch": 46.74857142857143, + "grad_norm": 24.13372230529785, + "learning_rate": 3.612698412698413e-06, + "loss": 0.2466, + "step": 8181 + }, + { + "epoch": 46.754285714285714, + "grad_norm": 1623.7891845703125, + "learning_rate": 3.6063492063492064e-06, + "loss": 0.1579, + "step": 8182 + }, + { + "epoch": 46.76, + "grad_norm": 42.3116340637207, + "learning_rate": 3.6e-06, + "loss": 0.1609, + "step": 8183 + }, + { + "epoch": 46.76571428571429, + "grad_norm": 63.3863525390625, + "learning_rate": 3.5936507936507933e-06, + "loss": 0.178, + "step": 8184 + }, + { + "epoch": 46.77142857142857, + "grad_norm": 21.229982376098633, + "learning_rate": 3.5873015873015877e-06, + "loss": 0.1605, + "step": 8185 + }, + { + "epoch": 46.777142857142856, + "grad_norm": 23.970304489135742, + "learning_rate": 3.5809523809523816e-06, + "loss": 0.1469, + "step": 8186 + }, + { + "epoch": 46.78285714285714, + "grad_norm": 44.26129913330078, + "learning_rate": 3.574603174603175e-06, + "loss": 0.226, + "step": 8187 + }, + { + "epoch": 46.78857142857143, + "grad_norm": 97.34606170654297, + "learning_rate": 3.5682539682539685e-06, + "loss": 
0.1419, + "step": 8188 + }, + { + "epoch": 46.794285714285714, + "grad_norm": 59.434635162353516, + "learning_rate": 3.561904761904762e-06, + "loss": 0.2077, + "step": 8189 + }, + { + "epoch": 46.8, + "grad_norm": 51.30827331542969, + "learning_rate": 3.555555555555556e-06, + "loss": 0.1433, + "step": 8190 + }, + { + "epoch": 46.80571428571429, + "grad_norm": 1357.738525390625, + "learning_rate": 3.5492063492063494e-06, + "loss": 0.1907, + "step": 8191 + }, + { + "epoch": 46.81142857142857, + "grad_norm": 59.56105041503906, + "learning_rate": 3.542857142857143e-06, + "loss": 0.1449, + "step": 8192 + }, + { + "epoch": 46.817142857142855, + "grad_norm": 45.39702606201172, + "learning_rate": 3.5365079365079364e-06, + "loss": 0.1678, + "step": 8193 + }, + { + "epoch": 46.822857142857146, + "grad_norm": 34.24027633666992, + "learning_rate": 3.5301587301587303e-06, + "loss": 0.1231, + "step": 8194 + }, + { + "epoch": 46.82857142857143, + "grad_norm": 1358.833251953125, + "learning_rate": 3.5238095238095238e-06, + "loss": 0.1588, + "step": 8195 + }, + { + "epoch": 46.83428571428571, + "grad_norm": 51.168968200683594, + "learning_rate": 3.517460317460318e-06, + "loss": 0.1441, + "step": 8196 + }, + { + "epoch": 46.84, + "grad_norm": 29.741315841674805, + "learning_rate": 3.5111111111111116e-06, + "loss": 0.1355, + "step": 8197 + }, + { + "epoch": 46.84571428571429, + "grad_norm": 30.0971736907959, + "learning_rate": 3.504761904761905e-06, + "loss": 0.1656, + "step": 8198 + }, + { + "epoch": 46.85142857142857, + "grad_norm": 15.887604713439941, + "learning_rate": 3.4984126984126985e-06, + "loss": 0.0932, + "step": 8199 + }, + { + "epoch": 46.857142857142854, + "grad_norm": 88.35197448730469, + "learning_rate": 3.4920634920634924e-06, + "loss": 0.1495, + "step": 8200 + }, + { + "epoch": 46.862857142857145, + "grad_norm": 553.6282348632812, + "learning_rate": 3.485714285714286e-06, + "loss": 0.1997, + "step": 8201 + }, + { + "epoch": 46.86857142857143, + "grad_norm": 
39.761356353759766, + "learning_rate": 3.4793650793650794e-06, + "loss": 0.1478, + "step": 8202 + }, + { + "epoch": 46.87428571428571, + "grad_norm": 62.88118362426758, + "learning_rate": 3.473015873015873e-06, + "loss": 0.1478, + "step": 8203 + }, + { + "epoch": 46.88, + "grad_norm": 41.82820510864258, + "learning_rate": 3.466666666666667e-06, + "loss": 0.1475, + "step": 8204 + }, + { + "epoch": 46.885714285714286, + "grad_norm": 24.58503532409668, + "learning_rate": 3.4603174603174603e-06, + "loss": 0.1154, + "step": 8205 + }, + { + "epoch": 46.89142857142857, + "grad_norm": 750.5741577148438, + "learning_rate": 3.4539682539682538e-06, + "loss": 0.1468, + "step": 8206 + }, + { + "epoch": 46.89714285714286, + "grad_norm": 38.60437774658203, + "learning_rate": 3.4476190476190472e-06, + "loss": 0.1349, + "step": 8207 + }, + { + "epoch": 46.902857142857144, + "grad_norm": 55.603458404541016, + "learning_rate": 3.4412698412698416e-06, + "loss": 0.1764, + "step": 8208 + }, + { + "epoch": 46.90857142857143, + "grad_norm": 22.69065284729004, + "learning_rate": 3.4349206349206355e-06, + "loss": 0.1271, + "step": 8209 + }, + { + "epoch": 46.91428571428571, + "grad_norm": 75.1495361328125, + "learning_rate": 3.428571428571429e-06, + "loss": 0.1061, + "step": 8210 + }, + { + "epoch": 46.92, + "grad_norm": 462.46527099609375, + "learning_rate": 3.4222222222222224e-06, + "loss": 0.1508, + "step": 8211 + }, + { + "epoch": 46.925714285714285, + "grad_norm": 45.36282730102539, + "learning_rate": 3.415873015873016e-06, + "loss": 0.168, + "step": 8212 + }, + { + "epoch": 46.93142857142857, + "grad_norm": 45.03870391845703, + "learning_rate": 3.40952380952381e-06, + "loss": 0.1222, + "step": 8213 + }, + { + "epoch": 46.93714285714286, + "grad_norm": 25.038911819458008, + "learning_rate": 3.4031746031746033e-06, + "loss": 0.2271, + "step": 8214 + }, + { + "epoch": 46.94285714285714, + "grad_norm": 31.28525733947754, + "learning_rate": 3.396825396825397e-06, + "loss": 0.1915, + 
"step": 8215 + }, + { + "epoch": 46.94857142857143, + "grad_norm": 63.485836029052734, + "learning_rate": 3.3904761904761903e-06, + "loss": 0.1574, + "step": 8216 + }, + { + "epoch": 46.95428571428572, + "grad_norm": 22.424684524536133, + "learning_rate": 3.384126984126984e-06, + "loss": 0.1187, + "step": 8217 + }, + { + "epoch": 46.96, + "grad_norm": 32.10712814331055, + "learning_rate": 3.3777777777777777e-06, + "loss": 0.1317, + "step": 8218 + }, + { + "epoch": 46.965714285714284, + "grad_norm": 30.546588897705078, + "learning_rate": 3.371428571428572e-06, + "loss": 0.2055, + "step": 8219 + }, + { + "epoch": 46.97142857142857, + "grad_norm": 34.96418380737305, + "learning_rate": 3.3650793650793655e-06, + "loss": 0.136, + "step": 8220 + }, + { + "epoch": 46.97714285714286, + "grad_norm": 65.4540023803711, + "learning_rate": 3.358730158730159e-06, + "loss": 0.1742, + "step": 8221 + }, + { + "epoch": 46.98285714285714, + "grad_norm": 45.166603088378906, + "learning_rate": 3.352380952380953e-06, + "loss": 0.1549, + "step": 8222 + }, + { + "epoch": 46.988571428571426, + "grad_norm": 37.14235305786133, + "learning_rate": 3.3460317460317464e-06, + "loss": 0.0934, + "step": 8223 + }, + { + "epoch": 46.994285714285716, + "grad_norm": 50.102142333984375, + "learning_rate": 3.33968253968254e-06, + "loss": 0.171, + "step": 8224 + }, + { + "epoch": 47.0, + "grad_norm": 46.10976791381836, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.1926, + "step": 8225 + }, + { + "epoch": 47.0, + "eval_classes": 0, + "eval_loss": 0.5867149829864502, + "eval_map": 0.9388, + "eval_map_50": 0.97, + "eval_map_75": 0.9642, + "eval_map_large": 0.9388, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9388, + "eval_map_small": -1.0, + "eval_mar_1": 0.7911, + "eval_mar_10": 0.9752, + "eval_mar_100": 0.9765, + "eval_mar_100_per_class": 0.9765, + "eval_mar_large": 0.9765, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 12.8376, + "eval_samples_per_second": 22.901, + 
"eval_steps_per_second": 2.882, + "step": 8225 + }, + { + "epoch": 47.005714285714284, + "grad_norm": 831.6156616210938, + "learning_rate": 3.3269841269841272e-06, + "loss": 0.1533, + "step": 8226 + }, + { + "epoch": 47.011428571428574, + "grad_norm": 48.87065887451172, + "learning_rate": 3.3206349206349207e-06, + "loss": 0.1053, + "step": 8227 + }, + { + "epoch": 47.01714285714286, + "grad_norm": 63.857749938964844, + "learning_rate": 3.314285714285714e-06, + "loss": 0.2056, + "step": 8228 + }, + { + "epoch": 47.02285714285714, + "grad_norm": 35.554115295410156, + "learning_rate": 3.3079365079365077e-06, + "loss": 0.1378, + "step": 8229 + }, + { + "epoch": 47.02857142857143, + "grad_norm": 45.24153137207031, + "learning_rate": 3.301587301587302e-06, + "loss": 0.1045, + "step": 8230 + }, + { + "epoch": 47.034285714285716, + "grad_norm": 20.985979080200195, + "learning_rate": 3.2952380952380955e-06, + "loss": 0.1071, + "step": 8231 + }, + { + "epoch": 47.04, + "grad_norm": 36.83102798461914, + "learning_rate": 3.2888888888888894e-06, + "loss": 0.1437, + "step": 8232 + }, + { + "epoch": 47.04571428571428, + "grad_norm": 94.6207504272461, + "learning_rate": 3.282539682539683e-06, + "loss": 0.173, + "step": 8233 + }, + { + "epoch": 47.05142857142857, + "grad_norm": 25.87105369567871, + "learning_rate": 3.2761904761904764e-06, + "loss": 0.13, + "step": 8234 + }, + { + "epoch": 47.05714285714286, + "grad_norm": 27.784732818603516, + "learning_rate": 3.26984126984127e-06, + "loss": 0.1602, + "step": 8235 + }, + { + "epoch": 47.06285714285714, + "grad_norm": 27.864486694335938, + "learning_rate": 3.2634920634920638e-06, + "loss": 0.1795, + "step": 8236 + }, + { + "epoch": 47.06857142857143, + "grad_norm": 36.08664321899414, + "learning_rate": 3.2571428571428572e-06, + "loss": 0.1531, + "step": 8237 + }, + { + "epoch": 47.074285714285715, + "grad_norm": 79.20808410644531, + "learning_rate": 3.2507936507936507e-06, + "loss": 0.1488, + "step": 8238 + }, + { + "epoch": 47.08, 
+ "grad_norm": 60.46731185913086, + "learning_rate": 3.244444444444444e-06, + "loss": 0.1718, + "step": 8239 + }, + { + "epoch": 47.08571428571429, + "grad_norm": 27.0144100189209, + "learning_rate": 3.238095238095238e-06, + "loss": 0.145, + "step": 8240 + }, + { + "epoch": 47.09142857142857, + "grad_norm": 49.7216682434082, + "learning_rate": 3.2317460317460316e-06, + "loss": 0.1116, + "step": 8241 + }, + { + "epoch": 47.097142857142856, + "grad_norm": 38.72159957885742, + "learning_rate": 3.225396825396826e-06, + "loss": 0.1109, + "step": 8242 + }, + { + "epoch": 47.10285714285714, + "grad_norm": 47.735015869140625, + "learning_rate": 3.2190476190476194e-06, + "loss": 0.1321, + "step": 8243 + }, + { + "epoch": 47.10857142857143, + "grad_norm": 1039.72509765625, + "learning_rate": 3.212698412698413e-06, + "loss": 0.1194, + "step": 8244 + }, + { + "epoch": 47.114285714285714, + "grad_norm": 51.46315383911133, + "learning_rate": 3.206349206349207e-06, + "loss": 0.1445, + "step": 8245 + }, + { + "epoch": 47.12, + "grad_norm": 20.40951919555664, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.1499, + "step": 8246 + }, + { + "epoch": 47.12571428571429, + "grad_norm": 24.824369430541992, + "learning_rate": 3.1936507936507938e-06, + "loss": 0.1675, + "step": 8247 + }, + { + "epoch": 47.13142857142857, + "grad_norm": 37.45597839355469, + "learning_rate": 3.1873015873015872e-06, + "loss": 0.1664, + "step": 8248 + }, + { + "epoch": 47.137142857142855, + "grad_norm": 40.37081527709961, + "learning_rate": 3.180952380952381e-06, + "loss": 0.1506, + "step": 8249 + }, + { + "epoch": 47.142857142857146, + "grad_norm": 27.73122215270996, + "learning_rate": 3.1746031746031746e-06, + "loss": 0.1141, + "step": 8250 + }, + { + "epoch": 47.14857142857143, + "grad_norm": 48.40898132324219, + "learning_rate": 3.168253968253968e-06, + "loss": 0.0857, + "step": 8251 + }, + { + "epoch": 47.15428571428571, + "grad_norm": 77.0861587524414, + "learning_rate": 3.1619047619047616e-06, + 
"loss": 0.1449, + "step": 8252 + }, + { + "epoch": 47.16, + "grad_norm": 68.09453582763672, + "learning_rate": 3.155555555555556e-06, + "loss": 0.1245, + "step": 8253 + }, + { + "epoch": 47.16571428571429, + "grad_norm": 51.11342239379883, + "learning_rate": 3.14920634920635e-06, + "loss": 0.0918, + "step": 8254 + }, + { + "epoch": 47.17142857142857, + "grad_norm": 52.35264205932617, + "learning_rate": 3.1428571428571433e-06, + "loss": 0.1848, + "step": 8255 + }, + { + "epoch": 47.177142857142854, + "grad_norm": 25.312780380249023, + "learning_rate": 3.136507936507937e-06, + "loss": 0.0969, + "step": 8256 + }, + { + "epoch": 47.182857142857145, + "grad_norm": 55.124568939208984, + "learning_rate": 3.1301587301587303e-06, + "loss": 0.118, + "step": 8257 + }, + { + "epoch": 47.18857142857143, + "grad_norm": 237.57467651367188, + "learning_rate": 3.123809523809524e-06, + "loss": 0.1089, + "step": 8258 + }, + { + "epoch": 47.19428571428571, + "grad_norm": 38.97993087768555, + "learning_rate": 3.1174603174603177e-06, + "loss": 0.1345, + "step": 8259 + }, + { + "epoch": 47.2, + "grad_norm": 50.58306884765625, + "learning_rate": 3.111111111111111e-06, + "loss": 0.1569, + "step": 8260 + }, + { + "epoch": 47.205714285714286, + "grad_norm": 378.4385070800781, + "learning_rate": 3.1047619047619046e-06, + "loss": 0.1853, + "step": 8261 + }, + { + "epoch": 47.21142857142857, + "grad_norm": 55.626102447509766, + "learning_rate": 3.0984126984126985e-06, + "loss": 0.1259, + "step": 8262 + }, + { + "epoch": 47.21714285714286, + "grad_norm": 105.51905059814453, + "learning_rate": 3.0920634920634925e-06, + "loss": 0.1035, + "step": 8263 + }, + { + "epoch": 47.222857142857144, + "grad_norm": 44.22740936279297, + "learning_rate": 3.085714285714286e-06, + "loss": 0.119, + "step": 8264 + }, + { + "epoch": 47.22857142857143, + "grad_norm": 58.547607421875, + "learning_rate": 3.0793650793650794e-06, + "loss": 0.1681, + "step": 8265 + }, + { + "epoch": 47.23428571428571, + "grad_norm": 
30.744277954101562, + "learning_rate": 3.073015873015873e-06, + "loss": 0.2352, + "step": 8266 + }, + { + "epoch": 47.24, + "grad_norm": 81.6842269897461, + "learning_rate": 3.066666666666667e-06, + "loss": 0.1274, + "step": 8267 + }, + { + "epoch": 47.245714285714286, + "grad_norm": 251.95037841796875, + "learning_rate": 3.0603174603174607e-06, + "loss": 0.1263, + "step": 8268 + }, + { + "epoch": 47.25142857142857, + "grad_norm": 24.623939514160156, + "learning_rate": 3.053968253968254e-06, + "loss": 0.1671, + "step": 8269 + }, + { + "epoch": 47.25714285714286, + "grad_norm": 36.25025177001953, + "learning_rate": 3.0476190476190477e-06, + "loss": 0.1635, + "step": 8270 + }, + { + "epoch": 47.26285714285714, + "grad_norm": 85.24287414550781, + "learning_rate": 3.041269841269841e-06, + "loss": 0.133, + "step": 8271 + }, + { + "epoch": 47.26857142857143, + "grad_norm": 27.511301040649414, + "learning_rate": 3.034920634920635e-06, + "loss": 0.1113, + "step": 8272 + }, + { + "epoch": 47.27428571428572, + "grad_norm": 19.489055633544922, + "learning_rate": 3.028571428571429e-06, + "loss": 0.0927, + "step": 8273 + }, + { + "epoch": 47.28, + "grad_norm": 37.98540115356445, + "learning_rate": 3.0222222222222225e-06, + "loss": 0.0935, + "step": 8274 + }, + { + "epoch": 47.285714285714285, + "grad_norm": 70.36798858642578, + "learning_rate": 3.015873015873016e-06, + "loss": 0.1276, + "step": 8275 + }, + { + "epoch": 47.29142857142857, + "grad_norm": 25.47026252746582, + "learning_rate": 3.00952380952381e-06, + "loss": 0.2125, + "step": 8276 + }, + { + "epoch": 47.29714285714286, + "grad_norm": 20.216646194458008, + "learning_rate": 3.0031746031746033e-06, + "loss": 0.1335, + "step": 8277 + }, + { + "epoch": 47.30285714285714, + "grad_norm": 46.408729553222656, + "learning_rate": 2.996825396825397e-06, + "loss": 0.1901, + "step": 8278 + }, + { + "epoch": 47.308571428571426, + "grad_norm": 33.401920318603516, + "learning_rate": 2.9904761904761907e-06, + "loss": 0.1619, + 
"step": 8279 + }, + { + "epoch": 47.31428571428572, + "grad_norm": 1567.3370361328125, + "learning_rate": 2.984126984126984e-06, + "loss": 0.3557, + "step": 8280 + }, + { + "epoch": 47.32, + "grad_norm": 52.10667037963867, + "learning_rate": 2.977777777777778e-06, + "loss": 0.1347, + "step": 8281 + }, + { + "epoch": 47.325714285714284, + "grad_norm": 37.514949798583984, + "learning_rate": 2.9714285714285716e-06, + "loss": 0.1396, + "step": 8282 + }, + { + "epoch": 47.331428571428575, + "grad_norm": 21.12006950378418, + "learning_rate": 2.965079365079365e-06, + "loss": 0.1, + "step": 8283 + }, + { + "epoch": 47.33714285714286, + "grad_norm": 70.54804992675781, + "learning_rate": 2.9587301587301586e-06, + "loss": 0.1464, + "step": 8284 + }, + { + "epoch": 47.34285714285714, + "grad_norm": 55.37355422973633, + "learning_rate": 2.9523809523809525e-06, + "loss": 0.0889, + "step": 8285 + }, + { + "epoch": 47.348571428571425, + "grad_norm": 41.585208892822266, + "learning_rate": 2.9460317460317464e-06, + "loss": 0.1637, + "step": 8286 + }, + { + "epoch": 47.354285714285716, + "grad_norm": 34.12653350830078, + "learning_rate": 2.93968253968254e-06, + "loss": 0.1423, + "step": 8287 + }, + { + "epoch": 47.36, + "grad_norm": 36.18317413330078, + "learning_rate": 2.9333333333333333e-06, + "loss": 0.1318, + "step": 8288 + }, + { + "epoch": 47.36571428571428, + "grad_norm": 43.84857940673828, + "learning_rate": 2.926984126984127e-06, + "loss": 0.1479, + "step": 8289 + }, + { + "epoch": 47.371428571428574, + "grad_norm": 67.99090576171875, + "learning_rate": 2.920634920634921e-06, + "loss": 0.1318, + "step": 8290 + }, + { + "epoch": 47.37714285714286, + "grad_norm": 13.11436939239502, + "learning_rate": 2.9142857142857146e-06, + "loss": 0.1368, + "step": 8291 + }, + { + "epoch": 47.38285714285714, + "grad_norm": 30.894506454467773, + "learning_rate": 2.907936507936508e-06, + "loss": 0.1138, + "step": 8292 + }, + { + "epoch": 47.38857142857143, + "grad_norm": 38.45000457763672, + 
"learning_rate": 2.9015873015873016e-06, + "loss": 0.1314, + "step": 8293 + }, + { + "epoch": 47.394285714285715, + "grad_norm": 17.25142478942871, + "learning_rate": 2.8952380952380955e-06, + "loss": 0.151, + "step": 8294 + }, + { + "epoch": 47.4, + "grad_norm": 40.75666427612305, + "learning_rate": 2.888888888888889e-06, + "loss": 0.1359, + "step": 8295 + }, + { + "epoch": 47.40571428571428, + "grad_norm": 49.49689865112305, + "learning_rate": 2.882539682539683e-06, + "loss": 0.1432, + "step": 8296 + }, + { + "epoch": 47.41142857142857, + "grad_norm": 39.747493743896484, + "learning_rate": 2.8761904761904764e-06, + "loss": 0.131, + "step": 8297 + }, + { + "epoch": 47.417142857142856, + "grad_norm": 11.636551856994629, + "learning_rate": 2.86984126984127e-06, + "loss": 0.1374, + "step": 8298 + }, + { + "epoch": 47.42285714285714, + "grad_norm": 46.46279525756836, + "learning_rate": 2.8634920634920638e-06, + "loss": 0.0989, + "step": 8299 + }, + { + "epoch": 47.42857142857143, + "grad_norm": 62.19584274291992, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.1192, + "step": 8300 + }, + { + "epoch": 47.434285714285714, + "grad_norm": 37.39603805541992, + "learning_rate": 2.8507936507936507e-06, + "loss": 0.1281, + "step": 8301 + }, + { + "epoch": 47.44, + "grad_norm": 70.29913330078125, + "learning_rate": 2.8444444444444446e-06, + "loss": 0.1249, + "step": 8302 + }, + { + "epoch": 47.44571428571429, + "grad_norm": 39.924232482910156, + "learning_rate": 2.838095238095238e-06, + "loss": 0.1295, + "step": 8303 + }, + { + "epoch": 47.45142857142857, + "grad_norm": 22.518259048461914, + "learning_rate": 2.831746031746032e-06, + "loss": 0.1061, + "step": 8304 + }, + { + "epoch": 47.457142857142856, + "grad_norm": 49.80412673950195, + "learning_rate": 2.8253968253968255e-06, + "loss": 0.1511, + "step": 8305 + }, + { + "epoch": 47.462857142857146, + "grad_norm": 41.26884841918945, + "learning_rate": 2.819047619047619e-06, + "loss": 0.1306, + "step": 8306 + }, + { + 
"epoch": 47.46857142857143, + "grad_norm": 45.56854248046875, + "learning_rate": 2.8126984126984125e-06, + "loss": 0.1149, + "step": 8307 + }, + { + "epoch": 47.47428571428571, + "grad_norm": 55.0831298828125, + "learning_rate": 2.806349206349207e-06, + "loss": 0.0992, + "step": 8308 + }, + { + "epoch": 47.48, + "grad_norm": 23.75543975830078, + "learning_rate": 2.8000000000000003e-06, + "loss": 0.1156, + "step": 8309 + }, + { + "epoch": 47.48571428571429, + "grad_norm": 22.74629783630371, + "learning_rate": 2.7936507936507938e-06, + "loss": 0.1275, + "step": 8310 + }, + { + "epoch": 47.49142857142857, + "grad_norm": 56.682891845703125, + "learning_rate": 2.7873015873015873e-06, + "loss": 0.1803, + "step": 8311 + }, + { + "epoch": 47.497142857142855, + "grad_norm": 25.42420196533203, + "learning_rate": 2.780952380952381e-06, + "loss": 0.2765, + "step": 8312 + }, + { + "epoch": 47.502857142857145, + "grad_norm": 48.49197769165039, + "learning_rate": 2.774603174603175e-06, + "loss": 0.1611, + "step": 8313 + }, + { + "epoch": 47.50857142857143, + "grad_norm": 31.067285537719727, + "learning_rate": 2.7682539682539686e-06, + "loss": 0.1706, + "step": 8314 + }, + { + "epoch": 47.51428571428571, + "grad_norm": 47.51041030883789, + "learning_rate": 2.761904761904762e-06, + "loss": 0.1307, + "step": 8315 + }, + { + "epoch": 47.52, + "grad_norm": 40.67095184326172, + "learning_rate": 2.7555555555555555e-06, + "loss": 0.0961, + "step": 8316 + }, + { + "epoch": 47.52571428571429, + "grad_norm": 146.58514404296875, + "learning_rate": 2.7492063492063494e-06, + "loss": 0.1653, + "step": 8317 + }, + { + "epoch": 47.53142857142857, + "grad_norm": 80.32205200195312, + "learning_rate": 2.742857142857143e-06, + "loss": 0.1155, + "step": 8318 + }, + { + "epoch": 47.537142857142854, + "grad_norm": 47.289634704589844, + "learning_rate": 2.736507936507937e-06, + "loss": 0.1319, + "step": 8319 + }, + { + "epoch": 47.542857142857144, + "grad_norm": 23.88016700744629, + "learning_rate": 
2.7301587301587303e-06, + "loss": 0.1174, + "step": 8320 + }, + { + "epoch": 47.54857142857143, + "grad_norm": 50.47907638549805, + "learning_rate": 2.7238095238095238e-06, + "loss": 0.1272, + "step": 8321 + }, + { + "epoch": 47.55428571428571, + "grad_norm": 45.27683639526367, + "learning_rate": 2.7174603174603177e-06, + "loss": 0.1088, + "step": 8322 + }, + { + "epoch": 47.56, + "grad_norm": 23.826622009277344, + "learning_rate": 2.711111111111111e-06, + "loss": 0.1979, + "step": 8323 + }, + { + "epoch": 47.565714285714286, + "grad_norm": 30.396648406982422, + "learning_rate": 2.7047619047619047e-06, + "loss": 0.1101, + "step": 8324 + }, + { + "epoch": 47.57142857142857, + "grad_norm": 35.595489501953125, + "learning_rate": 2.6984126984126986e-06, + "loss": 0.1466, + "step": 8325 + }, + { + "epoch": 47.57714285714286, + "grad_norm": 40.22230911254883, + "learning_rate": 2.692063492063492e-06, + "loss": 0.1931, + "step": 8326 + }, + { + "epoch": 47.582857142857144, + "grad_norm": 161.3650360107422, + "learning_rate": 2.685714285714286e-06, + "loss": 0.133, + "step": 8327 + }, + { + "epoch": 47.58857142857143, + "grad_norm": 35.126338958740234, + "learning_rate": 2.6793650793650794e-06, + "loss": 0.127, + "step": 8328 + }, + { + "epoch": 47.59428571428572, + "grad_norm": 34.326942443847656, + "learning_rate": 2.673015873015873e-06, + "loss": 0.1369, + "step": 8329 + }, + { + "epoch": 47.6, + "grad_norm": 59.76482009887695, + "learning_rate": 2.666666666666667e-06, + "loss": 0.1403, + "step": 8330 + }, + { + "epoch": 47.605714285714285, + "grad_norm": 42.53165817260742, + "learning_rate": 2.6603174603174607e-06, + "loss": 0.1916, + "step": 8331 + }, + { + "epoch": 47.61142857142857, + "grad_norm": 68.71272277832031, + "learning_rate": 2.6539682539682542e-06, + "loss": 0.1097, + "step": 8332 + }, + { + "epoch": 47.61714285714286, + "grad_norm": 35.917396545410156, + "learning_rate": 2.6476190476190477e-06, + "loss": 0.0865, + "step": 8333 + }, + { + "epoch": 
47.62285714285714, + "grad_norm": 25.682573318481445, + "learning_rate": 2.641269841269841e-06, + "loss": 0.1399, + "step": 8334 + }, + { + "epoch": 47.628571428571426, + "grad_norm": 43.88801574707031, + "learning_rate": 2.634920634920635e-06, + "loss": 0.1751, + "step": 8335 + }, + { + "epoch": 47.63428571428572, + "grad_norm": 27.394786834716797, + "learning_rate": 2.628571428571429e-06, + "loss": 0.1477, + "step": 8336 + }, + { + "epoch": 47.64, + "grad_norm": 95.20401763916016, + "learning_rate": 2.6222222222222225e-06, + "loss": 0.1482, + "step": 8337 + }, + { + "epoch": 47.645714285714284, + "grad_norm": 29.056629180908203, + "learning_rate": 2.615873015873016e-06, + "loss": 0.1455, + "step": 8338 + }, + { + "epoch": 47.651428571428575, + "grad_norm": 42.398990631103516, + "learning_rate": 2.6095238095238094e-06, + "loss": 0.1229, + "step": 8339 + }, + { + "epoch": 47.65714285714286, + "grad_norm": 44.73271179199219, + "learning_rate": 2.6031746031746034e-06, + "loss": 0.1161, + "step": 8340 + }, + { + "epoch": 47.66285714285714, + "grad_norm": 46.483375549316406, + "learning_rate": 2.596825396825397e-06, + "loss": 0.1542, + "step": 8341 + }, + { + "epoch": 47.668571428571425, + "grad_norm": 54.983882904052734, + "learning_rate": 2.5904761904761907e-06, + "loss": 0.1515, + "step": 8342 + }, + { + "epoch": 47.674285714285716, + "grad_norm": 111.94491577148438, + "learning_rate": 2.5841269841269842e-06, + "loss": 0.1714, + "step": 8343 + }, + { + "epoch": 47.68, + "grad_norm": 20.73655128479004, + "learning_rate": 2.5777777777777777e-06, + "loss": 0.1511, + "step": 8344 + }, + { + "epoch": 47.68571428571428, + "grad_norm": 78.76811218261719, + "learning_rate": 2.5714285714285716e-06, + "loss": 0.126, + "step": 8345 + }, + { + "epoch": 47.691428571428574, + "grad_norm": 59.38375473022461, + "learning_rate": 2.565079365079365e-06, + "loss": 0.1051, + "step": 8346 + }, + { + "epoch": 47.69714285714286, + "grad_norm": 39.290348052978516, + "learning_rate": 
2.5587301587301586e-06, + "loss": 0.117, + "step": 8347 + }, + { + "epoch": 47.70285714285714, + "grad_norm": 22.81290054321289, + "learning_rate": 2.5523809523809525e-06, + "loss": 0.1429, + "step": 8348 + }, + { + "epoch": 47.70857142857143, + "grad_norm": 88.15597534179688, + "learning_rate": 2.5460317460317464e-06, + "loss": 0.0944, + "step": 8349 + }, + { + "epoch": 47.714285714285715, + "grad_norm": 369.8224792480469, + "learning_rate": 2.53968253968254e-06, + "loss": 0.1044, + "step": 8350 + }, + { + "epoch": 47.72, + "grad_norm": 48.856605529785156, + "learning_rate": 2.5333333333333334e-06, + "loss": 0.1982, + "step": 8351 + }, + { + "epoch": 47.72571428571428, + "grad_norm": 25.321481704711914, + "learning_rate": 2.526984126984127e-06, + "loss": 0.1451, + "step": 8352 + }, + { + "epoch": 47.73142857142857, + "grad_norm": 77.55184173583984, + "learning_rate": 2.5206349206349207e-06, + "loss": 0.1326, + "step": 8353 + }, + { + "epoch": 47.73714285714286, + "grad_norm": 27.76304817199707, + "learning_rate": 2.5142857142857147e-06, + "loss": 0.1235, + "step": 8354 + }, + { + "epoch": 47.74285714285714, + "grad_norm": 391.8480529785156, + "learning_rate": 2.507936507936508e-06, + "loss": 0.1613, + "step": 8355 + }, + { + "epoch": 47.74857142857143, + "grad_norm": 26.454538345336914, + "learning_rate": 2.5015873015873016e-06, + "loss": 0.1165, + "step": 8356 + }, + { + "epoch": 47.754285714285714, + "grad_norm": 26.777204513549805, + "learning_rate": 2.495238095238095e-06, + "loss": 0.1185, + "step": 8357 + }, + { + "epoch": 47.76, + "grad_norm": 599.7885131835938, + "learning_rate": 2.488888888888889e-06, + "loss": 0.1889, + "step": 8358 + }, + { + "epoch": 47.76571428571429, + "grad_norm": 382.1898193359375, + "learning_rate": 2.482539682539683e-06, + "loss": 0.1575, + "step": 8359 + }, + { + "epoch": 47.77142857142857, + "grad_norm": 57.55063247680664, + "learning_rate": 2.4761904761904764e-06, + "loss": 0.1624, + "step": 8360 + }, + { + "epoch": 
47.777142857142856, + "grad_norm": 24.63785743713379, + "learning_rate": 2.46984126984127e-06, + "loss": 0.1153, + "step": 8361 + }, + { + "epoch": 47.78285714285714, + "grad_norm": 36.32056427001953, + "learning_rate": 2.4634920634920634e-06, + "loss": 0.1473, + "step": 8362 + }, + { + "epoch": 47.78857142857143, + "grad_norm": 65.92711639404297, + "learning_rate": 2.4571428571428573e-06, + "loss": 0.1222, + "step": 8363 + }, + { + "epoch": 47.794285714285714, + "grad_norm": 34.27105712890625, + "learning_rate": 2.4507936507936508e-06, + "loss": 0.1124, + "step": 8364 + }, + { + "epoch": 47.8, + "grad_norm": 27.015478134155273, + "learning_rate": 2.4444444444444447e-06, + "loss": 0.1552, + "step": 8365 + }, + { + "epoch": 47.80571428571429, + "grad_norm": 33.48320770263672, + "learning_rate": 2.438095238095238e-06, + "loss": 0.1325, + "step": 8366 + }, + { + "epoch": 47.81142857142857, + "grad_norm": 29.862972259521484, + "learning_rate": 2.431746031746032e-06, + "loss": 0.1393, + "step": 8367 + }, + { + "epoch": 47.817142857142855, + "grad_norm": 63.37629699707031, + "learning_rate": 2.4253968253968255e-06, + "loss": 0.1837, + "step": 8368 + }, + { + "epoch": 47.822857142857146, + "grad_norm": 42.46182632446289, + "learning_rate": 2.419047619047619e-06, + "loss": 0.1581, + "step": 8369 + }, + { + "epoch": 47.82857142857143, + "grad_norm": 68.42111206054688, + "learning_rate": 2.412698412698413e-06, + "loss": 0.1119, + "step": 8370 + }, + { + "epoch": 47.83428571428571, + "grad_norm": 38.69887924194336, + "learning_rate": 2.4063492063492064e-06, + "loss": 0.1239, + "step": 8371 + }, + { + "epoch": 47.84, + "grad_norm": 19.0052490234375, + "learning_rate": 2.4000000000000003e-06, + "loss": 0.1433, + "step": 8372 + }, + { + "epoch": 47.84571428571429, + "grad_norm": 56.69364929199219, + "learning_rate": 2.393650793650794e-06, + "loss": 0.5268, + "step": 8373 + }, + { + "epoch": 47.85142857142857, + "grad_norm": 104.00349426269531, + "learning_rate": 
2.3873015873015873e-06, + "loss": 0.0927, + "step": 8374 + }, + { + "epoch": 47.857142857142854, + "grad_norm": 27.823810577392578, + "learning_rate": 2.3809523809523808e-06, + "loss": 0.1616, + "step": 8375 + }, + { + "epoch": 47.862857142857145, + "grad_norm": 34.778770446777344, + "learning_rate": 2.3746031746031747e-06, + "loss": 0.1227, + "step": 8376 + }, + { + "epoch": 47.86857142857143, + "grad_norm": 278.9198913574219, + "learning_rate": 2.3682539682539686e-06, + "loss": 0.1321, + "step": 8377 + }, + { + "epoch": 47.87428571428571, + "grad_norm": 61.145484924316406, + "learning_rate": 2.361904761904762e-06, + "loss": 0.1147, + "step": 8378 + }, + { + "epoch": 47.88, + "grad_norm": 30.837554931640625, + "learning_rate": 2.3555555555555555e-06, + "loss": 0.1414, + "step": 8379 + }, + { + "epoch": 47.885714285714286, + "grad_norm": 78.45512390136719, + "learning_rate": 2.349206349206349e-06, + "loss": 0.1562, + "step": 8380 + }, + { + "epoch": 47.89142857142857, + "grad_norm": 56.4572639465332, + "learning_rate": 2.342857142857143e-06, + "loss": 0.1366, + "step": 8381 + }, + { + "epoch": 47.89714285714286, + "grad_norm": 30.032258987426758, + "learning_rate": 2.336507936507937e-06, + "loss": 0.1084, + "step": 8382 + }, + { + "epoch": 47.902857142857144, + "grad_norm": 70.42892456054688, + "learning_rate": 2.3301587301587303e-06, + "loss": 0.1693, + "step": 8383 + }, + { + "epoch": 47.90857142857143, + "grad_norm": 37.96274185180664, + "learning_rate": 2.323809523809524e-06, + "loss": 0.1191, + "step": 8384 + }, + { + "epoch": 47.91428571428571, + "grad_norm": 25.497051239013672, + "learning_rate": 2.3174603174603177e-06, + "loss": 0.1744, + "step": 8385 + }, + { + "epoch": 47.92, + "grad_norm": 42.66016387939453, + "learning_rate": 2.311111111111111e-06, + "loss": 0.1295, + "step": 8386 + }, + { + "epoch": 47.925714285714285, + "grad_norm": 25.076026916503906, + "learning_rate": 2.3047619047619047e-06, + "loss": 0.1547, + "step": 8387 + }, + { + "epoch": 
47.93142857142857, + "grad_norm": 30.716888427734375, + "learning_rate": 2.2984126984126986e-06, + "loss": 0.1679, + "step": 8388 + }, + { + "epoch": 47.93714285714286, + "grad_norm": 38.1251106262207, + "learning_rate": 2.292063492063492e-06, + "loss": 0.2028, + "step": 8389 + }, + { + "epoch": 47.94285714285714, + "grad_norm": 42.802391052246094, + "learning_rate": 2.285714285714286e-06, + "loss": 0.1305, + "step": 8390 + }, + { + "epoch": 47.94857142857143, + "grad_norm": 104.67507934570312, + "learning_rate": 2.2793650793650795e-06, + "loss": 0.1812, + "step": 8391 + }, + { + "epoch": 47.95428571428572, + "grad_norm": 25.324993133544922, + "learning_rate": 2.273015873015873e-06, + "loss": 0.1411, + "step": 8392 + }, + { + "epoch": 47.96, + "grad_norm": 37.27313995361328, + "learning_rate": 2.266666666666667e-06, + "loss": 0.0953, + "step": 8393 + }, + { + "epoch": 47.965714285714284, + "grad_norm": 36.83076477050781, + "learning_rate": 2.2603174603174603e-06, + "loss": 0.1841, + "step": 8394 + }, + { + "epoch": 47.97142857142857, + "grad_norm": 67.00370025634766, + "learning_rate": 2.2539682539682542e-06, + "loss": 0.2445, + "step": 8395 + }, + { + "epoch": 47.97714285714286, + "grad_norm": 32.93381881713867, + "learning_rate": 2.2476190476190477e-06, + "loss": 0.1419, + "step": 8396 + }, + { + "epoch": 47.98285714285714, + "grad_norm": 82.33846282958984, + "learning_rate": 2.241269841269841e-06, + "loss": 0.1457, + "step": 8397 + }, + { + "epoch": 47.988571428571426, + "grad_norm": 40.23613739013672, + "learning_rate": 2.2349206349206347e-06, + "loss": 0.1066, + "step": 8398 + }, + { + "epoch": 47.994285714285716, + "grad_norm": 121.78873443603516, + "learning_rate": 2.228571428571429e-06, + "loss": 0.1353, + "step": 8399 + }, + { + "epoch": 48.0, + "grad_norm": 13.992203712463379, + "learning_rate": 2.2222222222222225e-06, + "loss": 0.1188, + "step": 8400 + }, + { + "epoch": 48.0, + "eval_classes": 0, + "eval_loss": 0.5805517435073853, + "eval_map": 0.9383, + 
"eval_map_50": 0.9725, + "eval_map_75": 0.9652, + "eval_map_large": 0.9383, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9383, + "eval_map_small": -1.0, + "eval_mar_1": 0.7952, + "eval_mar_10": 0.9727, + "eval_mar_100": 0.9743, + "eval_mar_100_per_class": 0.9743, + "eval_mar_large": 0.9743, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 12.762, + "eval_samples_per_second": 23.037, + "eval_steps_per_second": 2.899, + "step": 8400 + }, + { + "epoch": 48.005714285714284, + "grad_norm": 34.68983840942383, + "learning_rate": 2.215873015873016e-06, + "loss": 0.0969, + "step": 8401 + }, + { + "epoch": 48.011428571428574, + "grad_norm": 44.068450927734375, + "learning_rate": 2.2095238095238095e-06, + "loss": 0.1547, + "step": 8402 + }, + { + "epoch": 48.01714285714286, + "grad_norm": 68.57250213623047, + "learning_rate": 2.2031746031746034e-06, + "loss": 0.1059, + "step": 8403 + }, + { + "epoch": 48.02285714285714, + "grad_norm": 53.91074752807617, + "learning_rate": 2.196825396825397e-06, + "loss": 0.1564, + "step": 8404 + }, + { + "epoch": 48.02857142857143, + "grad_norm": 17.755817413330078, + "learning_rate": 2.1904761904761908e-06, + "loss": 0.107, + "step": 8405 + }, + { + "epoch": 48.034285714285716, + "grad_norm": 42.74783706665039, + "learning_rate": 2.1841269841269842e-06, + "loss": 0.1607, + "step": 8406 + }, + { + "epoch": 48.04, + "grad_norm": 51.61184310913086, + "learning_rate": 2.1777777777777777e-06, + "loss": 0.0835, + "step": 8407 + }, + { + "epoch": 48.04571428571428, + "grad_norm": 664.2689208984375, + "learning_rate": 2.1714285714285716e-06, + "loss": 0.1338, + "step": 8408 + }, + { + "epoch": 48.05142857142857, + "grad_norm": 50.177127838134766, + "learning_rate": 2.165079365079365e-06, + "loss": 0.1307, + "step": 8409 + }, + { + "epoch": 48.05714285714286, + "grad_norm": 56.43240737915039, + "learning_rate": 2.158730158730159e-06, + "loss": 0.1712, + "step": 8410 + }, + { + "epoch": 48.06285714285714, + "grad_norm": 
68.9998550415039, + "learning_rate": 2.1523809523809525e-06, + "loss": 0.0927, + "step": 8411 + }, + { + "epoch": 48.06857142857143, + "grad_norm": 39.705413818359375, + "learning_rate": 2.146031746031746e-06, + "loss": 0.1695, + "step": 8412 + }, + { + "epoch": 48.074285714285715, + "grad_norm": 52.13355255126953, + "learning_rate": 2.13968253968254e-06, + "loss": 0.1622, + "step": 8413 + }, + { + "epoch": 48.08, + "grad_norm": 33.74021530151367, + "learning_rate": 2.1333333333333334e-06, + "loss": 0.1119, + "step": 8414 + }, + { + "epoch": 48.08571428571429, + "grad_norm": 57.10844802856445, + "learning_rate": 2.126984126984127e-06, + "loss": 0.1016, + "step": 8415 + }, + { + "epoch": 48.09142857142857, + "grad_norm": 80.46121978759766, + "learning_rate": 2.1206349206349208e-06, + "loss": 0.1084, + "step": 8416 + }, + { + "epoch": 48.097142857142856, + "grad_norm": 57.60420227050781, + "learning_rate": 2.1142857142857147e-06, + "loss": 0.1187, + "step": 8417 + }, + { + "epoch": 48.10285714285714, + "grad_norm": 18.46352195739746, + "learning_rate": 2.107936507936508e-06, + "loss": 0.1117, + "step": 8418 + }, + { + "epoch": 48.10857142857143, + "grad_norm": 63.23112869262695, + "learning_rate": 2.1015873015873016e-06, + "loss": 0.0882, + "step": 8419 + }, + { + "epoch": 48.114285714285714, + "grad_norm": 41.47560501098633, + "learning_rate": 2.095238095238095e-06, + "loss": 0.4097, + "step": 8420 + }, + { + "epoch": 48.12, + "grad_norm": 58.211090087890625, + "learning_rate": 2.088888888888889e-06, + "loss": 0.1006, + "step": 8421 + }, + { + "epoch": 48.12571428571429, + "grad_norm": 33.181087493896484, + "learning_rate": 2.082539682539683e-06, + "loss": 0.1426, + "step": 8422 + }, + { + "epoch": 48.13142857142857, + "grad_norm": 38.934871673583984, + "learning_rate": 2.0761904761904764e-06, + "loss": 0.1232, + "step": 8423 + }, + { + "epoch": 48.137142857142855, + "grad_norm": 183.18692016601562, + "learning_rate": 2.06984126984127e-06, + "loss": 0.1435, + 
"step": 8424 + }, + { + "epoch": 48.142857142857146, + "grad_norm": 52.49740219116211, + "learning_rate": 2.0634920634920634e-06, + "loss": 0.0913, + "step": 8425 + }, + { + "epoch": 48.14857142857143, + "grad_norm": 44.68959045410156, + "learning_rate": 2.0571428571428573e-06, + "loss": 0.1456, + "step": 8426 + }, + { + "epoch": 48.15428571428571, + "grad_norm": 72.08333587646484, + "learning_rate": 2.0507936507936508e-06, + "loss": 0.2143, + "step": 8427 + }, + { + "epoch": 48.16, + "grad_norm": 40.389060974121094, + "learning_rate": 2.0444444444444447e-06, + "loss": 0.1633, + "step": 8428 + }, + { + "epoch": 48.16571428571429, + "grad_norm": 70.50105285644531, + "learning_rate": 2.038095238095238e-06, + "loss": 0.1382, + "step": 8429 + }, + { + "epoch": 48.17142857142857, + "grad_norm": 45.87871551513672, + "learning_rate": 2.0317460317460316e-06, + "loss": 0.1231, + "step": 8430 + }, + { + "epoch": 48.177142857142854, + "grad_norm": 24.484031677246094, + "learning_rate": 2.0253968253968256e-06, + "loss": 0.1068, + "step": 8431 + }, + { + "epoch": 48.182857142857145, + "grad_norm": 39.662105560302734, + "learning_rate": 2.019047619047619e-06, + "loss": 0.1016, + "step": 8432 + }, + { + "epoch": 48.18857142857143, + "grad_norm": 71.07365417480469, + "learning_rate": 2.012698412698413e-06, + "loss": 0.1826, + "step": 8433 + }, + { + "epoch": 48.19428571428571, + "grad_norm": 48.34844970703125, + "learning_rate": 2.0063492063492064e-06, + "loss": 0.1473, + "step": 8434 + }, + { + "epoch": 48.2, + "grad_norm": 54.211822509765625, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.1128, + "step": 8435 + }, + { + "epoch": 48.205714285714286, + "grad_norm": 75.8840103149414, + "learning_rate": 1.993650793650794e-06, + "loss": 0.142, + "step": 8436 + }, + { + "epoch": 48.21142857142857, + "grad_norm": 21.041221618652344, + "learning_rate": 1.9873015873015873e-06, + "loss": 0.1227, + "step": 8437 + }, + { + "epoch": 48.21714285714286, + "grad_norm": 
29.07630157470703, + "learning_rate": 1.9809523809523808e-06, + "loss": 0.185, + "step": 8438 + }, + { + "epoch": 48.222857142857144, + "grad_norm": 74.97119903564453, + "learning_rate": 1.9746031746031747e-06, + "loss": 0.1063, + "step": 8439 + }, + { + "epoch": 48.22857142857143, + "grad_norm": 32.257911682128906, + "learning_rate": 1.9682539682539686e-06, + "loss": 0.1198, + "step": 8440 + }, + { + "epoch": 48.23428571428571, + "grad_norm": 32.84730529785156, + "learning_rate": 1.961904761904762e-06, + "loss": 0.1552, + "step": 8441 + }, + { + "epoch": 48.24, + "grad_norm": 25.6329345703125, + "learning_rate": 1.9555555555555556e-06, + "loss": 0.2221, + "step": 8442 + }, + { + "epoch": 48.245714285714286, + "grad_norm": 43.75717544555664, + "learning_rate": 1.949206349206349e-06, + "loss": 0.1057, + "step": 8443 + }, + { + "epoch": 48.25142857142857, + "grad_norm": 105.43291473388672, + "learning_rate": 1.942857142857143e-06, + "loss": 0.1025, + "step": 8444 + }, + { + "epoch": 48.25714285714286, + "grad_norm": 51.59490203857422, + "learning_rate": 1.936507936507937e-06, + "loss": 0.1082, + "step": 8445 + }, + { + "epoch": 48.26285714285714, + "grad_norm": 24.162395477294922, + "learning_rate": 1.9301587301587303e-06, + "loss": 0.1386, + "step": 8446 + }, + { + "epoch": 48.26857142857143, + "grad_norm": 55.5849609375, + "learning_rate": 1.923809523809524e-06, + "loss": 0.2192, + "step": 8447 + }, + { + "epoch": 48.27428571428572, + "grad_norm": 107.56834411621094, + "learning_rate": 1.9174603174603173e-06, + "loss": 0.1601, + "step": 8448 + }, + { + "epoch": 48.28, + "grad_norm": 28.928194046020508, + "learning_rate": 1.9111111111111112e-06, + "loss": 0.1285, + "step": 8449 + }, + { + "epoch": 48.285714285714285, + "grad_norm": 19.675289154052734, + "learning_rate": 1.9047619047619051e-06, + "loss": 0.1165, + "step": 8450 + }, + { + "epoch": 48.29142857142857, + "grad_norm": 38.08034133911133, + "learning_rate": 1.8984126984126986e-06, + "loss": 0.1197, + 
"step": 8451 + }, + { + "epoch": 48.29714285714286, + "grad_norm": 31.34748077392578, + "learning_rate": 1.8920634920634923e-06, + "loss": 0.1073, + "step": 8452 + }, + { + "epoch": 48.30285714285714, + "grad_norm": 77.29824829101562, + "learning_rate": 1.8857142857142858e-06, + "loss": 0.1481, + "step": 8453 + }, + { + "epoch": 48.308571428571426, + "grad_norm": 22.881072998046875, + "learning_rate": 1.8793650793650795e-06, + "loss": 0.1077, + "step": 8454 + }, + { + "epoch": 48.31428571428572, + "grad_norm": 16.868284225463867, + "learning_rate": 1.873015873015873e-06, + "loss": 0.1626, + "step": 8455 + }, + { + "epoch": 48.32, + "grad_norm": 38.000587463378906, + "learning_rate": 1.8666666666666669e-06, + "loss": 0.1888, + "step": 8456 + }, + { + "epoch": 48.325714285714284, + "grad_norm": 54.66694641113281, + "learning_rate": 1.8603174603174606e-06, + "loss": 0.1385, + "step": 8457 + }, + { + "epoch": 48.331428571428575, + "grad_norm": 41.57939910888672, + "learning_rate": 1.853968253968254e-06, + "loss": 0.1119, + "step": 8458 + }, + { + "epoch": 48.33714285714286, + "grad_norm": 26.62154769897461, + "learning_rate": 1.8476190476190477e-06, + "loss": 0.0939, + "step": 8459 + }, + { + "epoch": 48.34285714285714, + "grad_norm": 57.56425094604492, + "learning_rate": 1.8412698412698412e-06, + "loss": 0.1442, + "step": 8460 + }, + { + "epoch": 48.348571428571425, + "grad_norm": 95.6938705444336, + "learning_rate": 1.834920634920635e-06, + "loss": 0.1888, + "step": 8461 + }, + { + "epoch": 48.354285714285716, + "grad_norm": 22.02704620361328, + "learning_rate": 1.8285714285714288e-06, + "loss": 0.1038, + "step": 8462 + }, + { + "epoch": 48.36, + "grad_norm": 28.582277297973633, + "learning_rate": 1.8222222222222223e-06, + "loss": 0.1591, + "step": 8463 + }, + { + "epoch": 48.36571428571428, + "grad_norm": 36.19487762451172, + "learning_rate": 1.815873015873016e-06, + "loss": 0.1642, + "step": 8464 + }, + { + "epoch": 48.371428571428574, + "grad_norm": 
46.585838317871094, + "learning_rate": 1.8095238095238095e-06, + "loss": 0.1996, + "step": 8465 + }, + { + "epoch": 48.37714285714286, + "grad_norm": 26.59910774230957, + "learning_rate": 1.8031746031746032e-06, + "loss": 0.1811, + "step": 8466 + }, + { + "epoch": 48.38285714285714, + "grad_norm": 17.714712142944336, + "learning_rate": 1.7968253968253967e-06, + "loss": 0.1112, + "step": 8467 + }, + { + "epoch": 48.38857142857143, + "grad_norm": 54.50025939941406, + "learning_rate": 1.7904761904761908e-06, + "loss": 0.0952, + "step": 8468 + }, + { + "epoch": 48.394285714285715, + "grad_norm": 32.499568939208984, + "learning_rate": 1.7841269841269843e-06, + "loss": 0.1282, + "step": 8469 + }, + { + "epoch": 48.4, + "grad_norm": 35.08049774169922, + "learning_rate": 1.777777777777778e-06, + "loss": 0.1066, + "step": 8470 + }, + { + "epoch": 48.40571428571428, + "grad_norm": 36.948974609375, + "learning_rate": 1.7714285714285714e-06, + "loss": 0.1615, + "step": 8471 + }, + { + "epoch": 48.41142857142857, + "grad_norm": 29.219314575195312, + "learning_rate": 1.7650793650793651e-06, + "loss": 0.1151, + "step": 8472 + }, + { + "epoch": 48.417142857142856, + "grad_norm": 51.52848434448242, + "learning_rate": 1.758730158730159e-06, + "loss": 0.1129, + "step": 8473 + }, + { + "epoch": 48.42285714285714, + "grad_norm": 66.25816345214844, + "learning_rate": 1.7523809523809525e-06, + "loss": 0.1026, + "step": 8474 + }, + { + "epoch": 48.42857142857143, + "grad_norm": 52.06458282470703, + "learning_rate": 1.7460317460317462e-06, + "loss": 0.1346, + "step": 8475 + }, + { + "epoch": 48.434285714285714, + "grad_norm": 38.74394989013672, + "learning_rate": 1.7396825396825397e-06, + "loss": 0.1779, + "step": 8476 + }, + { + "epoch": 48.44, + "grad_norm": 48.79632568359375, + "learning_rate": 1.7333333333333334e-06, + "loss": 0.1647, + "step": 8477 + }, + { + "epoch": 48.44571428571429, + "grad_norm": 57.10668182373047, + "learning_rate": 1.7269841269841269e-06, + "loss": 0.1178, + 
"step": 8478 + }, + { + "epoch": 48.45142857142857, + "grad_norm": 30.231332778930664, + "learning_rate": 1.7206349206349208e-06, + "loss": 0.1314, + "step": 8479 + }, + { + "epoch": 48.457142857142856, + "grad_norm": 80.39659118652344, + "learning_rate": 1.7142857142857145e-06, + "loss": 0.1357, + "step": 8480 + }, + { + "epoch": 48.462857142857146, + "grad_norm": 65.28248596191406, + "learning_rate": 1.707936507936508e-06, + "loss": 0.1205, + "step": 8481 + }, + { + "epoch": 48.46857142857143, + "grad_norm": 91.30731964111328, + "learning_rate": 1.7015873015873017e-06, + "loss": 0.1867, + "step": 8482 + }, + { + "epoch": 48.47428571428571, + "grad_norm": 32.85758972167969, + "learning_rate": 1.6952380952380951e-06, + "loss": 0.1569, + "step": 8483 + }, + { + "epoch": 48.48, + "grad_norm": 18.715124130249023, + "learning_rate": 1.6888888888888888e-06, + "loss": 0.1912, + "step": 8484 + }, + { + "epoch": 48.48571428571429, + "grad_norm": 29.891252517700195, + "learning_rate": 1.6825396825396827e-06, + "loss": 0.1739, + "step": 8485 + }, + { + "epoch": 48.49142857142857, + "grad_norm": 33.41127014160156, + "learning_rate": 1.6761904761904764e-06, + "loss": 0.1207, + "step": 8486 + }, + { + "epoch": 48.497142857142855, + "grad_norm": 66.02049255371094, + "learning_rate": 1.66984126984127e-06, + "loss": 0.1557, + "step": 8487 + }, + { + "epoch": 48.502857142857145, + "grad_norm": 22.25055503845215, + "learning_rate": 1.6634920634920636e-06, + "loss": 0.1087, + "step": 8488 + }, + { + "epoch": 48.50857142857143, + "grad_norm": 29.54048728942871, + "learning_rate": 1.657142857142857e-06, + "loss": 0.1358, + "step": 8489 + }, + { + "epoch": 48.51428571428571, + "grad_norm": 17.46042251586914, + "learning_rate": 1.650793650793651e-06, + "loss": 0.0944, + "step": 8490 + }, + { + "epoch": 48.52, + "grad_norm": 69.29207611083984, + "learning_rate": 1.6444444444444447e-06, + "loss": 0.1351, + "step": 8491 + }, + { + "epoch": 48.52571428571429, + "grad_norm": 55.9390869140625, 
+ "learning_rate": 1.6380952380952382e-06, + "loss": 0.1082, + "step": 8492 + }, + { + "epoch": 48.53142857142857, + "grad_norm": 40.031368255615234, + "learning_rate": 1.6317460317460319e-06, + "loss": 0.1397, + "step": 8493 + }, + { + "epoch": 48.537142857142854, + "grad_norm": 33.776851654052734, + "learning_rate": 1.6253968253968254e-06, + "loss": 0.127, + "step": 8494 + }, + { + "epoch": 48.542857142857144, + "grad_norm": 41.36642074584961, + "learning_rate": 1.619047619047619e-06, + "loss": 0.1472, + "step": 8495 + }, + { + "epoch": 48.54857142857143, + "grad_norm": 32.5374641418457, + "learning_rate": 1.612698412698413e-06, + "loss": 0.1165, + "step": 8496 + }, + { + "epoch": 48.55428571428571, + "grad_norm": 59.757659912109375, + "learning_rate": 1.6063492063492064e-06, + "loss": 0.1446, + "step": 8497 + }, + { + "epoch": 48.56, + "grad_norm": 28.34662628173828, + "learning_rate": 1.6000000000000001e-06, + "loss": 0.1066, + "step": 8498 + }, + { + "epoch": 48.565714285714286, + "grad_norm": 56.91286849975586, + "learning_rate": 1.5936507936507936e-06, + "loss": 0.0935, + "step": 8499 + }, + { + "epoch": 48.57142857142857, + "grad_norm": 27.71327018737793, + "learning_rate": 1.5873015873015873e-06, + "loss": 0.1222, + "step": 8500 + }, + { + "epoch": 48.57714285714286, + "grad_norm": 70.07229614257812, + "learning_rate": 1.5809523809523808e-06, + "loss": 0.2353, + "step": 8501 + }, + { + "epoch": 48.582857142857144, + "grad_norm": 61.30202865600586, + "learning_rate": 1.574603174603175e-06, + "loss": 0.2099, + "step": 8502 + }, + { + "epoch": 48.58857142857143, + "grad_norm": 30.183975219726562, + "learning_rate": 1.5682539682539684e-06, + "loss": 0.1917, + "step": 8503 + }, + { + "epoch": 48.59428571428572, + "grad_norm": 191.23448181152344, + "learning_rate": 1.561904761904762e-06, + "loss": 0.2012, + "step": 8504 + }, + { + "epoch": 48.6, + "grad_norm": 63.830299377441406, + "learning_rate": 1.5555555555555556e-06, + "loss": 0.1211, + "step": 8505 + }, + 
{ + "epoch": 48.605714285714285, + "grad_norm": 23.363412857055664, + "learning_rate": 1.5492063492063493e-06, + "loss": 0.1067, + "step": 8506 + }, + { + "epoch": 48.61142857142857, + "grad_norm": 41.130428314208984, + "learning_rate": 1.542857142857143e-06, + "loss": 0.1431, + "step": 8507 + }, + { + "epoch": 48.61714285714286, + "grad_norm": 51.86970901489258, + "learning_rate": 1.5365079365079365e-06, + "loss": 0.1029, + "step": 8508 + }, + { + "epoch": 48.62285714285714, + "grad_norm": 46.21601486206055, + "learning_rate": 1.5301587301587304e-06, + "loss": 0.1305, + "step": 8509 + }, + { + "epoch": 48.628571428571426, + "grad_norm": 23.716323852539062, + "learning_rate": 1.5238095238095238e-06, + "loss": 0.113, + "step": 8510 + }, + { + "epoch": 48.63428571428572, + "grad_norm": 82.37190246582031, + "learning_rate": 1.5174603174603175e-06, + "loss": 0.1198, + "step": 8511 + }, + { + "epoch": 48.64, + "grad_norm": 20.815404891967773, + "learning_rate": 1.5111111111111112e-06, + "loss": 0.1161, + "step": 8512 + }, + { + "epoch": 48.645714285714284, + "grad_norm": 20.904815673828125, + "learning_rate": 1.504761904761905e-06, + "loss": 0.1524, + "step": 8513 + }, + { + "epoch": 48.651428571428575, + "grad_norm": 217.2928466796875, + "learning_rate": 1.4984126984126984e-06, + "loss": 0.1161, + "step": 8514 + }, + { + "epoch": 48.65714285714286, + "grad_norm": 46.758567810058594, + "learning_rate": 1.492063492063492e-06, + "loss": 0.1546, + "step": 8515 + }, + { + "epoch": 48.66285714285714, + "grad_norm": 39.870391845703125, + "learning_rate": 1.4857142857142858e-06, + "loss": 0.0949, + "step": 8516 + }, + { + "epoch": 48.668571428571425, + "grad_norm": 63.797542572021484, + "learning_rate": 1.4793650793650793e-06, + "loss": 0.1232, + "step": 8517 + }, + { + "epoch": 48.674285714285716, + "grad_norm": 41.95578384399414, + "learning_rate": 1.4730158730158732e-06, + "loss": 0.1823, + "step": 8518 + }, + { + "epoch": 48.68, + "grad_norm": 77.81548309326172, + 
"learning_rate": 1.4666666666666667e-06, + "loss": 0.1086, + "step": 8519 + }, + { + "epoch": 48.68571428571428, + "grad_norm": 877.9599609375, + "learning_rate": 1.4603174603174606e-06, + "loss": 0.1582, + "step": 8520 + }, + { + "epoch": 48.691428571428574, + "grad_norm": 50.353511810302734, + "learning_rate": 1.453968253968254e-06, + "loss": 0.168, + "step": 8521 + }, + { + "epoch": 48.69714285714286, + "grad_norm": 43.11552047729492, + "learning_rate": 1.4476190476190478e-06, + "loss": 0.1206, + "step": 8522 + }, + { + "epoch": 48.70285714285714, + "grad_norm": 48.17919158935547, + "learning_rate": 1.4412698412698414e-06, + "loss": 0.2004, + "step": 8523 + }, + { + "epoch": 48.70857142857143, + "grad_norm": 227.92027282714844, + "learning_rate": 1.434920634920635e-06, + "loss": 0.1767, + "step": 8524 + }, + { + "epoch": 48.714285714285715, + "grad_norm": 33.221317291259766, + "learning_rate": 1.4285714285714286e-06, + "loss": 0.1659, + "step": 8525 + }, + { + "epoch": 48.72, + "grad_norm": 17.4228458404541, + "learning_rate": 1.4222222222222223e-06, + "loss": 0.116, + "step": 8526 + }, + { + "epoch": 48.72571428571428, + "grad_norm": 35.15834045410156, + "learning_rate": 1.415873015873016e-06, + "loss": 0.221, + "step": 8527 + }, + { + "epoch": 48.73142857142857, + "grad_norm": 448.2790832519531, + "learning_rate": 1.4095238095238095e-06, + "loss": 0.2029, + "step": 8528 + }, + { + "epoch": 48.73714285714286, + "grad_norm": 43.80533981323242, + "learning_rate": 1.4031746031746034e-06, + "loss": 0.0964, + "step": 8529 + }, + { + "epoch": 48.74285714285714, + "grad_norm": 29.789897918701172, + "learning_rate": 1.3968253968253969e-06, + "loss": 0.133, + "step": 8530 + }, + { + "epoch": 48.74857142857143, + "grad_norm": 49.25103759765625, + "learning_rate": 1.3904761904761906e-06, + "loss": 0.1735, + "step": 8531 + }, + { + "epoch": 48.754285714285714, + "grad_norm": 47.496864318847656, + "learning_rate": 1.3841269841269843e-06, + "loss": 0.1617, + "step": 8532 + 
}, + { + "epoch": 48.76, + "grad_norm": 32.824214935302734, + "learning_rate": 1.3777777777777778e-06, + "loss": 0.1524, + "step": 8533 + }, + { + "epoch": 48.76571428571429, + "grad_norm": 22.41704750061035, + "learning_rate": 1.3714285714285715e-06, + "loss": 0.1183, + "step": 8534 + }, + { + "epoch": 48.77142857142857, + "grad_norm": 27.07340431213379, + "learning_rate": 1.3650793650793652e-06, + "loss": 0.1116, + "step": 8535 + }, + { + "epoch": 48.777142857142856, + "grad_norm": 32.53962707519531, + "learning_rate": 1.3587301587301588e-06, + "loss": 0.142, + "step": 8536 + }, + { + "epoch": 48.78285714285714, + "grad_norm": 52.79143524169922, + "learning_rate": 1.3523809523809523e-06, + "loss": 0.1096, + "step": 8537 + }, + { + "epoch": 48.78857142857143, + "grad_norm": 48.82529067993164, + "learning_rate": 1.346031746031746e-06, + "loss": 0.1123, + "step": 8538 + }, + { + "epoch": 48.794285714285714, + "grad_norm": 31.630796432495117, + "learning_rate": 1.3396825396825397e-06, + "loss": 0.098, + "step": 8539 + }, + { + "epoch": 48.8, + "grad_norm": 59.10980224609375, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.1288, + "step": 8540 + }, + { + "epoch": 48.80571428571429, + "grad_norm": 34.39828109741211, + "learning_rate": 1.3269841269841271e-06, + "loss": 0.1212, + "step": 8541 + }, + { + "epoch": 48.81142857142857, + "grad_norm": 59.65375900268555, + "learning_rate": 1.3206349206349206e-06, + "loss": 0.1016, + "step": 8542 + }, + { + "epoch": 48.817142857142855, + "grad_norm": 31.157609939575195, + "learning_rate": 1.3142857142857145e-06, + "loss": 0.1139, + "step": 8543 + }, + { + "epoch": 48.822857142857146, + "grad_norm": 42.51649856567383, + "learning_rate": 1.307936507936508e-06, + "loss": 0.1153, + "step": 8544 + }, + { + "epoch": 48.82857142857143, + "grad_norm": 64.73578643798828, + "learning_rate": 1.3015873015873017e-06, + "loss": 0.1305, + "step": 8545 + }, + { + "epoch": 48.83428571428571, + "grad_norm": 42.71442413330078, + 
"learning_rate": 1.2952380952380954e-06, + "loss": 0.1291, + "step": 8546 + }, + { + "epoch": 48.84, + "grad_norm": 58.65089797973633, + "learning_rate": 1.2888888888888889e-06, + "loss": 0.2238, + "step": 8547 + }, + { + "epoch": 48.84571428571429, + "grad_norm": 21.79871368408203, + "learning_rate": 1.2825396825396825e-06, + "loss": 0.1413, + "step": 8548 + }, + { + "epoch": 48.85142857142857, + "grad_norm": 63.050376892089844, + "learning_rate": 1.2761904761904762e-06, + "loss": 0.1224, + "step": 8549 + }, + { + "epoch": 48.857142857142854, + "grad_norm": 77.241943359375, + "learning_rate": 1.26984126984127e-06, + "loss": 0.1062, + "step": 8550 + }, + { + "epoch": 48.862857142857145, + "grad_norm": 63.40999984741211, + "learning_rate": 1.2634920634920634e-06, + "loss": 0.1387, + "step": 8551 + }, + { + "epoch": 48.86857142857143, + "grad_norm": 60.35781478881836, + "learning_rate": 1.2571428571428573e-06, + "loss": 0.2139, + "step": 8552 + }, + { + "epoch": 48.87428571428571, + "grad_norm": 1141.8404541015625, + "learning_rate": 1.2507936507936508e-06, + "loss": 0.1724, + "step": 8553 + }, + { + "epoch": 48.88, + "grad_norm": 43.64670181274414, + "learning_rate": 1.2444444444444445e-06, + "loss": 0.1049, + "step": 8554 + }, + { + "epoch": 48.885714285714286, + "grad_norm": 37.39603805541992, + "learning_rate": 1.2380952380952382e-06, + "loss": 0.1447, + "step": 8555 + }, + { + "epoch": 48.89142857142857, + "grad_norm": 64.08164978027344, + "learning_rate": 1.2317460317460317e-06, + "loss": 0.1202, + "step": 8556 + }, + { + "epoch": 48.89714285714286, + "grad_norm": 62.142242431640625, + "learning_rate": 1.2253968253968254e-06, + "loss": 0.1119, + "step": 8557 + }, + { + "epoch": 48.902857142857144, + "grad_norm": 53.52615737915039, + "learning_rate": 1.219047619047619e-06, + "loss": 0.1599, + "step": 8558 + }, + { + "epoch": 48.90857142857143, + "grad_norm": 29.947540283203125, + "learning_rate": 1.2126984126984128e-06, + "loss": 0.1357, + "step": 8559 + }, + { 
+ "epoch": 48.91428571428571, + "grad_norm": 46.05306625366211, + "learning_rate": 1.2063492063492065e-06, + "loss": 0.1385, + "step": 8560 + }, + { + "epoch": 48.92, + "grad_norm": 24.307514190673828, + "learning_rate": 1.2000000000000002e-06, + "loss": 0.1233, + "step": 8561 + }, + { + "epoch": 48.925714285714285, + "grad_norm": 30.282651901245117, + "learning_rate": 1.1936507936507936e-06, + "loss": 0.0997, + "step": 8562 + }, + { + "epoch": 48.93142857142857, + "grad_norm": 61.168792724609375, + "learning_rate": 1.1873015873015873e-06, + "loss": 0.1346, + "step": 8563 + }, + { + "epoch": 48.93714285714286, + "grad_norm": 24.79856300354004, + "learning_rate": 1.180952380952381e-06, + "loss": 0.1251, + "step": 8564 + }, + { + "epoch": 48.94285714285714, + "grad_norm": 53.22463607788086, + "learning_rate": 1.1746031746031745e-06, + "loss": 0.1459, + "step": 8565 + }, + { + "epoch": 48.94857142857143, + "grad_norm": 47.92363739013672, + "learning_rate": 1.1682539682539684e-06, + "loss": 0.0947, + "step": 8566 + }, + { + "epoch": 48.95428571428572, + "grad_norm": 45.30970764160156, + "learning_rate": 1.161904761904762e-06, + "loss": 0.1523, + "step": 8567 + }, + { + "epoch": 48.96, + "grad_norm": 14.523917198181152, + "learning_rate": 1.1555555555555556e-06, + "loss": 0.163, + "step": 8568 + }, + { + "epoch": 48.965714285714284, + "grad_norm": 54.226322174072266, + "learning_rate": 1.1492063492063493e-06, + "loss": 0.1489, + "step": 8569 + }, + { + "epoch": 48.97142857142857, + "grad_norm": 27.205904006958008, + "learning_rate": 1.142857142857143e-06, + "loss": 0.1173, + "step": 8570 + }, + { + "epoch": 48.97714285714286, + "grad_norm": 382.25030517578125, + "learning_rate": 1.1365079365079365e-06, + "loss": 0.2088, + "step": 8571 + }, + { + "epoch": 48.98285714285714, + "grad_norm": 43.594722747802734, + "learning_rate": 1.1301587301587302e-06, + "loss": 0.1103, + "step": 8572 + }, + { + "epoch": 48.988571428571426, + "grad_norm": 43.90495300292969, + 
"learning_rate": 1.1238095238095239e-06, + "loss": 0.1303, + "step": 8573 + }, + { + "epoch": 48.994285714285716, + "grad_norm": 37.924713134765625, + "learning_rate": 1.1174603174603173e-06, + "loss": 0.1356, + "step": 8574 + }, + { + "epoch": 49.0, + "grad_norm": 26.365081787109375, + "learning_rate": 1.1111111111111112e-06, + "loss": 0.1228, + "step": 8575 + }, + { + "epoch": 49.0, + "eval_classes": 0, + "eval_loss": 0.5775097608566284, + "eval_map": 0.9383, + "eval_map_50": 0.9713, + "eval_map_75": 0.9641, + "eval_map_large": 0.9384, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9383, + "eval_map_small": -1.0, + "eval_mar_1": 0.7924, + "eval_mar_10": 0.9733, + "eval_mar_100": 0.9749, + "eval_mar_100_per_class": 0.9749, + "eval_mar_large": 0.9749, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 14.2189, + "eval_samples_per_second": 20.677, + "eval_steps_per_second": 2.602, + "step": 8575 + }, + { + "epoch": 49.005714285714284, + "grad_norm": 21.205801010131836, + "learning_rate": 1.1047619047619047e-06, + "loss": 0.1085, + "step": 8576 + }, + { + "epoch": 49.011428571428574, + "grad_norm": 32.06498336791992, + "learning_rate": 1.0984126984126984e-06, + "loss": 0.1302, + "step": 8577 + }, + { + "epoch": 49.01714285714286, + "grad_norm": 22.492408752441406, + "learning_rate": 1.0920634920634921e-06, + "loss": 0.1277, + "step": 8578 + }, + { + "epoch": 49.02285714285714, + "grad_norm": 95.90989685058594, + "learning_rate": 1.0857142857142858e-06, + "loss": 0.124, + "step": 8579 + }, + { + "epoch": 49.02857142857143, + "grad_norm": 31.616439819335938, + "learning_rate": 1.0793650793650795e-06, + "loss": 0.2782, + "step": 8580 + }, + { + "epoch": 49.034285714285716, + "grad_norm": 30.350252151489258, + "learning_rate": 1.073015873015873e-06, + "loss": 0.12, + "step": 8581 + }, + { + "epoch": 49.04, + "grad_norm": 45.6641960144043, + "learning_rate": 1.0666666666666667e-06, + "loss": 0.1092, + "step": 8582 + }, + { + "epoch": 
49.04571428571428, + "grad_norm": 35.478946685791016, + "learning_rate": 1.0603174603174604e-06, + "loss": 0.0976, + "step": 8583 + }, + { + "epoch": 49.05142857142857, + "grad_norm": 56.84311294555664, + "learning_rate": 1.053968253968254e-06, + "loss": 0.122, + "step": 8584 + }, + { + "epoch": 49.05714285714286, + "grad_norm": 57.067138671875, + "learning_rate": 1.0476190476190476e-06, + "loss": 0.0985, + "step": 8585 + }, + { + "epoch": 49.06285714285714, + "grad_norm": 22.298297882080078, + "learning_rate": 1.0412698412698415e-06, + "loss": 0.116, + "step": 8586 + }, + { + "epoch": 49.06857142857143, + "grad_norm": 45.00107192993164, + "learning_rate": 1.034920634920635e-06, + "loss": 0.0977, + "step": 8587 + }, + { + "epoch": 49.074285714285715, + "grad_norm": 30.702762603759766, + "learning_rate": 1.0285714285714286e-06, + "loss": 0.1414, + "step": 8588 + }, + { + "epoch": 49.08, + "grad_norm": 2884.48095703125, + "learning_rate": 1.0222222222222223e-06, + "loss": 0.1384, + "step": 8589 + }, + { + "epoch": 49.08571428571429, + "grad_norm": 34.0587272644043, + "learning_rate": 1.0158730158730158e-06, + "loss": 0.1174, + "step": 8590 + }, + { + "epoch": 49.09142857142857, + "grad_norm": 47.35640335083008, + "learning_rate": 1.0095238095238095e-06, + "loss": 0.1177, + "step": 8591 + }, + { + "epoch": 49.097142857142856, + "grad_norm": 81.97245788574219, + "learning_rate": 1.0031746031746032e-06, + "loss": 0.1276, + "step": 8592 + }, + { + "epoch": 49.10285714285714, + "grad_norm": 71.25306701660156, + "learning_rate": 9.96825396825397e-07, + "loss": 0.1174, + "step": 8593 + }, + { + "epoch": 49.10857142857143, + "grad_norm": 36.5328483581543, + "learning_rate": 9.904761904761904e-07, + "loss": 0.1016, + "step": 8594 + }, + { + "epoch": 49.114285714285714, + "grad_norm": 33.6234245300293, + "learning_rate": 9.841269841269843e-07, + "loss": 0.1326, + "step": 8595 + }, + { + "epoch": 49.12, + "grad_norm": 109.39087677001953, + "learning_rate": 
9.777777777777778e-07, + "loss": 0.0963, + "step": 8596 + }, + { + "epoch": 49.12571428571429, + "grad_norm": 42.822750091552734, + "learning_rate": 9.714285714285715e-07, + "loss": 0.1104, + "step": 8597 + }, + { + "epoch": 49.13142857142857, + "grad_norm": 45.368167877197266, + "learning_rate": 9.650793650793652e-07, + "loss": 0.1692, + "step": 8598 + }, + { + "epoch": 49.137142857142855, + "grad_norm": 23.05076789855957, + "learning_rate": 9.587301587301587e-07, + "loss": 0.153, + "step": 8599 + }, + { + "epoch": 49.142857142857146, + "grad_norm": 67.81226348876953, + "learning_rate": 9.523809523809526e-07, + "loss": 0.2117, + "step": 8600 + }, + { + "epoch": 49.14857142857143, + "grad_norm": 87.15576171875, + "learning_rate": 9.460317460317461e-07, + "loss": 0.1456, + "step": 8601 + }, + { + "epoch": 49.15428571428571, + "grad_norm": 22.477251052856445, + "learning_rate": 9.396825396825397e-07, + "loss": 0.1114, + "step": 8602 + }, + { + "epoch": 49.16, + "grad_norm": 48.177001953125, + "learning_rate": 9.333333333333334e-07, + "loss": 0.1037, + "step": 8603 + }, + { + "epoch": 49.16571428571429, + "grad_norm": 26.15044403076172, + "learning_rate": 9.26984126984127e-07, + "loss": 0.1582, + "step": 8604 + }, + { + "epoch": 49.17142857142857, + "grad_norm": 30.2648983001709, + "learning_rate": 9.206349206349206e-07, + "loss": 0.1152, + "step": 8605 + }, + { + "epoch": 49.177142857142854, + "grad_norm": 19.633533477783203, + "learning_rate": 9.142857142857144e-07, + "loss": 0.1913, + "step": 8606 + }, + { + "epoch": 49.182857142857145, + "grad_norm": 24.708240509033203, + "learning_rate": 9.07936507936508e-07, + "loss": 0.1436, + "step": 8607 + }, + { + "epoch": 49.18857142857143, + "grad_norm": 63.116065979003906, + "learning_rate": 9.015873015873016e-07, + "loss": 0.163, + "step": 8608 + }, + { + "epoch": 49.19428571428571, + "grad_norm": 43.32490539550781, + "learning_rate": 8.952380952380954e-07, + "loss": 0.1218, + "step": 8609 + }, + { + "epoch": 49.2, + 
"grad_norm": 20.782461166381836, + "learning_rate": 8.88888888888889e-07, + "loss": 0.1004, + "step": 8610 + }, + { + "epoch": 49.205714285714286, + "grad_norm": 61.93737030029297, + "learning_rate": 8.825396825396826e-07, + "loss": 0.1395, + "step": 8611 + }, + { + "epoch": 49.21142857142857, + "grad_norm": 41.53925704956055, + "learning_rate": 8.761904761904763e-07, + "loss": 0.1455, + "step": 8612 + }, + { + "epoch": 49.21714285714286, + "grad_norm": 48.69253921508789, + "learning_rate": 8.698412698412699e-07, + "loss": 0.0971, + "step": 8613 + }, + { + "epoch": 49.222857142857144, + "grad_norm": 17.066259384155273, + "learning_rate": 8.634920634920634e-07, + "loss": 0.1644, + "step": 8614 + }, + { + "epoch": 49.22857142857143, + "grad_norm": 38.65094757080078, + "learning_rate": 8.571428571428572e-07, + "loss": 0.1795, + "step": 8615 + }, + { + "epoch": 49.23428571428571, + "grad_norm": 15.854557991027832, + "learning_rate": 8.507936507936508e-07, + "loss": 0.1332, + "step": 8616 + }, + { + "epoch": 49.24, + "grad_norm": 33.58119583129883, + "learning_rate": 8.444444444444444e-07, + "loss": 0.1107, + "step": 8617 + }, + { + "epoch": 49.245714285714286, + "grad_norm": 19.7595157623291, + "learning_rate": 8.380952380952382e-07, + "loss": 0.1144, + "step": 8618 + }, + { + "epoch": 49.25142857142857, + "grad_norm": 65.31874084472656, + "learning_rate": 8.317460317460318e-07, + "loss": 0.184, + "step": 8619 + }, + { + "epoch": 49.25714285714286, + "grad_norm": 77.79670715332031, + "learning_rate": 8.253968253968255e-07, + "loss": 0.148, + "step": 8620 + }, + { + "epoch": 49.26285714285714, + "grad_norm": 33.58202362060547, + "learning_rate": 8.190476190476191e-07, + "loss": 0.0955, + "step": 8621 + }, + { + "epoch": 49.26857142857143, + "grad_norm": 65.25545501708984, + "learning_rate": 8.126984126984127e-07, + "loss": 0.1292, + "step": 8622 + }, + { + "epoch": 49.27428571428572, + "grad_norm": 26.848678588867188, + "learning_rate": 8.063492063492065e-07, + "loss": 
0.0972, + "step": 8623 + }, + { + "epoch": 49.28, + "grad_norm": 33.38542556762695, + "learning_rate": 8.000000000000001e-07, + "loss": 0.138, + "step": 8624 + }, + { + "epoch": 49.285714285714285, + "grad_norm": 36.43754577636719, + "learning_rate": 7.936507936507937e-07, + "loss": 0.1028, + "step": 8625 + }, + { + "epoch": 49.29142857142857, + "grad_norm": 45.26066970825195, + "learning_rate": 7.873015873015875e-07, + "loss": 0.1223, + "step": 8626 + }, + { + "epoch": 49.29714285714286, + "grad_norm": 30.72954750061035, + "learning_rate": 7.80952380952381e-07, + "loss": 0.0998, + "step": 8627 + }, + { + "epoch": 49.30285714285714, + "grad_norm": 21.003131866455078, + "learning_rate": 7.746031746031746e-07, + "loss": 0.1052, + "step": 8628 + }, + { + "epoch": 49.308571428571426, + "grad_norm": 26.676870346069336, + "learning_rate": 7.682539682539682e-07, + "loss": 0.1597, + "step": 8629 + }, + { + "epoch": 49.31428571428572, + "grad_norm": 52.057456970214844, + "learning_rate": 7.619047619047619e-07, + "loss": 0.1111, + "step": 8630 + }, + { + "epoch": 49.32, + "grad_norm": 465.9215393066406, + "learning_rate": 7.555555555555556e-07, + "loss": 0.1259, + "step": 8631 + }, + { + "epoch": 49.325714285714284, + "grad_norm": 63.335350036621094, + "learning_rate": 7.492063492063492e-07, + "loss": 0.1646, + "step": 8632 + }, + { + "epoch": 49.331428571428575, + "grad_norm": 25.190292358398438, + "learning_rate": 7.428571428571429e-07, + "loss": 0.1723, + "step": 8633 + }, + { + "epoch": 49.33714285714286, + "grad_norm": 82.61519622802734, + "learning_rate": 7.365079365079366e-07, + "loss": 0.1337, + "step": 8634 + }, + { + "epoch": 49.34285714285714, + "grad_norm": 55.032196044921875, + "learning_rate": 7.301587301587303e-07, + "loss": 0.1, + "step": 8635 + }, + { + "epoch": 49.348571428571425, + "grad_norm": 90.56881713867188, + "learning_rate": 7.238095238095239e-07, + "loss": 0.1015, + "step": 8636 + }, + { + "epoch": 49.354285714285716, + "grad_norm": 
42.1295051574707, + "learning_rate": 7.174603174603175e-07, + "loss": 0.1947, + "step": 8637 + }, + { + "epoch": 49.36, + "grad_norm": 31.98049545288086, + "learning_rate": 7.111111111111112e-07, + "loss": 0.1481, + "step": 8638 + }, + { + "epoch": 49.36571428571428, + "grad_norm": 611.3924560546875, + "learning_rate": 7.047619047619048e-07, + "loss": 0.1554, + "step": 8639 + }, + { + "epoch": 49.371428571428574, + "grad_norm": 68.15882873535156, + "learning_rate": 6.984126984126984e-07, + "loss": 0.1957, + "step": 8640 + }, + { + "epoch": 49.37714285714286, + "grad_norm": 12.325106620788574, + "learning_rate": 6.920634920634921e-07, + "loss": 0.1073, + "step": 8641 + }, + { + "epoch": 49.38285714285714, + "grad_norm": 33.361045837402344, + "learning_rate": 6.857142857142857e-07, + "loss": 0.0915, + "step": 8642 + }, + { + "epoch": 49.38857142857143, + "grad_norm": 63.9436149597168, + "learning_rate": 6.793650793650794e-07, + "loss": 0.1205, + "step": 8643 + }, + { + "epoch": 49.394285714285715, + "grad_norm": 28.686870574951172, + "learning_rate": 6.73015873015873e-07, + "loss": 0.1347, + "step": 8644 + }, + { + "epoch": 49.4, + "grad_norm": 24.923826217651367, + "learning_rate": 6.666666666666667e-07, + "loss": 0.1315, + "step": 8645 + }, + { + "epoch": 49.40571428571428, + "grad_norm": 27.837583541870117, + "learning_rate": 6.603174603174603e-07, + "loss": 0.1144, + "step": 8646 + }, + { + "epoch": 49.41142857142857, + "grad_norm": 282.70849609375, + "learning_rate": 6.53968253968254e-07, + "loss": 0.1473, + "step": 8647 + }, + { + "epoch": 49.417142857142856, + "grad_norm": 277.87164306640625, + "learning_rate": 6.476190476190477e-07, + "loss": 0.1597, + "step": 8648 + }, + { + "epoch": 49.42285714285714, + "grad_norm": 13.175591468811035, + "learning_rate": 6.412698412698413e-07, + "loss": 0.1367, + "step": 8649 + }, + { + "epoch": 49.42857142857143, + "grad_norm": 40.8079719543457, + "learning_rate": 6.34920634920635e-07, + "loss": 0.1257, + "step": 8650 + }, 
+ { + "epoch": 49.434285714285714, + "grad_norm": 100.99056243896484, + "learning_rate": 6.285714285714287e-07, + "loss": 0.0908, + "step": 8651 + }, + { + "epoch": 49.44, + "grad_norm": 85.10597229003906, + "learning_rate": 6.222222222222223e-07, + "loss": 0.1284, + "step": 8652 + }, + { + "epoch": 49.44571428571429, + "grad_norm": 54.369171142578125, + "learning_rate": 6.158730158730158e-07, + "loss": 0.0878, + "step": 8653 + }, + { + "epoch": 49.45142857142857, + "grad_norm": 15.557859420776367, + "learning_rate": 6.095238095238095e-07, + "loss": 0.1084, + "step": 8654 + }, + { + "epoch": 49.457142857142856, + "grad_norm": 28.819398880004883, + "learning_rate": 6.031746031746032e-07, + "loss": 0.5745, + "step": 8655 + }, + { + "epoch": 49.462857142857146, + "grad_norm": 54.07514190673828, + "learning_rate": 5.968253968253968e-07, + "loss": 0.1251, + "step": 8656 + }, + { + "epoch": 49.46857142857143, + "grad_norm": 41.79790115356445, + "learning_rate": 5.904761904761905e-07, + "loss": 0.1454, + "step": 8657 + }, + { + "epoch": 49.47428571428571, + "grad_norm": 52.691593170166016, + "learning_rate": 5.841269841269842e-07, + "loss": 0.1064, + "step": 8658 + }, + { + "epoch": 49.48, + "grad_norm": 58.90571594238281, + "learning_rate": 5.777777777777778e-07, + "loss": 0.2098, + "step": 8659 + }, + { + "epoch": 49.48571428571429, + "grad_norm": 24.718616485595703, + "learning_rate": 5.714285714285715e-07, + "loss": 0.1146, + "step": 8660 + }, + { + "epoch": 49.49142857142857, + "grad_norm": 31.180971145629883, + "learning_rate": 5.650793650793651e-07, + "loss": 0.1749, + "step": 8661 + }, + { + "epoch": 49.497142857142855, + "grad_norm": 30.386829376220703, + "learning_rate": 5.587301587301587e-07, + "loss": 0.1383, + "step": 8662 + }, + { + "epoch": 49.502857142857145, + "grad_norm": 32.07866668701172, + "learning_rate": 5.523809523809524e-07, + "loss": 0.1654, + "step": 8663 + }, + { + "epoch": 49.50857142857143, + "grad_norm": 21.287935256958008, + 
"learning_rate": 5.460317460317461e-07, + "loss": 0.1027, + "step": 8664 + }, + { + "epoch": 49.51428571428571, + "grad_norm": 69.31727600097656, + "learning_rate": 5.396825396825398e-07, + "loss": 0.134, + "step": 8665 + }, + { + "epoch": 49.52, + "grad_norm": 26.733562469482422, + "learning_rate": 5.333333333333333e-07, + "loss": 0.1535, + "step": 8666 + }, + { + "epoch": 49.52571428571429, + "grad_norm": 40.09874725341797, + "learning_rate": 5.26984126984127e-07, + "loss": 0.116, + "step": 8667 + }, + { + "epoch": 49.53142857142857, + "grad_norm": 38.64852523803711, + "learning_rate": 5.206349206349207e-07, + "loss": 0.1157, + "step": 8668 + }, + { + "epoch": 49.537142857142854, + "grad_norm": 41.49262619018555, + "learning_rate": 5.142857142857143e-07, + "loss": 0.1353, + "step": 8669 + }, + { + "epoch": 49.542857142857144, + "grad_norm": 24.964202880859375, + "learning_rate": 5.079365079365079e-07, + "loss": 0.1327, + "step": 8670 + }, + { + "epoch": 49.54857142857143, + "grad_norm": 51.50368118286133, + "learning_rate": 5.015873015873016e-07, + "loss": 0.0924, + "step": 8671 + }, + { + "epoch": 49.55428571428571, + "grad_norm": 25.396739959716797, + "learning_rate": 4.952380952380952e-07, + "loss": 0.1331, + "step": 8672 + }, + { + "epoch": 49.56, + "grad_norm": 55.02810287475586, + "learning_rate": 4.888888888888889e-07, + "loss": 0.0904, + "step": 8673 + }, + { + "epoch": 49.565714285714286, + "grad_norm": 68.27679443359375, + "learning_rate": 4.825396825396826e-07, + "loss": 0.2253, + "step": 8674 + }, + { + "epoch": 49.57142857142857, + "grad_norm": 46.043724060058594, + "learning_rate": 4.761904761904763e-07, + "loss": 0.1871, + "step": 8675 + }, + { + "epoch": 49.57714285714286, + "grad_norm": 38.0540771484375, + "learning_rate": 4.6984126984126987e-07, + "loss": 0.1149, + "step": 8676 + }, + { + "epoch": 49.582857142857144, + "grad_norm": 21.746551513671875, + "learning_rate": 4.634920634920635e-07, + "loss": 0.1411, + "step": 8677 + }, + { + "epoch": 
49.58857142857143, + "grad_norm": 21.1721134185791, + "learning_rate": 4.571428571428572e-07, + "loss": 0.0846, + "step": 8678 + }, + { + "epoch": 49.59428571428572, + "grad_norm": 30.42035675048828, + "learning_rate": 4.507936507936508e-07, + "loss": 0.1854, + "step": 8679 + }, + { + "epoch": 49.6, + "grad_norm": 17.304428100585938, + "learning_rate": 4.444444444444445e-07, + "loss": 0.1107, + "step": 8680 + }, + { + "epoch": 49.605714285714285, + "grad_norm": 97.10359954833984, + "learning_rate": 4.3809523809523813e-07, + "loss": 0.1457, + "step": 8681 + }, + { + "epoch": 49.61142857142857, + "grad_norm": 20.74465560913086, + "learning_rate": 4.317460317460317e-07, + "loss": 0.1375, + "step": 8682 + }, + { + "epoch": 49.61714285714286, + "grad_norm": 28.685523986816406, + "learning_rate": 4.253968253968254e-07, + "loss": 0.1129, + "step": 8683 + }, + { + "epoch": 49.62285714285714, + "grad_norm": 50.773048400878906, + "learning_rate": 4.190476190476191e-07, + "loss": 0.1449, + "step": 8684 + }, + { + "epoch": 49.628571428571426, + "grad_norm": 19.267837524414062, + "learning_rate": 4.1269841269841275e-07, + "loss": 0.1278, + "step": 8685 + }, + { + "epoch": 49.63428571428572, + "grad_norm": 17.098880767822266, + "learning_rate": 4.0634920634920634e-07, + "loss": 0.1052, + "step": 8686 + }, + { + "epoch": 49.64, + "grad_norm": 39.21644973754883, + "learning_rate": 4.0000000000000003e-07, + "loss": 0.0809, + "step": 8687 + }, + { + "epoch": 49.645714285714284, + "grad_norm": 159.01382446289062, + "learning_rate": 3.9365079365079373e-07, + "loss": 0.1159, + "step": 8688 + }, + { + "epoch": 49.651428571428575, + "grad_norm": 44.169063568115234, + "learning_rate": 3.873015873015873e-07, + "loss": 0.2899, + "step": 8689 + }, + { + "epoch": 49.65714285714286, + "grad_norm": 55.33827590942383, + "learning_rate": 3.8095238095238096e-07, + "loss": 0.0921, + "step": 8690 + }, + { + "epoch": 49.66285714285714, + "grad_norm": 16.90172004699707, + "learning_rate": 
3.746031746031746e-07, + "loss": 0.1106, + "step": 8691 + }, + { + "epoch": 49.668571428571425, + "grad_norm": 42.39299011230469, + "learning_rate": 3.682539682539683e-07, + "loss": 0.1854, + "step": 8692 + }, + { + "epoch": 49.674285714285716, + "grad_norm": 42.97679901123047, + "learning_rate": 3.6190476190476194e-07, + "loss": 0.1753, + "step": 8693 + }, + { + "epoch": 49.68, + "grad_norm": 32.868167877197266, + "learning_rate": 3.555555555555556e-07, + "loss": 0.1082, + "step": 8694 + }, + { + "epoch": 49.68571428571428, + "grad_norm": 74.34040069580078, + "learning_rate": 3.492063492063492e-07, + "loss": 0.1063, + "step": 8695 + }, + { + "epoch": 49.691428571428574, + "grad_norm": 21.42570686340332, + "learning_rate": 3.4285714285714286e-07, + "loss": 0.1261, + "step": 8696 + }, + { + "epoch": 49.69714285714286, + "grad_norm": 35.38351821899414, + "learning_rate": 3.365079365079365e-07, + "loss": 0.1202, + "step": 8697 + }, + { + "epoch": 49.70285714285714, + "grad_norm": 32.82429122924805, + "learning_rate": 3.3015873015873015e-07, + "loss": 0.1131, + "step": 8698 + }, + { + "epoch": 49.70857142857143, + "grad_norm": 36.85771560668945, + "learning_rate": 3.2380952380952384e-07, + "loss": 0.1317, + "step": 8699 + }, + { + "epoch": 49.714285714285715, + "grad_norm": 20.852977752685547, + "learning_rate": 3.174603174603175e-07, + "loss": 0.1724, + "step": 8700 + }, + { + "epoch": 49.72, + "grad_norm": 27.297931671142578, + "learning_rate": 3.111111111111111e-07, + "loss": 0.1426, + "step": 8701 + }, + { + "epoch": 49.72571428571428, + "grad_norm": 80.59913635253906, + "learning_rate": 3.0476190476190477e-07, + "loss": 0.1696, + "step": 8702 + }, + { + "epoch": 49.73142857142857, + "grad_norm": 52.69660949707031, + "learning_rate": 2.984126984126984e-07, + "loss": 0.1506, + "step": 8703 + }, + { + "epoch": 49.73714285714286, + "grad_norm": 44.189369201660156, + "learning_rate": 2.920634920634921e-07, + "loss": 0.1293, + "step": 8704 + }, + { + "epoch": 
49.74285714285714, + "grad_norm": 83.07208251953125, + "learning_rate": 2.8571428571428575e-07, + "loss": 0.143, + "step": 8705 + }, + { + "epoch": 49.74857142857143, + "grad_norm": 98.30414581298828, + "learning_rate": 2.7936507936507934e-07, + "loss": 0.1133, + "step": 8706 + }, + { + "epoch": 49.754285714285714, + "grad_norm": 39.12610626220703, + "learning_rate": 2.7301587301587303e-07, + "loss": 0.1472, + "step": 8707 + }, + { + "epoch": 49.76, + "grad_norm": 25.01207160949707, + "learning_rate": 2.6666666666666667e-07, + "loss": 0.1268, + "step": 8708 + }, + { + "epoch": 49.76571428571429, + "grad_norm": 29.20207405090332, + "learning_rate": 2.6031746031746037e-07, + "loss": 0.1149, + "step": 8709 + }, + { + "epoch": 49.77142857142857, + "grad_norm": 49.14268493652344, + "learning_rate": 2.5396825396825396e-07, + "loss": 0.1132, + "step": 8710 + }, + { + "epoch": 49.777142857142856, + "grad_norm": 239.8989715576172, + "learning_rate": 2.476190476190476e-07, + "loss": 0.1319, + "step": 8711 + }, + { + "epoch": 49.78285714285714, + "grad_norm": 737.55810546875, + "learning_rate": 2.412698412698413e-07, + "loss": 0.1384, + "step": 8712 + }, + { + "epoch": 49.78857142857143, + "grad_norm": 53.101776123046875, + "learning_rate": 2.3492063492063493e-07, + "loss": 0.1095, + "step": 8713 + }, + { + "epoch": 49.794285714285714, + "grad_norm": 54.95819854736328, + "learning_rate": 2.285714285714286e-07, + "loss": 0.103, + "step": 8714 + }, + { + "epoch": 49.8, + "grad_norm": 23.40339469909668, + "learning_rate": 2.2222222222222224e-07, + "loss": 0.1043, + "step": 8715 + }, + { + "epoch": 49.80571428571429, + "grad_norm": 57.28781509399414, + "learning_rate": 2.1587301587301586e-07, + "loss": 0.1065, + "step": 8716 + }, + { + "epoch": 49.81142857142857, + "grad_norm": 57.101924896240234, + "learning_rate": 2.0952380952380955e-07, + "loss": 0.1305, + "step": 8717 + }, + { + "epoch": 49.817142857142855, + "grad_norm": 59.88277053833008, + "learning_rate": 
2.0317460317460317e-07, + "loss": 0.1376, + "step": 8718 + }, + { + "epoch": 49.822857142857146, + "grad_norm": 39.137847900390625, + "learning_rate": 1.9682539682539686e-07, + "loss": 0.1243, + "step": 8719 + }, + { + "epoch": 49.82857142857143, + "grad_norm": 18.661094665527344, + "learning_rate": 1.9047619047619048e-07, + "loss": 0.1702, + "step": 8720 + }, + { + "epoch": 49.83428571428571, + "grad_norm": 55.55431365966797, + "learning_rate": 1.8412698412698415e-07, + "loss": 0.1566, + "step": 8721 + }, + { + "epoch": 49.84, + "grad_norm": 55.64157485961914, + "learning_rate": 1.777777777777778e-07, + "loss": 0.1292, + "step": 8722 + }, + { + "epoch": 49.84571428571429, + "grad_norm": 19.37632942199707, + "learning_rate": 1.7142857142857143e-07, + "loss": 0.1204, + "step": 8723 + }, + { + "epoch": 49.85142857142857, + "grad_norm": 37.731903076171875, + "learning_rate": 1.6507936507936507e-07, + "loss": 0.1444, + "step": 8724 + }, + { + "epoch": 49.857142857142854, + "grad_norm": 88.25274658203125, + "learning_rate": 1.5873015873015874e-07, + "loss": 0.1353, + "step": 8725 + }, + { + "epoch": 49.862857142857145, + "grad_norm": 97.2085952758789, + "learning_rate": 1.5238095238095238e-07, + "loss": 0.1177, + "step": 8726 + }, + { + "epoch": 49.86857142857143, + "grad_norm": 34.84482192993164, + "learning_rate": 1.4603174603174605e-07, + "loss": 0.1166, + "step": 8727 + }, + { + "epoch": 49.87428571428571, + "grad_norm": 57.15216064453125, + "learning_rate": 1.3968253968253967e-07, + "loss": 0.1416, + "step": 8728 + }, + { + "epoch": 49.88, + "grad_norm": 37.720645904541016, + "learning_rate": 1.3333333333333334e-07, + "loss": 0.1528, + "step": 8729 + }, + { + "epoch": 49.885714285714286, + "grad_norm": 48.101558685302734, + "learning_rate": 1.2698412698412698e-07, + "loss": 0.1142, + "step": 8730 + }, + { + "epoch": 49.89142857142857, + "grad_norm": 41.24326705932617, + "learning_rate": 1.2063492063492065e-07, + "loss": 0.1573, + "step": 8731 + }, + { + "epoch": 
49.89714285714286, + "grad_norm": 70.49794006347656, + "learning_rate": 1.142857142857143e-07, + "loss": 0.1123, + "step": 8732 + }, + { + "epoch": 49.902857142857144, + "grad_norm": 26.48020362854004, + "learning_rate": 1.0793650793650793e-07, + "loss": 0.1771, + "step": 8733 + }, + { + "epoch": 49.90857142857143, + "grad_norm": 30.033111572265625, + "learning_rate": 1.0158730158730159e-07, + "loss": 0.1004, + "step": 8734 + }, + { + "epoch": 49.91428571428571, + "grad_norm": 31.556259155273438, + "learning_rate": 9.523809523809524e-08, + "loss": 0.1414, + "step": 8735 + }, + { + "epoch": 49.92, + "grad_norm": 24.438173294067383, + "learning_rate": 8.88888888888889e-08, + "loss": 0.133, + "step": 8736 + }, + { + "epoch": 49.925714285714285, + "grad_norm": 24.674129486083984, + "learning_rate": 8.253968253968254e-08, + "loss": 0.1231, + "step": 8737 + }, + { + "epoch": 49.93142857142857, + "grad_norm": 59.45757293701172, + "learning_rate": 7.619047619047619e-08, + "loss": 0.1327, + "step": 8738 + }, + { + "epoch": 49.93714285714286, + "grad_norm": 26.112653732299805, + "learning_rate": 6.984126984126983e-08, + "loss": 0.0904, + "step": 8739 + }, + { + "epoch": 49.94285714285714, + "grad_norm": 41.43610382080078, + "learning_rate": 6.349206349206349e-08, + "loss": 0.1086, + "step": 8740 + }, + { + "epoch": 49.94857142857143, + "grad_norm": 48.097923278808594, + "learning_rate": 5.714285714285715e-08, + "loss": 0.1219, + "step": 8741 + }, + { + "epoch": 49.95428571428572, + "grad_norm": 34.53264236450195, + "learning_rate": 5.079365079365079e-08, + "loss": 0.1583, + "step": 8742 + }, + { + "epoch": 49.96, + "grad_norm": 33.10739517211914, + "learning_rate": 4.444444444444445e-08, + "loss": 0.1107, + "step": 8743 + }, + { + "epoch": 49.965714285714284, + "grad_norm": 14.772244453430176, + "learning_rate": 3.8095238095238096e-08, + "loss": 0.1375, + "step": 8744 + }, + { + "epoch": 49.97142857142857, + "grad_norm": 29.29127311706543, + "learning_rate": 
3.1746031746031744e-08, + "loss": 0.1135, + "step": 8745 + }, + { + "epoch": 49.97714285714286, + "grad_norm": 25.30208396911621, + "learning_rate": 2.5396825396825396e-08, + "loss": 0.0992, + "step": 8746 + }, + { + "epoch": 49.98285714285714, + "grad_norm": 50.606361389160156, + "learning_rate": 1.9047619047619048e-08, + "loss": 0.1279, + "step": 8747 + }, + { + "epoch": 49.988571428571426, + "grad_norm": 40.236900329589844, + "learning_rate": 1.2698412698412698e-08, + "loss": 0.1602, + "step": 8748 + }, + { + "epoch": 49.994285714285716, + "grad_norm": 18.14468765258789, + "learning_rate": 6.349206349206349e-09, + "loss": 0.1338, + "step": 8749 + }, + { + "epoch": 50.0, + "grad_norm": 48.93581771850586, + "learning_rate": 0.0, + "loss": 0.1094, + "step": 8750 + }, + { + "epoch": 50.0, + "eval_classes": 0, + "eval_loss": 0.5793861150741577, + "eval_map": 0.9388, + "eval_map_50": 0.9721, + "eval_map_75": 0.9641, + "eval_map_large": 0.9388, + "eval_map_medium": -1.0, + "eval_map_per_class": 0.9388, + "eval_map_small": -1.0, + "eval_mar_1": 0.7959, + "eval_mar_10": 0.9733, + "eval_mar_100": 0.9749, + "eval_mar_100_per_class": 0.9749, + "eval_mar_large": 0.9749, + "eval_mar_medium": -1.0, + "eval_mar_small": -1.0, + "eval_runtime": 13.5545, + "eval_samples_per_second": 21.69, + "eval_steps_per_second": 2.73, + "step": 8750 + } + ], + "logging_steps": 1, + "max_steps": 8750, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 15, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 14 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.2356489609216e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff 
--git a/best_checkpoint/training_args.bin b/best_checkpoint/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b94cd4028ea19975be06e91a5eb60032a049eff1 --- /dev/null +++ b/best_checkpoint/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3706f9f79f5744209c871ccf9fbee60fa5a8e284a17427199064284853941395 +size 5496 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5f75103d2a8582229fe2ced88551ad02cbb27e1c --- /dev/null +++ b/config.json @@ -0,0 +1,61 @@ +{ + "_name_or_path": "microsoft/conditional-detr-resnet-50", + "activation_dropout": 0.0, + "activation_function": "relu", + "architectures": [ + "ConditionalDetrForObjectDetection" + ], + "attention_dropout": 0.0, + "auxiliary_loss": false, + "backbone": "resnet50", + "backbone_config": null, + "backbone_kwargs": { + "in_chans": 3, + "out_indices": [ + 1, + 2, + 3, + 4 + ] + }, + "bbox_cost": 5, + "bbox_loss_coefficient": 5, + "class_cost": 2, + "cls_loss_coefficient": 2, + "d_model": 256, + "decoder_attention_heads": 8, + "decoder_ffn_dim": 2048, + "decoder_layerdrop": 0.0, + "decoder_layers": 6, + "dice_loss_coefficient": 1, + "dilation": false, + "dropout": 0.1, + "encoder_attention_heads": 8, + "encoder_ffn_dim": 2048, + "encoder_layerdrop": 0.0, + "encoder_layers": 6, + "focal_alpha": 0.25, + "giou_cost": 2, + "giou_loss_coefficient": 2, + "id2label": { + "0": "signature" + }, + "init_std": 0.02, + "init_xavier_std": 1.0, + "is_encoder_decoder": true, + "label2id": { + "signature": 0 + }, + "mask_loss_coefficient": 1, + "max_position_embeddings": 1024, + "model_type": "conditional_detr", + "num_channels": 3, + "num_hidden_layers": 6, + "num_queries": 300, + "position_embedding_type": "sine", + "scale_embedding": false, + "torch_dtype": "float32", + "transformers_version": "4.46.3", + "use_pretrained_backbone": true, + "use_timm_backbone": true +} diff --git 
a/eval/cpu/confusion_matrix.png b/eval/cpu/confusion_matrix.png new file mode 100644 index 0000000000000000000000000000000000000000..7f2f714632f724cef03630c5d2ca0fc496e25918 Binary files /dev/null and b/eval/cpu/confusion_matrix.png differ diff --git a/eval/cpu/inference_grid_0.png b/eval/cpu/inference_grid_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8de1fbc123b2cf18fc4698918b915bf5fb76aed7 --- /dev/null +++ b/eval/cpu/inference_grid_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531b5f201a53888a78883489ebe2b4abcedb73829aca2838a925d4c003917e33 +size 115618 diff --git a/eval/cpu/inference_grid_1.png b/eval/cpu/inference_grid_1.png new file mode 100644 index 0000000000000000000000000000000000000000..1f20309c4fbef8a3c873cc0e05e54f5ce8021cb8 --- /dev/null +++ b/eval/cpu/inference_grid_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a74c8f9e0be541121074e1146d26e64ff84b46b76fa673a2f23d5358babb65 +size 129712 diff --git a/eval/cpu/inference_grid_10.png b/eval/cpu/inference_grid_10.png new file mode 100644 index 0000000000000000000000000000000000000000..ffabf4f6d030bf38dad7bf336b4d774b9627c53a --- /dev/null +++ b/eval/cpu/inference_grid_10.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e12511e3892731bfe17542eacd66cd59fe55860d88b0f5eeb027eb14b50fd9 +size 111497 diff --git a/eval/cpu/inference_grid_11.png b/eval/cpu/inference_grid_11.png new file mode 100644 index 0000000000000000000000000000000000000000..15d67438388e1cdbe7baaea4cd1f032a0015a525 --- /dev/null +++ b/eval/cpu/inference_grid_11.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10023be3aacc95f06ba3ad7eea15f11762dde10ce82728f23d76cc6b83df34b0 +size 139997 diff --git a/eval/cpu/inference_grid_12.png b/eval/cpu/inference_grid_12.png new file mode 100644 index 0000000000000000000000000000000000000000..0cf10e1e259648a7720ed042a682b5a7e5c7fc6f Binary files /dev/null and 
b/eval/cpu/inference_grid_12.png differ diff --git a/eval/cpu/inference_grid_13.png b/eval/cpu/inference_grid_13.png new file mode 100644 index 0000000000000000000000000000000000000000..469b50b0c4c52d1755f1c2a6b18774976726e13a Binary files /dev/null and b/eval/cpu/inference_grid_13.png differ diff --git a/eval/cpu/inference_grid_14.png b/eval/cpu/inference_grid_14.png new file mode 100644 index 0000000000000000000000000000000000000000..a8d46a0c0aeba076c88d444a6e7fe96991d22fbd Binary files /dev/null and b/eval/cpu/inference_grid_14.png differ diff --git a/eval/cpu/inference_grid_15.png b/eval/cpu/inference_grid_15.png new file mode 100644 index 0000000000000000000000000000000000000000..ac37870e39bbe5f3bd26a39492dc5bd2a810f248 Binary files /dev/null and b/eval/cpu/inference_grid_15.png differ diff --git a/eval/cpu/inference_grid_16.png b/eval/cpu/inference_grid_16.png new file mode 100644 index 0000000000000000000000000000000000000000..bb054abfdafb2630f136bb89dcc90e70320db49a --- /dev/null +++ b/eval/cpu/inference_grid_16.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c16506561145145a7c3d67d326c8fbecaa9c3db93b052e544a100a4a6f77289 +size 143964 diff --git a/eval/cpu/inference_grid_17.png b/eval/cpu/inference_grid_17.png new file mode 100644 index 0000000000000000000000000000000000000000..d81f8f0e31bc2eab176a5a5f76adb609818ae00c Binary files /dev/null and b/eval/cpu/inference_grid_17.png differ diff --git a/eval/cpu/inference_grid_18.png b/eval/cpu/inference_grid_18.png new file mode 100644 index 0000000000000000000000000000000000000000..68cea80454f4cc93429fd76cbadba46b5621b70e Binary files /dev/null and b/eval/cpu/inference_grid_18.png differ diff --git a/eval/cpu/inference_grid_19.png b/eval/cpu/inference_grid_19.png new file mode 100644 index 0000000000000000000000000000000000000000..fbf0f7b127265413e7522923d7c5d9988796d0f1 --- /dev/null +++ b/eval/cpu/inference_grid_19.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:8db0a3330ad0555c48b7fa9e5653d96a6068cee7a61b803a85fe6baba0b887e3 +size 112716 diff --git a/eval/cpu/inference_grid_2.png b/eval/cpu/inference_grid_2.png new file mode 100644 index 0000000000000000000000000000000000000000..8e858e7deeec4e5293e8dbc44268f2c9e831ad77 --- /dev/null +++ b/eval/cpu/inference_grid_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c04041c23c4290b35614cd463e5f3d94fbd1f4130bf1beec2e87a7a136cf38 +size 100960 diff --git a/eval/cpu/inference_grid_20.png b/eval/cpu/inference_grid_20.png new file mode 100644 index 0000000000000000000000000000000000000000..977df1464d59ae55a0518f636216d7ea8f0cb92a --- /dev/null +++ b/eval/cpu/inference_grid_20.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9045dc42b8df8df426cd0544df2f6a287ad62fffa5d43fd47931419a57f6004e +size 157969 diff --git a/eval/cpu/inference_grid_21.png b/eval/cpu/inference_grid_21.png new file mode 100644 index 0000000000000000000000000000000000000000..f2fc6c16a0771d53c90f20fb4664ce7f372e2522 Binary files /dev/null and b/eval/cpu/inference_grid_21.png differ diff --git a/eval/cpu/inference_grid_22.png b/eval/cpu/inference_grid_22.png new file mode 100644 index 0000000000000000000000000000000000000000..728c960025c26d3be319a146fd9800d2290b82f9 --- /dev/null +++ b/eval/cpu/inference_grid_22.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21de7020d60fd497f31ba954ff35fcced26fb75cf2224ad10049b1046bee202f +size 116087 diff --git a/eval/cpu/inference_grid_23.png b/eval/cpu/inference_grid_23.png new file mode 100644 index 0000000000000000000000000000000000000000..7ac776f83b7f6d34d612561efe5613e6c86bfc79 --- /dev/null +++ b/eval/cpu/inference_grid_23.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b4b553806a31b7fe66a330087d3a2dc1fb23a5037f15e4c35c30cbb15acdba +size 145482 diff --git a/eval/cpu/inference_grid_24.png b/eval/cpu/inference_grid_24.png new file mode 100644 index 
0000000000000000000000000000000000000000..804ded4c2ed36ba80ab7f69cc96860ca4677de1f Binary files /dev/null and b/eval/cpu/inference_grid_24.png differ diff --git a/eval/cpu/inference_grid_3.png b/eval/cpu/inference_grid_3.png new file mode 100644 index 0000000000000000000000000000000000000000..86bdee4fb7c6c90e01fb8ab4f1cd262ef3d9dd4e Binary files /dev/null and b/eval/cpu/inference_grid_3.png differ diff --git a/eval/cpu/inference_grid_4.png b/eval/cpu/inference_grid_4.png new file mode 100644 index 0000000000000000000000000000000000000000..c87774fa5cf4a40cd3eb881d746557bc768a3dd8 Binary files /dev/null and b/eval/cpu/inference_grid_4.png differ diff --git a/eval/cpu/inference_grid_5.png b/eval/cpu/inference_grid_5.png new file mode 100644 index 0000000000000000000000000000000000000000..71c10626e12b937cd8ad217dfc0e10c006fa7205 --- /dev/null +++ b/eval/cpu/inference_grid_5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8263af9871e524c9745f2160e83e9eb8009349c8847c05b45f8a62f7e267b999 +size 110258 diff --git a/eval/cpu/inference_grid_6.png b/eval/cpu/inference_grid_6.png new file mode 100644 index 0000000000000000000000000000000000000000..e612fcedebee35ff0d51d24547d0247a0f6f4ba9 Binary files /dev/null and b/eval/cpu/inference_grid_6.png differ diff --git a/eval/cpu/inference_grid_7.png b/eval/cpu/inference_grid_7.png new file mode 100644 index 0000000000000000000000000000000000000000..7a8ec3067ed29ebe36d32e9164939167f1dfd1d5 Binary files /dev/null and b/eval/cpu/inference_grid_7.png differ diff --git a/eval/cpu/inference_grid_8.png b/eval/cpu/inference_grid_8.png new file mode 100644 index 0000000000000000000000000000000000000000..f682e85d9bc2a2a349fabb4d11c3b0fd5f6a6e78 --- /dev/null +++ b/eval/cpu/inference_grid_8.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b7bd53402eb2c8fdfae4bea951cb590cef2b16349f072024966417f83b55f7 +size 115151 diff --git a/eval/cpu/inference_grid_9.png b/eval/cpu/inference_grid_9.png 
new file mode 100644 index 0000000000000000000000000000000000000000..538550324be98fb6796080695c390d9aba6f8c2c --- /dev/null +++ b/eval/cpu/inference_grid_9.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1fc49bd0f4cc40091408d4c03a317ec602e89bc72d614abb17cc44971d99973 +size 145357 diff --git a/eval/gpu/confusion_matrix.png b/eval/gpu/confusion_matrix.png new file mode 100644 index 0000000000000000000000000000000000000000..7f2f714632f724cef03630c5d2ca0fc496e25918 Binary files /dev/null and b/eval/gpu/confusion_matrix.png differ diff --git a/eval/gpu/inference_grid_0.png b/eval/gpu/inference_grid_0.png new file mode 100644 index 0000000000000000000000000000000000000000..8de1fbc123b2cf18fc4698918b915bf5fb76aed7 --- /dev/null +++ b/eval/gpu/inference_grid_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531b5f201a53888a78883489ebe2b4abcedb73829aca2838a925d4c003917e33 +size 115618 diff --git a/eval/gpu/inference_grid_1.png b/eval/gpu/inference_grid_1.png new file mode 100644 index 0000000000000000000000000000000000000000..1f20309c4fbef8a3c873cc0e05e54f5ce8021cb8 --- /dev/null +++ b/eval/gpu/inference_grid_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a74c8f9e0be541121074e1146d26e64ff84b46b76fa673a2f23d5358babb65 +size 129712 diff --git a/eval/gpu/inference_grid_10.png b/eval/gpu/inference_grid_10.png new file mode 100644 index 0000000000000000000000000000000000000000..ffabf4f6d030bf38dad7bf336b4d774b9627c53a --- /dev/null +++ b/eval/gpu/inference_grid_10.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e12511e3892731bfe17542eacd66cd59fe55860d88b0f5eeb027eb14b50fd9 +size 111497 diff --git a/eval/gpu/inference_grid_11.png b/eval/gpu/inference_grid_11.png new file mode 100644 index 0000000000000000000000000000000000000000..15d67438388e1cdbe7baaea4cd1f032a0015a525 --- /dev/null +++ b/eval/gpu/inference_grid_11.png @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:10023be3aacc95f06ba3ad7eea15f11762dde10ce82728f23d76cc6b83df34b0 +size 139997 diff --git a/eval/gpu/inference_grid_12.png b/eval/gpu/inference_grid_12.png new file mode 100644 index 0000000000000000000000000000000000000000..0cf10e1e259648a7720ed042a682b5a7e5c7fc6f Binary files /dev/null and b/eval/gpu/inference_grid_12.png differ diff --git a/eval/gpu/inference_grid_13.png b/eval/gpu/inference_grid_13.png new file mode 100644 index 0000000000000000000000000000000000000000..469b50b0c4c52d1755f1c2a6b18774976726e13a Binary files /dev/null and b/eval/gpu/inference_grid_13.png differ diff --git a/eval/gpu/inference_grid_14.png b/eval/gpu/inference_grid_14.png new file mode 100644 index 0000000000000000000000000000000000000000..a8d46a0c0aeba076c88d444a6e7fe96991d22fbd Binary files /dev/null and b/eval/gpu/inference_grid_14.png differ diff --git a/eval/gpu/inference_grid_15.png b/eval/gpu/inference_grid_15.png new file mode 100644 index 0000000000000000000000000000000000000000..ac37870e39bbe5f3bd26a39492dc5bd2a810f248 Binary files /dev/null and b/eval/gpu/inference_grid_15.png differ diff --git a/eval/gpu/inference_grid_16.png b/eval/gpu/inference_grid_16.png new file mode 100644 index 0000000000000000000000000000000000000000..bb054abfdafb2630f136bb89dcc90e70320db49a --- /dev/null +++ b/eval/gpu/inference_grid_16.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c16506561145145a7c3d67d326c8fbecaa9c3db93b052e544a100a4a6f77289 +size 143964 diff --git a/eval/gpu/inference_grid_17.png b/eval/gpu/inference_grid_17.png new file mode 100644 index 0000000000000000000000000000000000000000..d81f8f0e31bc2eab176a5a5f76adb609818ae00c Binary files /dev/null and b/eval/gpu/inference_grid_17.png differ diff --git a/eval/gpu/inference_grid_18.png b/eval/gpu/inference_grid_18.png new file mode 100644 index 0000000000000000000000000000000000000000..68cea80454f4cc93429fd76cbadba46b5621b70e Binary files /dev/null and 
b/eval/gpu/inference_grid_18.png differ diff --git a/eval/gpu/inference_grid_19.png b/eval/gpu/inference_grid_19.png new file mode 100644 index 0000000000000000000000000000000000000000..fbf0f7b127265413e7522923d7c5d9988796d0f1 --- /dev/null +++ b/eval/gpu/inference_grid_19.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8db0a3330ad0555c48b7fa9e5653d96a6068cee7a61b803a85fe6baba0b887e3 +size 112716 diff --git a/eval/gpu/inference_grid_2.png b/eval/gpu/inference_grid_2.png new file mode 100644 index 0000000000000000000000000000000000000000..8e858e7deeec4e5293e8dbc44268f2c9e831ad77 --- /dev/null +++ b/eval/gpu/inference_grid_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c04041c23c4290b35614cd463e5f3d94fbd1f4130bf1beec2e87a7a136cf38 +size 100960 diff --git a/eval/gpu/inference_grid_20.png b/eval/gpu/inference_grid_20.png new file mode 100644 index 0000000000000000000000000000000000000000..977df1464d59ae55a0518f636216d7ea8f0cb92a --- /dev/null +++ b/eval/gpu/inference_grid_20.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9045dc42b8df8df426cd0544df2f6a287ad62fffa5d43fd47931419a57f6004e +size 157969 diff --git a/eval/gpu/inference_grid_21.png b/eval/gpu/inference_grid_21.png new file mode 100644 index 0000000000000000000000000000000000000000..f2fc6c16a0771d53c90f20fb4664ce7f372e2522 Binary files /dev/null and b/eval/gpu/inference_grid_21.png differ diff --git a/eval/gpu/inference_grid_22.png b/eval/gpu/inference_grid_22.png new file mode 100644 index 0000000000000000000000000000000000000000..728c960025c26d3be319a146fd9800d2290b82f9 --- /dev/null +++ b/eval/gpu/inference_grid_22.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21de7020d60fd497f31ba954ff35fcced26fb75cf2224ad10049b1046bee202f +size 116087 diff --git a/eval/gpu/inference_grid_23.png b/eval/gpu/inference_grid_23.png new file mode 100644 index 
0000000000000000000000000000000000000000..7ac776f83b7f6d34d612561efe5613e6c86bfc79 --- /dev/null +++ b/eval/gpu/inference_grid_23.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b4b553806a31b7fe66a330087d3a2dc1fb23a5037f15e4c35c30cbb15acdba +size 145482 diff --git a/eval/gpu/inference_grid_24.png b/eval/gpu/inference_grid_24.png new file mode 100644 index 0000000000000000000000000000000000000000..804ded4c2ed36ba80ab7f69cc96860ca4677de1f Binary files /dev/null and b/eval/gpu/inference_grid_24.png differ diff --git a/eval/gpu/inference_grid_3.png b/eval/gpu/inference_grid_3.png new file mode 100644 index 0000000000000000000000000000000000000000..86bdee4fb7c6c90e01fb8ab4f1cd262ef3d9dd4e Binary files /dev/null and b/eval/gpu/inference_grid_3.png differ diff --git a/eval/gpu/inference_grid_4.png b/eval/gpu/inference_grid_4.png new file mode 100644 index 0000000000000000000000000000000000000000..c87774fa5cf4a40cd3eb881d746557bc768a3dd8 Binary files /dev/null and b/eval/gpu/inference_grid_4.png differ diff --git a/eval/gpu/inference_grid_5.png b/eval/gpu/inference_grid_5.png new file mode 100644 index 0000000000000000000000000000000000000000..71c10626e12b937cd8ad217dfc0e10c006fa7205 --- /dev/null +++ b/eval/gpu/inference_grid_5.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8263af9871e524c9745f2160e83e9eb8009349c8847c05b45f8a62f7e267b999 +size 110258 diff --git a/eval/gpu/inference_grid_6.png b/eval/gpu/inference_grid_6.png new file mode 100644 index 0000000000000000000000000000000000000000..e612fcedebee35ff0d51d24547d0247a0f6f4ba9 Binary files /dev/null and b/eval/gpu/inference_grid_6.png differ diff --git a/eval/gpu/inference_grid_7.png b/eval/gpu/inference_grid_7.png new file mode 100644 index 0000000000000000000000000000000000000000..7a8ec3067ed29ebe36d32e9164939167f1dfd1d5 Binary files /dev/null and b/eval/gpu/inference_grid_7.png differ diff --git a/eval/gpu/inference_grid_8.png 
b/eval/gpu/inference_grid_8.png new file mode 100644 index 0000000000000000000000000000000000000000..f682e85d9bc2a2a349fabb4d11c3b0fd5f6a6e78 --- /dev/null +++ b/eval/gpu/inference_grid_8.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b7bd53402eb2c8fdfae4bea951cb590cef2b16349f072024966417f83b55f7 +size 115151 diff --git a/eval/gpu/inference_grid_9.png b/eval/gpu/inference_grid_9.png new file mode 100644 index 0000000000000000000000000000000000000000..538550324be98fb6796080695c390d9aba6f8c2c --- /dev/null +++ b/eval/gpu/inference_grid_9.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1fc49bd0f4cc40091408d4c03a317ec602e89bc72d614abb17cc44971d99973 +size 145357 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a451517bf33441e9ca58d4a535f308e73a397cb --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d76af422bc714de4e13e585e25ed5cf64f2934c795f70450f642a3769c363bde +size 174075684 diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fa4bd2a1e0250a62cdd19c0597f2100eec51d15 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,26 @@ +{ + "do_convert_annotations": true, + "do_normalize": true, + "do_pad": true, + "do_rescale": true, + "do_resize": true, + "format": "coco_detection", + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "ConditionalDetrImageProcessor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "pad_size": null, + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 640, + "width": 640 + } +}