model_name: Llama-3.2-1B
out_dir: /home/aiops/zhuty/litgpt/out/pretrain/llama3-1b-amv1-4epoch-2k
precision: bf16-mixed
initial_checkpoint_dir: checkpoints/meta-llama/Llama-3.2-1B
resume: auto

data:
  class_path: litgpt.data.TextFiles
  init_args:
    train_data_path: /home/aiops/zhuty/cont_data/bio/amv1/train
    val_data_path: /home/aiops/zhuty/cont_data/bio/amv1/test
    seed: 42
    num_workers: 10
    add_eos: true

train:
  save_interval: 2500
  save_interval_epochs: 1
  save_optimizer_state: false
  log_interval: 1
  global_batch_size: 256
  micro_batch_size: 1
  lr_warmup_fraction: 0.01
  epochs: 4
  max_seq_length: 2048
  max_norm: 1.0
  min_lr: 5.0e-06

eval:
  interval: 20
  max_iters: 100
  initial_validation: true
  final_validation: true
  evaluate_example: first
  num_generation_examples: 1
  calculate_exact_match: false

log:
  project: mathcont

optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 5.0e-05
    weight_decay: 0.1
    betas:
      - 0.9
      - 0.95

devices: auto
num_nodes: 1
tokenizer_dir: checkpoints/meta-llama/Llama-3.2-1B
logger_name: wandb
seed: 42
compiler: torch
executors:
  - sdpa
  - torchcompile
  - torch
strategy: fsdp
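
# Usage sketch (hedged): this follows the shape of a litgpt pretraining
# config, but several fields (save_interval_epochs, save_optimizer_state,
# evaluate_example, num_generation_examples, calculate_exact_match,
# add_eos, compiler, executors, strategy) are not upstream litgpt pretrain
# arguments and presumably require the author's fork. Assuming a
# compatible CLI and that this file is saved as llama3-1b-amv1.yaml
# (hypothetical filename), a run would be launched with:
#
#   litgpt pretrain --config llama3-1b-amv1.yaml
#
# Effective batch math, assuming litgpt's usual derivation
# (accumulation steps = global_batch_size / (micro_batch_size * devices
# * num_nodes)) and, hypothetically, devices: auto resolving to 8 GPUs:
#
#   256 / (1 * 8 * 1) = 32 gradient-accumulation steps per rank
#   256 * 2048       = 524,288 tokens per optimizer step at max_seq_length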