# Continued pretraining of Llama 3.2 3B on the bio_1k corpus with litgpt.
model_name: Llama-3.2-3B
out_dir: /home/aiops/zhuty/litgpt/out/pretrain/llama3-3B-bio1k-2
precision: bf16-mixed                 # bfloat16 mixed precision
initial_checkpoint_dir: checkpoints/meta-llama/Llama-3.2-3B  # start from pretrained weights
resume: auto                          # resume from the latest checkpoint in out_dir, if any

data:
  class_path: litgpt.data.LitData
  init_args:
    data_path: /home/aiops/zhuty/cont_pretrain_processed/bio_1k
    split_names:
      - train
      - valid
    seed: 42
    num_workers: 8
    seq_length: 2048

train:
  save_interval: 2500                 # checkpoint every 2500 steps
  log_interval: 1                     # log every step
  global_batch_size: 256              # effective batch size (reached via gradient accumulation)
  micro_batch_size: 1                 # per-device batch size
  lr_warmup_fraction: 0.01            # warm up the LR over the first 1% of steps
  max_tokens: 30000000                # stop after ~30M training tokens
  max_seq_length: 1024
  max_norm: 1.0                       # gradient clipping threshold
  min_lr: 5.0e-06                     # floor of the LR decay schedule

eval:
  interval: 10                        # validate every 10 steps
  max_iters: 100                      # validation batches per evaluation
  initial_validation: true
  final_validation: true
  evaluate_example: first

log:
  project: mathcont                   # wandb project name

optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 5.0e-05                       # peak learning rate
    weight_decay: 0.1
    betas:
      - 0.9
      - 0.95

devices: auto                         # use all visible GPUs
num_nodes: 1
logger_name: wandb
seed: 42
compiler: torch
executors:
  - sdpa
  - torchcompile
  - torch
strategy: fsdp                        # fully sharded data parallelism
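
# Usage sketch (assuming the standard litgpt CLI; the config file name below
# is hypothetical):
#   litgpt pretrain --config llama3-3B-bio1k.yaml
#
# Rough training length implied by the settings above, counting tokens at
# max_seq_length=1024:
#   30,000,000 tokens / 1024 tokens per sample ≈ 29,297 samples
#   29,297 samples / 256 (global_batch_size)  ≈ 114 optimizer steps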