craffel (HF Staff) committed · verified
Commit 7c6481d · Parent: e1c3187

Upload folder using huggingface_hub

.gitattributes CHANGED
@@ -33,3 +33,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/.metadata filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
+ test_checkpoint/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
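These new rules route the checkpoint shards through Git LFS, so the repository stores small pointer files in place of the multi-GB `.distcp` payloads. A rough Python illustration of which paths fall under the LFS rules; note that `fnmatch` only approximates git's attribute-pattern matching, and the sample file list is hypothetical:

```python
import fnmatch

# Patterns taken from the .gitattributes hunk above; exact-path entries
# match themselves, glob entries use wildcards.
lfs_patterns = [
    "*.zip", "*.zst", "*tfevents*",
    "test_checkpoint/.metadata",
    *[f"test_checkpoint/__{i}_0.distcp" for i in range(8)],
]

# Hypothetical file list, purely for illustration.
paths = ["test_checkpoint/__3_0.distcp", "test_checkpoint/params.json", "README.md"]

for path in paths:
    # NOTE: fnmatch is only an approximation of git's pattern semantics.
    tracked = any(fnmatch.fnmatch(path, pat) for pat in lfs_patterns)
    print(f"{path}: {'LFS pointer' if tracked else 'stored in git directly'}")
```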
test_checkpoint/.metadata ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aae0a131c12a13150d44772f27ec22bf439479d8b8ce6cd851631cb9495fd5fa
+ size 1148630
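This `.metadata` file and the eight `.distcp` shards that follow are stored as Git LFS pointers: three lines giving the pointer-spec version, the SHA-256 of the actual payload, and its size in bytes. A minimal sketch (the helpers are hypothetical, not part of this repository) for parsing such a pointer and verifying a downloaded payload against it:

```python
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Parse the three-line Git LFS pointer format (version / oid / size)."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify_payload(pointer_path: str, payload_path: str) -> bool:
    """Check a locally downloaded payload against its LFS pointer.

    Hashing is streamed in 1 MiB chunks because the shards are multi-GB.
    """
    ptr = parse_lfs_pointer(Path(pointer_path).read_text())
    digest = hashlib.sha256()
    size = 0
    with open(payload_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return size == ptr["size"] and digest.hexdigest() == ptr["sha256"]
```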
test_checkpoint/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:545a6cb4f806af2be76a3b286e1414e7fba73c3a6e225bcbde91d4533188b3cd
+ size 3500586192
test_checkpoint/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7a0c5b2a8d111563d9a9fb7656a628db487b46287afbe9dc1fd7e75ed479634c
+ size 3500625132
test_checkpoint/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c76319820e3ba0255cfa88495247e05beb416a07803653bafc8fef70dd9b573a
+ size 3500625132
test_checkpoint/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:87b925eb0885203da3c32402e19dcba1f99f37e44d32f0398fe191753a5ae74a
+ size 3500625132
test_checkpoint/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c870055a60c221d7736277af5a566446892a9c2af29a5d5799cbf60b0c1adff6
+ size 3500625132
test_checkpoint/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:940ca5aa4915fbbecef5dad00f35e73bb858cfdd5b4feb06afbce98ddb39c5a3
+ size 3500627408
test_checkpoint/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69d367c7ef4946b59a917b20a7c7be5c56192030b8e0d60397018beba50eb8f0
+ size 3500627408
test_checkpoint/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:835db5eb72693f48b070c0c0c16b9cf45a9ce8da323b819b48c125427a22d9ae
+ size 3500635664
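The `__{rank}_0.distcp` shards plus the `.metadata` index above follow the on-disk layout written by PyTorch's `torch.distributed.checkpoint` (DCP), with one shard per data-parallel rank (`dp_replicate: 8` in params.json below). A hedged sketch of consolidating such a directory into a single `torch.save` file, assuming a recent PyTorch (>= 2.2) whose `format_utils` module ships `dcp_to_torch_save`; the output filename is hypothetical:

```python
# Sketch only: assumes PyTorch >= 2.2, where torch.distributed.checkpoint
# exposes offline format-conversion helpers.
from torch.distributed.checkpoint.format_utils import dcp_to_torch_save

# Read the sharded DCP checkpoint in "test_checkpoint" and write one
# consolidated state dict to "consolidated.pt" (hypothetical output path).
dcp_to_torch_save("test_checkpoint", "consolidated.pt")
```

Resuming training in place would instead go through `torch.distributed.checkpoint.load` with a `FileSystemReader` pointed at the same directory, which is normally handled by the training framework's own checkpoint code.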
test_checkpoint/params.json ADDED
@@ -0,0 +1 @@
+ {"name": "google-gemma-2-2b", "dump_dir": "/fsx/craffel/toksuite/lingua_logs/google-gemma-2-2b/", "seed": 777, "grad_acc_steps": 8, "gc_collect_freq": 1000, "probe_freq": null, "steps": 60000, "data": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "huggingface", "path": "google/gemma-2-2b", "n_words": null}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 42, "vocab_size": 256000, "weight_tying": false, "sliding_window": null}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 6000, "keep": -1}, "eval": {"every": 2000, "keep": -1}, "path": "/fsx/craffel/toksuite/lingua_logs/google-gemma-2-2b/checkpoints", "init_ckpt_path": "/fsx/craffel/toksuite/init_checkpoints/google-gemma-2-2b/model_dcp", "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_turkish", "include_base_44_italian", "include_base_44_chinese", "belebele_pes_Arab", "belebele_eng_Latn", "belebele_ita_Latn", "belebele_tur_Latn", "belebele_zho_Hans", "xnli_en", "xnli_tr", "humaneval"], "confirm_run_unsafe_code": true}, "generator": {"max_tokens": 8192, "dtype": "bf16"}}}
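params.json records the full training configuration for this run: the data mixture, optimizer and schedule, a 2048-dim, 25-layer model with a 256000-token vocabulary, and FSDP over 8 data-parallel replicas. A minimal sketch, using only the standard library, of pulling a few of these fields out of a local clone (paths are relative to the repository root):

```python
import json
from pathlib import Path

cfg = json.loads(Path("test_checkpoint/params.json").read_text())

# A few headline settings from the config.
print(cfg["name"])                                    # google-gemma-2-2b
print(cfg["model"]["dim"], cfg["model"]["n_layers"])  # 2048 25
print(cfg["data"]["sources"])                         # mixture weight per source
print(cfg["distributed"]["dp_replicate"])             # 8 data-parallel replicas
print(cfg["optim"]["lr"], cfg["optim"]["scheduler"])  # 0.001 cosine
```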
test_checkpoint/train_state_00000.json ADDED
@@ -0,0 +1 @@
+ {"step": 50000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 569, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.00.jsonl", "position": 18011595311, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.00.jsonl", "position": 2977174099, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.00.jsonl", "position": 2420382621, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.00.jsonl", "position": 2607454771, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.00.jsonl", "position": 314418732, "block_size": 1, "offset": 0, "current_iter": 1}, "stack_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/stack_edu/stack_edu.chunk.00.jsonl", "position": 3707987433, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 120204084478957477010757924730884201130, "inc": 252101603063402394885084957393789173453}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "google/gemma-2-2b"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 640, "rng_state": {"bit_generator": "PCG64", "state": {"state": 178563617281479037847152049348078900392, "inc": 257317082376085721142933171929815648017}, "has_uint32": 0, "uinteger": 2064537375}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 50000, "verbose": false, "_step_count": 50001, "_get_lr_called_within_step": false, "_last_lr": [7.157234034479355e-05], "lr_lambdas": [{}]}}
test_checkpoint/train_state_00001.json ADDED
@@ -0,0 +1 @@
+ {"step": 50000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 272, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.01.jsonl", "position": 18011435196, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.01.jsonl", "position": 2982960015, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.01.jsonl", "position": 2437300342, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.01.jsonl", "position": 2603201448, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.01.jsonl", "position": 331901344, "block_size": 1, "offset": 0, "current_iter": 1}, "stack_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/stack_edu/stack_edu.chunk.01.jsonl", "position": 3676886775, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 75499279334076499819138845636886208810, "inc": 246509925186285949978196491240064802315}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "google/gemma-2-2b"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 640, "rng_state": {"bit_generator": "PCG64", "state": {"state": 129376025135317909054550778266736994970, "inc": 173555323965545256606922338259303677603}, "has_uint32": 1, "uinteger": 2164706399}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 50000, "verbose": false, "_step_count": 50001, "_get_lr_called_within_step": false, "_last_lr": [7.157234034479355e-05], "lr_lambdas": [{}]}}
test_checkpoint/train_state_00002.json ADDED
@@ -0,0 +1 @@
+ {"step": 50000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 41, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.02.jsonl", "position": 17983244852, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.02.jsonl", "position": 2965786821, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.02.jsonl", "position": 2415050253, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.02.jsonl", "position": 2601044898, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.02.jsonl", "position": 325320863, "block_size": 1, "offset": 0, "current_iter": 1}, "stack_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/stack_edu/stack_edu.chunk.02.jsonl", "position": 3726813843, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 172128873262853592450742622445841639642, "inc": 234358335530849485425064040311006256713}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "google/gemma-2-2b"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 640, "rng_state": {"bit_generator": "PCG64", "state": {"state": 324710038444534592061023193553191235544, "inc": 319170006889470250209362588441616495209}, "has_uint32": 0, "uinteger": 4005642628}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 50000, "verbose": false, "_step_count": 50001, "_get_lr_called_within_step": false, "_last_lr": [7.157234034479355e-05], "lr_lambdas": [{}]}}
test_checkpoint/train_state_00003.json ADDED
@@ -0,0 +1 @@
+ {"step": 50000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 607, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.03.jsonl", "position": 17937276742, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.03.jsonl", "position": 2971163869, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.03.jsonl", "position": 2428808960, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.03.jsonl", "position": 2579776057, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.03.jsonl", "position": 327825195, "block_size": 1, "offset": 0, "current_iter": 1}, "stack_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/stack_edu/stack_edu.chunk.03.jsonl", "position": 3730592567, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 126960094075626033294202693284865727759, "inc": 148211758571781046255077612135386035203}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "google/gemma-2-2b"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 640, "rng_state": {"bit_generator": "PCG64", "state": {"state": 145534257848372480678823981949894983833, "inc": 115810872492597857501795428972873905393}, "has_uint32": 1, "uinteger": 1761662862}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 50000, "verbose": false, "_step_count": 50001, "_get_lr_called_within_step": false, "_last_lr": [7.157234034479355e-05], "lr_lambdas": [{}]}}
test_checkpoint/train_state_00004.json ADDED
@@ -0,0 +1 @@
+ {"step": 50000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 607, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.04.jsonl", "position": 17971035720, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.04.jsonl", "position": 2966982964, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.04.jsonl", "position": 2433092294, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.04.jsonl", "position": 2589957578, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.04.jsonl", "position": 318966382, "block_size": 1, "offset": 0, "current_iter": 1}, "stack_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/stack_edu/stack_edu.chunk.04.jsonl", "position": 3738245597, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 87803569877252413075971318869651280696, "inc": 186633262021180533256729114674950595327}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "google/gemma-2-2b"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 640, "rng_state": {"bit_generator": "PCG64", "state": {"state": 179726827556496152591432356028790539825, "inc": 303111205818808944921858206842105131807}, "has_uint32": 1, "uinteger": 1411968337}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 50000, "verbose": false, "_step_count": 50001, "_get_lr_called_within_step": false, "_last_lr": [7.157234034479355e-05], "lr_lambdas": [{}]}}
test_checkpoint/train_state_00005.json ADDED
@@ -0,0 +1 @@
+ {"step": 50000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 2339, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.05.jsonl", "position": 18019505841, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.05.jsonl", "position": 2980567581, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.05.jsonl", "position": 2414493560, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.05.jsonl", "position": 2603168075, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.05.jsonl", "position": 334438578, "block_size": 1, "offset": 0, "current_iter": 1}, "stack_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/stack_edu/stack_edu.chunk.05.jsonl", "position": 3695579665, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 19446519833519699608481222145159877135, "inc": 329233669073478483697346584247981015037}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "google/gemma-2-2b"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 640, "rng_state": {"bit_generator": "PCG64", "state": {"state": 198437843517952865227907303713763696079, "inc": 47382953940698287647753879262736142901}, "has_uint32": 0, "uinteger": 3984885821}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 50000, "verbose": false, "_step_count": 50001, "_get_lr_called_within_step": false, "_last_lr": [7.157234034479355e-05], "lr_lambdas": [{}]}}
test_checkpoint/train_state_00006.json ADDED
@@ -0,0 +1 @@
+ {"step": 50000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 13201, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.06.jsonl", "position": 17933892688, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.06.jsonl", "position": 2970389289, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.06.jsonl", "position": 2404415830, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.06.jsonl", "position": 2595071920, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.06.jsonl", "position": 298144886, "block_size": 1, "offset": 0, "current_iter": 1}, "stack_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/stack_edu/stack_edu.chunk.06.jsonl", "position": 3766072839, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 240625023021210377371866929630832792167, "inc": 95963489890761403814531195999220475639}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "google/gemma-2-2b"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 640, "rng_state": {"bit_generator": "PCG64", "state": {"state": 237644487912628111688754850269988282987, "inc": 72545526324180839152750112646078969085}, "has_uint32": 0, "uinteger": 1792414338}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 50000, "verbose": false, "_step_count": 50001, "_get_lr_called_within_step": false, "_last_lr": [7.157234034479355e-05], "lr_lambdas": [{}]}}
test_checkpoint/train_state_00007.json ADDED
@@ -0,0 +1 @@
+ {"step": 50000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 86, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/tokenizer_training/", "sources": {"fw_edu": 0.5, "cmn_Hani": 0.1, "tur_Latn": 0.1, "ita_Latn": 0.1, "fas_Arab": 0.1, "stack_edu": 0.1}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fw_edu/fineweb_edu_100bt.chunk.07.jsonl", "position": 17959158555, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.07.jsonl", "position": 2969202618, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/tur_Latn/fineweb_2_hq.tur_Latn.chunk.07.jsonl", "position": 2428179980, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/ita_Latn/fineweb_2_hq.ita_Latn.chunk.07.jsonl", "position": 2606289472, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/fas_Arab/fineweb_2_hq.fas_Arab.chunk.07.jsonl", "position": 333712242, "block_size": 1, "offset": 0, "current_iter": 1}, "stack_edu": {"file_path": "/scratch/craffel/lingua/data/tokenizer_training/stack_edu/stack_edu.chunk.07.jsonl", "position": 3721281758, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 84332591710760149560402094804650815431, "inc": 53245743019587277358203950863334653629}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "huggingface", "path": "google/gemma-2-2b"}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 640, "rng_state": {"bit_generator": "PCG64", "state": {"state": 278593708416828065817994571306888703266, "inc": 19761753544780285878460645500694854795}, "has_uint32": 1, "uinteger": 2783498895}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 50000, "verbose": false, "_step_count": 50001, "_get_lr_called_within_step": false, "_last_lr": [7.157234034479355e-05], "lr_lambdas": [{}]}}
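The eight train_state_0000*.json files record per-rank resume state at step 50000: each data loader's position in its source files, the PCG64 RNG states, and the LR scheduler (last LR ≈ 7.16e-5). A small sketch, standard library only and with paths relative to a local clone, that checks all ranks were saved at the same step and lists each rank's fw_edu position:

```python
import json
from pathlib import Path

states = [
    json.loads(p.read_text())
    for p in sorted(Path("test_checkpoint").glob("train_state_*.json"))
]

# All ranks should have been checkpointed at the same optimizer step.
steps = {s["step"] for s in states}
assert steps == {50000}, f"ranks disagree on step: {steps}"

for rank, s in enumerate(states):
    # Nested iterator state as serialized above: dataloader -> packing -> mixing.
    sources = s["data_loader_state"]["it_state"]["it_state"]["it_state"]["source_to_state"]
    fw = sources["fw_edu"]
    lr = s["scheduler"]["_last_lr"][0]
    print(f"rank {rank}: fw_edu position {fw['position']}, last lr {lr:.2e}")
```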