willtensora committed (verified)
Commit 3bb3cae · 1 Parent(s): ddb1636

Training in progress, step 40

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
00000000-0000-0000-0000-000000000000.yml ADDED
@@ -0,0 +1,50 @@
+ base_model: peft-internal-testing/tiny-dummy-qwen2
+ batch_size: 8
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - format: custom
+   path: argilla/databricks-dolly-15k-curated-en
+   type:
+     field_input: original-instruction
+     field_instruction: original-instruction
+     field_output: original-response
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/test-repo
+ hub_strategy: checkpoint
+ learning_rate: 0.002
+ load_best_model_at_end: true
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 1
+ micro_batch_size: 1
+ model_type: AutoModelForCausalLM
+ num_epochs: 100
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 8
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.001
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: peft-internal-testing/tiny-dummy-qwen2-argilla/databricks-dolly-15k-curated-en
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
03a659ff-e350-4bb9-8ff3-8c658a5d0dff.yml ADDED
@@ -0,0 +1,52 @@
+ base_model: fxmarty/tiny-llama-fast-tokenizer
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - fc6136aac03f618a_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/fc6136aac03f618a_train_data.json
+   type:
+     field_instruction: text
+     field_output: title
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ special_tokens:
+   pad_token: </s>
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: fxmarty/tiny-llama-fast-tokenizer-/workspace/input_data/fc6136aac03f618a_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
077fd330-87f9-4bc4-b449-7713fbdaf1b0.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/mistral-7b-v0.3
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - ca0152973425c947_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/ca0152973425c947_train_data.json
+   type:
+     field_input: code
+     field_instruction: func_name
+     field_output: docstring
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/5a2f5ce6-446b-4282-bb4d-9ee4e970231f
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/mistral-7b-v0.3-/tmp/ca0152973425c947_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
21315ae5-16ee-43cd-9612-743524060933.yml ADDED
@@ -0,0 +1,50 @@
+ base_model: unsloth/Meta-Llama-3.1-8B
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 562fa3aeea07046a_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/562fa3aeea07046a_train_data.json
+   type:
+     field_instruction: prompt
+     field_output: text
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/c4596edc-efad-4776-86a1-caa06bffcada
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: PreTrainedTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/Meta-Llama-3.1-8B-/workspace/input_data/562fa3aeea07046a_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
2eaa630f-7785-4ca3-b46f-be41dcf74f78.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: katuni4ka/tiny-random-qwen1.5-moe
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 95544452e61c7393_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/95544452e61c7393_train_data.json
+   type:
+     field_input: input
+     field_instruction: instruction
+     field_output: output
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/e61e89f0-854a-4922-8d25-dae435e91af0
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: katuni4ka/tiny-random-qwen1.5-moe-/workspace/input_data/95544452e61c7393_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
40f27435-f59d-488f-b2d6-01e356d79c48.yml ADDED
@@ -0,0 +1,50 @@
+ base_model: Qwen/Qwen2-1.5B-Instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - df925134bb2c32b8_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/df925134bb2c32b8_train_data.json
+   type:
+     field_instruction: prompt
+     field_output: amoral
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/ba640bbe-3257-40d8-88fe-26152f412bb7
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: Qwen/Qwen2-1.5B-Instruct-/tmp/df925134bb2c32b8_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
427d02be-6008-4556-9a5e-9c7cb7503058.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/Phi-3.5-mini-instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 7e5b54272524b996_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/7e5b54272524b996_train_data.json
+   type:
+     field_input: input
+     field_instruction: instruction
+     field_output: output
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/ae26a9e9-089e-4d4a-b592-d8935df7c18d
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/Phi-3.5-mini-instruct-/workspace/input_data/7e5b54272524b996_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
54c39bbc-809b-4c67-a254-0e03a4884b4e.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/codegemma-7b-it
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 2ebe89763cb3150d_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/2ebe89763cb3150d_train_data.json
+   type:
+     field_input: input
+     field_instruction: instruction
+     field_output: output
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/a0bc38f8-fcd3-4d7e-9a3f-3aa2e8a4204f
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GemmaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/codegemma-7b-it-/tmp/2ebe89763cb3150d_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
5ff7bf5f-96dc-43dd-aeeb-560c0ab78db8.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: NousResearch/Hermes-3-Llama-3.1-8B
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 69447058613b41d8_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/69447058613b41d8_train_data.json
+   type:
+     field_input: sectionParentTitre
+     field_instruction: title_main
+     field_output: texte
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/942aa5fc-b540-46ce-b482-e38c4f637264
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: PreTrainedTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: NousResearch/Hermes-3-Llama-3.1-8B-/workspace/input_data/69447058613b41d8_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
63345f8a-4ec9-47f0-9956-6eaa52b2c2a6.yml ADDED
@@ -0,0 +1,52 @@
+ base_model: heegyu/WizardVicuna-open-llama-3b-v2
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - fe9267419ea75ad2_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/fe9267419ea75ad2_train_data.json
+   type:
+     field_instruction: ca_topic
+     field_output: article
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/7114c34f-852f-43da-b985-b7f0b6d6d724
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ special_tokens:
+   pad_token: </s>
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: heegyu/WizardVicuna-open-llama-3b-v2-/tmp/fe9267419ea75ad2_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
6c7ae056-3b4d-460b-ba7b-a4000f32b3f1.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/gemma-2-2b
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - b98d5b59c20c6595_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/b98d5b59c20c6595_train_data.json
+   type:
+     field_input: metadata
+     field_instruction: text
+     field_output: tags_str
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/429ee307-6dd2-4dd7-9e1d-7384d807a3df
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GemmaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/gemma-2-2b-/tmp/b98d5b59c20c6595_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
75b21ca4-feab-4bdd-92b0-ea6d90dfa18f.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: Qwen/Qwen2.5-1.5B-Instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - c6adcdcb593a3ee4_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/c6adcdcb593a3ee4_train_data.json
+   type:
+     field_input: abstract
+     field_instruction: question_en_origin
+     field_output: answer_en_origin
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/2faf844e-4a0a-4d23-95f4-a055e4864133
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: Qwen/Qwen2.5-1.5B-Instruct-/workspace/input_data/c6adcdcb593a3ee4_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
879db250-c3f5-4d43-a7c5-c5a456ae5803.yml ADDED
@@ -0,0 +1,50 @@
+ base_model: unsloth/Qwen2.5-Coder-1.5B-Instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 4d85b564dafa38db_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/4d85b564dafa38db_train_data.json
+   type:
+     field_instruction: prompt
+     field_output: response
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/876ff803-5357-4240-8766-c54166515403
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: Qwen2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/Qwen2.5-Coder-1.5B-Instruct-/workspace/input_data/4d85b564dafa38db_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
8910478d-79cf-499e-8fed-7a2142f7ee60.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/Phi-3-medium-4k-instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - f6199f34ade98809_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/f6199f34ade98809_train_data.json
+   type:
+     field_input: choices
+     field_instruction: question
+     field_output: answer
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/2d37ba50-cd70-4895-be62-3477f5193e86
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/Phi-3-medium-4k-instruct-/tmp/f6199f34ade98809_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
README.md ADDED
@@ -0,0 +1,122 @@
+ ---
+ library_name: transformers
+ base_model: fxmarty/tiny-llama-fast-tokenizer
+ tags:
+ - axolotl
+ - generated_from_trainer
+ model-index:
+ - name: b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.1`
+ ```yaml
+ base_model: fxmarty/tiny-llama-fast-tokenizer
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - fc6136aac03f618a_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/fc6136aac03f618a_train_data.json
+   type:
+     field_instruction: text
+     field_output: title
+     format: '{instruction}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ special_tokens:
+   pad_token: </s>
+ tokenizer_type: LlamaTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: fxmarty/tiny-llama-fast-tokenizer-/workspace/input_data/fc6136aac03f618a_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
+
+ ```
+
+ </details><br>
+
+ # b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
+
+ This model is a fine-tuned version of [fxmarty/tiny-llama-fast-tokenizer](https://huggingface.co/fxmarty/tiny-llama-fast-tokenizer) on the None dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0002
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - total_train_batch_size: 32
+ - total_eval_batch_size: 32
+ - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+ - lr_scheduler_type: cosine
+ - training_steps: 18
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:------:|:----:|:---------------:|
+ | No log | 0.0071 | 1 | 10.3739 |
+
+
+ ### Framework versions
+
+ - Transformers 4.46.0
+ - Pytorch 2.5.0+cu124
+ - Datasets 3.0.1
+ - Tokenizers 0.20.1
added_tokens.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "<|endoftext|>": 151643,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644
+ }
config.json ADDED
@@ -0,0 +1,37 @@
+ {
+   "_name_or_path": "katuni4ka/tiny-random-qwen1.5-moe",
+   "architectures": [
+     "Qwen2MoeForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "decoder_sparse_step": 1,
+   "eos_token_id": 151643,
+   "hidden_act": "silu",
+   "hidden_size": 32,
+   "initializer_range": 0.02,
+   "intermediate_size": 22,
+   "max_position_embeddings": 2048,
+   "max_window_layers": 2,
+   "mlp_only_layers": [],
+   "model_type": "qwen2_moe",
+   "moe_intermediate_size": 44,
+   "norm_topk_prob": false,
+   "num_attention_heads": 4,
+   "num_experts": 8,
+   "num_experts_per_tok": 4,
+   "num_hidden_layers": 4,
+   "num_key_value_heads": 2,
+   "output_router_logits": false,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "router_aux_loss_coef": 0.001,
+   "shared_expert_intermediate_size": 22,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.46.0",
+   "use_cache": false,
+   "use_sliding_window": false,
+   "vocab_size": 151936
+ }
da9e44b3-e4fb-4905-9c7c-6b03aad6b593.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/SmolLM2-360M-Instruct
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - f1ccd02a885008e6_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/f1ccd02a885008e6_train_data.json
+   type:
+     field_input: target
+     field_instruction: user
+     field_output: assistant
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/3da0a03a-adbb-42e3-8fd7-bd7c0b1d3e9f
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GPT2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/SmolLM2-360M-Instruct-/tmp/f1ccd02a885008e6_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
ee62f35d-1a99-4f1c-a69c-c91bc444b71f.yml ADDED
@@ -0,0 +1,53 @@
+ base_model: EleutherAI/pythia-1b
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - b2a4966d9a5c880e_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/b2a4966d9a5c880e_train_data.json
+   type:
+     field_input: input
+     field_instruction: instruction
+     field_output: output
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/ee937811-31d0-4e11-944a-f4f8e06309d2
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ special_tokens:
+   pad_token: <|endoftext|>
+ tokenizer_type: GPTNeoXTokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: EleutherAI/pythia-1b-/workspace/input_data/b2a4966d9a5c880e_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
ef61f40b-eca8-4670-964b-fdd3d1d0f066.yml ADDED
@@ -0,0 +1,51 @@
+ base_model: unsloth/SmolLM-135M
+ batch_size: 32
+ bf16: true
+ chat_template: tokenizer_default_fallback_alpaca
+ datasets:
+ - data_files:
+   - 658988857b0a29c9_train_data.json
+   ds_type: json
+   format: custom
+   path: /workspace/input_data/658988857b0a29c9_train_data.json
+   type:
+     field_input: choices
+     field_instruction: subject
+     field_output: question
+     format: '{instruction} {input}'
+     no_input_format: '{instruction}'
+     system_format: '{system}'
+     system_prompt: ''
+ eval_steps: 20
+ flash_attention: true
+ gpu_memory_limit: 80GiB
+ gradient_checkpointing: true
+ group_by_length: true
+ hub_model_id: willtensora/09370687-f28e-45e5-91f6-f87011850a94
+ hub_strategy: checkpoint
+ learning_rate: 0.0002
+ logging_steps: 10
+ lr_scheduler: cosine
+ max_steps: 2500
+ micro_batch_size: 4
+ model_type: AutoModelForCausalLM
+ optimizer: adamw_bnb_8bit
+ output_dir: /workspace/axolotl/configs
+ pad_to_sequence_len: true
+ resize_token_embeddings_to_32x: false
+ sample_packing: false
+ save_steps: 40
+ save_total_limit: 1
+ sequence_len: 2048
+ tokenizer_type: GPT2TokenizerFast
+ train_on_inputs: false
+ trust_remote_code: true
+ val_set_size: 0.1
+ wandb_entity: ''
+ wandb_mode: online
+ wandb_name: unsloth/SmolLM-135M-/workspace/input_data/658988857b0a29c9_train_data.json
+ wandb_project: Gradients-On-Demand
+ wandb_run: your_name
+ wandb_runid: default
+ warmup_ratio: 0.05
+ xformers_attention: true
generation_config.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 0,
+   "do_sample": true,
+   "eos_token_id": 1,
+   "pad_token_id": 1,
+   "transformers_version": "4.46.0"
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:14e6134b1d643c9c7f346b655687dc075b0fd0c7aa1ee4ce46f59192017c3ee2
+ size 19782560
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ecbabedee28483af8dce99f4dd8fe36ef9c6c66877e669db930fe3569128330
+ size 2071661
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>"
+   ],
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
+ size 11418266
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151644": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "151645": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>"
+   ],
+   "bos_token": null,
+   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "model_max_length": 32768,
+   "pad_token": "<|endoftext|>",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null,
+   "use_fast": true
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75877ad3818f65eedd53378ad3c43f71fa3e0c734e1e9a0f2b80517730b5a861
+ size 6648
vocab.json ADDED
The diff for this file is too large to render. See raw diff