sravanthib committed on
Commit
33dbb1e
·
verified ·
1 Parent(s): 37b2eee

Training completed

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +12 -12
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- license: apache-2.0
3
- base_model: Qwen/Qwen2-1.5B-Instruct
4
  tags:
5
  - generated_from_trainer
6
  library_name: peft
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # model
16
 
17
- This model is a fine-tuned version of [Qwen/Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct) on an unknown dataset.
18
 
19
  ## Model description
20
 
 
1
  ---
2
+ license: llama3.2
3
+ base_model: meta-llama/Llama-3.2-3B-Instruct
4
  tags:
5
  - generated_from_trainer
6
  library_name: peft
 
14
 
15
  # model
16
 
17
+ This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on an unknown dataset.
18
 
19
  ## Model description
20
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.0547945205479452,
3
- "total_flos": 7.783158486191309e+16,
4
- "train_loss": 2.2262256761391956,
5
- "train_runtime": 395.2928,
6
- "train_samples_per_second": 12.143,
7
- "train_steps_per_second": 0.076
8
  }
 
1
  {
2
  "epoch": 0.0547945205479452,
3
+ "total_flos": 1.6697353660111258e+17,
4
+ "train_loss": 1.2756919225056966,
5
+ "train_runtime": 450.633,
6
+ "train_samples_per_second": 10.652,
7
+ "train_steps_per_second": 0.067
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.0547945205479452,
3
- "total_flos": 7.783158486191309e+16,
4
- "train_loss": 2.2262256761391956,
5
- "train_runtime": 395.2928,
6
- "train_samples_per_second": 12.143,
7
- "train_steps_per_second": 0.076
8
  }
 
1
  {
2
  "epoch": 0.0547945205479452,
3
+ "total_flos": 1.6697353660111258e+17,
4
+ "train_loss": 1.2756919225056966,
5
+ "train_runtime": 450.633,
6
+ "train_samples_per_second": 10.652,
7
+ "train_steps_per_second": 0.067
8
  }
trainer_state.json CHANGED
@@ -10,33 +10,33 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0182648401826484,
13
- "grad_norm": 6.306942939758301,
14
  "learning_rate": 0.0001,
15
- "loss": 6.0878,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.0365296803652968,
20
- "grad_norm": 2.749385118484497,
21
  "learning_rate": 0.0001,
22
- "loss": 0.5211,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.0547945205479452,
27
- "grad_norm": 0.11284179240465164,
28
  "learning_rate": 0.0001,
29
- "loss": 0.0697,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.0547945205479452,
34
  "step": 30,
35
- "total_flos": 7.783158486191309e+16,
36
- "train_loss": 2.2262256761391956,
37
- "train_runtime": 395.2928,
38
- "train_samples_per_second": 12.143,
39
- "train_steps_per_second": 0.076
40
  }
41
  ],
42
  "logging_steps": 10,
@@ -56,7 +56,7 @@
56
  "attributes": {}
57
  }
58
  },
59
- "total_flos": 7.783158486191309e+16,
60
  "train_batch_size": 2,
61
  "trial_name": null,
62
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0182648401826484,
13
+ "grad_norm": 0.45940226316452026,
14
  "learning_rate": 0.0001,
15
+ "loss": 3.7646,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.0365296803652968,
20
+ "grad_norm": 0.056412823498249054,
21
  "learning_rate": 0.0001,
22
+ "loss": 0.0326,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.0547945205479452,
27
+ "grad_norm": 0.055685561150312424,
28
  "learning_rate": 0.0001,
29
+ "loss": 0.0299,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.0547945205479452,
34
  "step": 30,
35
+ "total_flos": 1.6697353660111258e+17,
36
+ "train_loss": 1.2756919225056966,
37
+ "train_runtime": 450.633,
38
+ "train_samples_per_second": 10.652,
39
+ "train_steps_per_second": 0.067
40
  }
41
  ],
42
  "logging_steps": 10,
 
56
  "attributes": {}
57
  }
58
  },
59
+ "total_flos": 1.6697353660111258e+17,
60
  "train_batch_size": 2,
61
  "trial_name": null,
62
  "trial_params": null