mwhanna committed on
Commit
6a1d56b
·
verified ·
1 Parent(s): c35c50f

Upload wandb-config.yaml

Browse files
Files changed (1) hide show
  1. wandb-config.yaml +93 -0
wandb-config.yaml ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.19.11
4
+ m:
5
+ - "1": gpu/memory_allocated_gb
6
+ "6":
7
+ - 3
8
+ "7": []
9
+ - "1": gpu/max_memory_allocated_gb
10
+ "6":
11
+ - 3
12
+ "7": []
13
+ - "1": gpu/memory_reserved_gb
14
+ "6":
15
+ - 3
16
+ "7": []
17
+ python_version: 3.11.10
18
+ t:
19
+ "1":
20
+ - 1
21
+ - 11
22
+ - 49
23
+ - 51
24
+ - 55
25
+ - 71
26
+ "2":
27
+ - 1
28
+ - 11
29
+ - 49
30
+ - 51
31
+ - 55
32
+ - 71
33
+ "3":
34
+ - 2
35
+ - 7
36
+ - 13
37
+ - 16
38
+ - 23
39
+ - 55
40
+ - 61
41
+ "4": 3.11.10
42
+ "5": 0.19.11
43
+ "6": 4.52.4
44
+ "8":
45
+ - 5
46
+ "12": 0.19.11
47
+ "13": linux-x86_64
48
+ act_fn:
49
+ value: relu
50
+ batch_size:
51
+ value: 8192
52
+ before_ln:
53
+ value: false
54
+ c_coeff:
55
+ value: 4
56
+ cooldown_start_frac:
57
+ value: 0.8
58
+ d_feature:
59
+ value: 163840
60
+ d_model:
61
+ value: 5120
62
+ device:
63
+ value: cuda:0
64
+ initial_lr:
65
+ value: 0.0002
66
+ layer_idx:
67
+ value: 0
68
+ lr:
69
+ value: 0.0002
70
+ min_lr_ratio:
71
+ value: 0
72
+ model_name:
73
+ value: Qwen/Qwen3-14B
74
+ model_type:
75
+ value: qwen
76
+ n_batches:
77
+ value: 277
78
+ n_grad_steps:
79
+ value: 4
80
+ n_steps:
81
+ value: 122070
82
+ preact_coeff:
83
+ value: 6e-05
84
+ shuffle_buffer_batches:
85
+ value: 32
86
+ skip_connections:
87
+ value: false
88
+ sparsity_coeff_final:
89
+ value: 8
90
+ x_scale:
91
+ value: 1
92
+ y_scale:
93
+ value: 1