mwhanna committed on
Commit
9c1b17d
·
verified ·
1 Parent(s): b798168

Upload wandb-config.yaml

Browse files
Files changed (1) hide show
  1. wandb-config.yaml +85 -0
wandb-config.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.20.1
4
+ m:
5
+ - "1": gpu/max_memory_allocated_gb
6
+ "6":
7
+ - 3
8
+ "7": []
9
+ - "1": gpu/memory_allocated_gb
10
+ "6":
11
+ - 3
12
+ "7": []
13
+ - "1": gpu/memory_reserved_gb
14
+ "6":
15
+ - 3
16
+ "7": []
17
+ python_version: 3.11.10
18
+ t:
19
+ "1":
20
+ - 1
21
+ - 11
22
+ - 41
23
+ - 49
24
+ - 71
25
+ "2":
26
+ - 1
27
+ - 11
28
+ - 41
29
+ - 49
30
+ - 71
31
+ "3":
32
+ - 7
33
+ - 13
34
+ - 16
35
+ - 55
36
+ - 61
37
+ "4": 3.11.10
38
+ "5": 0.20.1
39
+ "6": 4.52.4
40
+ "12": 0.20.1
41
+ "13": linux-x86_64
42
+ act_fn:
43
+ value: relu
44
+ batch_size:
45
+ value: 8192
46
+ before_ln:
47
+ value: false
48
+ c_coeff:
49
+ value: 4
50
+ cooldown_start_frac:
51
+ value: 0.8
52
+ d_feature:
53
+ value: 163840
54
+ d_model:
55
+ value: 2048
56
+ device:
57
+ value: cuda:0
58
+ initial_lr:
59
+ value: 0.0002
60
+ layer_idx:
61
+ value: 0
62
+ lr:
63
+ value: 0.0002
64
+ min_lr_ratio:
65
+ value: 0
66
+ model_name:
67
+ value: Qwen/Qwen3-1.7B
68
+ model_type:
69
+ value: qwen
70
+ n_batches:
71
+ value: 780
72
+ n_grad_steps:
73
+ value: 4
74
+ n_steps:
75
+ value: 122070
76
+ preact_coeff:
77
+ value: 6e-05
78
+ skip_connections:
79
+ value: false
80
+ sparsity_coeff_final:
81
+ value: 16
82
+ x_scale:
83
+ value: 1
84
+ y_scale:
85
+ value: 1