mwhanna
/

qwen3-14b-transcoders

Model card Files Files and versions

mwhanna committed on Jun 22

Commit

6a1d56b

·

verified ·

1 Parent(s): c35c50f

Upload wandb-config.yaml

Files changed (1) hide show

wandb-config.yaml +93 -0

wandb-config.yaml ADDED Viewed

	@@ -0,0 +1,93 @@

+_wandb:
+    value:
+        cli_version: 0.19.11
+        m:
+            - "1": gpu/memory_allocated_gb
+              "6":
+                - 3
+              "7": []
+            - "1": gpu/max_memory_allocated_gb
+              "6":
+                - 3
+              "7": []
+            - "1": gpu/memory_reserved_gb
+              "6":
+                - 3
+              "7": []
+        python_version: 3.11.10
+        t:
+            "1":
+                - 1
+                - 11
+                - 49
+                - 51
+                - 55
+                - 71
+            "2":
+                - 1
+                - 11
+                - 49
+                - 51
+                - 55
+                - 71
+            "3":
+                - 2
+                - 7
+                - 13
+                - 16
+                - 23
+                - 55
+                - 61
+            "4": 3.11.10
+            "5": 0.19.11
+            "6": 4.52.4
+            "8":
+                - 5
+            "12": 0.19.11
+            "13": linux-x86_64
+act_fn:
+    value: relu
+batch_size:
+    value: 8192
+before_ln:
+    value: false
+c_coeff:
+    value: 4
+cooldown_start_frac:
+    value: 0.8
+d_feature:
+    value: 163840
+d_model:
+    value: 5120
+device:
+    value: cuda:0
+initial_lr:
+    value: 0.0002
+layer_idx:
+    value: 0
+lr:
+    value: 0.0002
+min_lr_ratio:
+    value: 0
+model_name:
+    value: Qwen/Qwen3-14B
+model_type:
+    value: qwen
+n_batches:
+    value: 277
+n_grad_steps:
+    value: 4
+n_steps:
+    value: 122070
+# Explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) read "6e-05" as a
+# string, whereas "6.0e-05" is a float under both YAML 1.1 and 1.2.
+preact_coeff:
+    value: 6.0e-05
+shuffle_buffer_batches:
+    value: 32
+skip_connections:
+    value: false
+sparsity_coeff_final:
+    value: 8
+x_scale:
+    value: 1
+y_scale:
+    value: 1