mwhanna
/

qwen3-1.7b-transcoders-lowl0

Model card Files and versions Community

mwhanna committed 23 days ago

Commit

9c1b17d

·

verified ·

1 Parent(s): b798168

Upload wandb-config.yaml

Files changed (1) hide show

wandb-config.yaml +85 -0

wandb-config.yaml ADDED Viewed

	@@ -0,0 +1,85 @@

+_wandb:  # NOTE(review): presumably W&B's own run bookkeeping; the numeric-string keys are not explained in this file
+    value:
+        cli_version: 0.20.1
+        m:  # three entries, one per gpu/* memory metric name; all share the same "6" and "7" contents
+            - "1": gpu/max_memory_allocated_gb
+              "6":
+                - 3
+              "7": []
+            - "1": gpu/memory_allocated_gb
+              "6":
+                - 3
+              "7": []
+            - "1": gpu/memory_reserved_gb
+              "6":
+                - 3
+              "7": []
+        python_version: 3.11.10
+        t:
+            "1":  # holds the same five numbers as "2" below
+                - 1
+                - 11
+                - 41
+                - 49
+                - 71
+            "2":
+                - 1
+                - 11
+                - 41
+                - 49
+                - 71
+            "3":
+                - 7
+                - 13
+                - 16
+                - 55
+                - 61
+            "4": 3.11.10  # same value as python_version above
+            "5": 0.20.1  # same value as cli_version above
+            "6": 4.52.4  # NOTE(review): looks like another package version; which package is not stated here
+            "12": 0.20.1  # also equal to cli_version
+            "13": linux-x86_64
+act_fn:  # from here on, every setting is stored under a nested "value" key
+    value: relu
+batch_size:
+    value: 8192
+before_ln:
+    value: false
+c_coeff:
+    value: 4
+cooldown_start_frac:
+    value: 0.8
+d_feature:
+    value: 163840  # equals 80 x d_model (2048)
+d_model:
+    value: 2048
+device:
+    value: cuda:0
+initial_lr:
+    value: 0.0002
+layer_idx:
+    value: 0
+lr:
+    value: 0.0002  # same value as initial_lr
+min_lr_ratio:
+    value: 0
+model_name:
+    value: Qwen/Qwen3-1.7B
+model_type:
+    value: qwen
+n_batches:
+    value: 780
+n_grad_steps:
+    value: 4
+n_steps:
+    value: 122070  # 122070 x batch_size (8192) = 999,997,440; presumably a ~1e9 budget, TODO confirm
+preact_coeff:
+    value: 6.0e-05  # ".0" keeps this a float under YAML 1.1 loaders (bare 6e-05 loads as a string)
+skip_connections:
+    value: false
+sparsity_coeff_final:
+    value: 16
+x_scale:
+    value: 1
+y_scale:
+    value: 1  # same value as x_scale