{
  "architectures": [
    "Zamba2ForCausalLM"
  ],
  "adapter_rank": 128,
  "add_bias_linear": false,
  "attention_dropout": 0.0,
  "attention_head_dim": 160,
  "attention_hidden_size": 5120,
  "bos_token_id": 1,
  "chunk_size": 256,
  "eos_token_id": 2,
  "ffn_hidden_size": 10240,
  "hidden_act": "gelu",
  "hidden_size": 2560,
  "hybrid_layer_ids": [
    6,
    12,
    18,
    24,
    30,
    36,
    42,
    47,
    51
  ],
  "initializer_range": 0.02,
  "intermediate_size": 10240,
  "kv_channels": 80,
  "layers_block_type": [
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba",
    "mamba",
    "hybrid",
    "mamba",
    "mamba"
  ],
  "mamba_d_conv": 4,
  "mamba_d_state": 64,
  "mamba_expand": 2,
  "mamba_headdim": 64,
  "mamba_ngroups": 1,
  "max_position_embeddings": 4096,
  "model_type": "zamba2",
  "n_mamba_heads": 80,
  "num_attention_heads": 32,
  "num_hidden_layers": 54,
  "num_key_value_heads": 32,
  "num_logits_to_keep": 1,
  "num_mem_blocks": 2,
  "num_query_groups": 32,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-05,
  "rope_theta": 10000,
  "time_step_floor": 0.0001,
  "time_step_limit": null,
  "time_step_max": 0.1,
  "time_step_min": 0.001,
  "transformers_version": "4.49.0.dev0",
  "use_cache": true,
  "use_conv_bias": true,
  "use_long_context": false,
  "use_mem_rope": false,
  "use_shared_attention_adapter": false,
  "use_shared_mlp_adapter": true,
  "vocab_size": 32000
}