{
"architectures": [
"Zamba2ForCausalLM"
],
"adapter_rank": 128,
"add_bias_linear": false,
"attention_dropout": 0.0,
"attention_head_dim": 160,
"attention_hidden_size": 5120,
"bos_token_id": 1,
"chunk_size": 256,
"eos_token_id": 2,
"ffn_hidden_size": 10240,
"hidden_act": "gelu",
"hidden_size": 2560,
"hybrid_layer_ids": [
6,
12,
18,
24,
30,
36,
42,
47,
51
],
"initializer_range": 0.02,
"intermediate_size": 10240,
"kv_channels": 80,
"layers_block_type": [
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba",
"mamba",
"hybrid",
"mamba",
"mamba"
],
"mamba_d_conv": 4,
"mamba_d_state": 64,
"mamba_expand": 2,
"mamba_headdim": 64,
"mamba_ngroups": 1,
"max_position_embeddings": 4096,
"model_type": "zamba2",
"n_mamba_heads": 80,
"num_attention_heads": 32,
"num_hidden_layers": 54,
"num_key_value_heads": 32,
"num_logits_to_keep": 1,
"num_mem_blocks": 2,
"num_query_groups": 32,
"pad_token_id": 0,
"rms_norm_eps": 1e-05,
"rope_theta": 10000,
"time_step_floor": 0.0001,
"time_step_limit": null,
"time_step_max": 0.1,
"time_step_min": 0.001,
"transformers_version": "4.49.0.dev0",
"use_cache": true,
"use_conv_bias": true,
"use_long_context": false,
"use_mem_rope": false,
"use_shared_attention_adapter": false,
"use_shared_mlp_adapter": true,
"vocab_size": 32000
}