yujiepan committed
Commit fc24fc8 · verified · 1 Parent(s): 526c9f2

Upload folder using huggingface_hub

Files changed (2):
  1. README.md +1 -0
  2. config.json +0 -62
README.md CHANGED
@@ -93,6 +93,7 @@ config.num_attention_heads = 8
 config.num_hidden_layers = 2
 config.max_window_layers = 1
 config.tie_word_embeddings = False
+config.layer_types = ['full_attention'] * 2
 model = AutoModelForCausalLM.from_config(
     config,
     torch_dtype=torch.bfloat16,
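
For context, this README edit adds an explicit `layer_types` list so the per-layer attention types line up with the model's two hidden layers. Below is a minimal sketch of the surrounding snippet; the base checkpoint name is a stand-in for illustration, and only the field assignments shown in the diff come from the README itself.

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Hypothetical base checkpoint (the actual source model is not shown in this
# diff); only the config assignments below are taken from the README hunk.
config = AutoConfig.from_pretrained("Qwen/Qwen3-0.6B")

config.num_attention_heads = 8
config.num_hidden_layers = 2
config.max_window_layers = 1
config.tie_word_embeddings = False
# Added in this commit: one attention-type entry per hidden layer.
config.layer_types = ['full_attention'] * 2

# Build a randomly initialized tiny model from the modified config.
model = AutoModelForCausalLM.from_config(
    config,
    torch_dtype=torch.bfloat16,
)
```

Without the `layer_types` override, a config inherited from a deeper base model keeps one entry per original layer, which is exactly the mismatch the config.json change below removes.
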
config.json CHANGED
@@ -12,68 +12,6 @@
   "initializer_range": 0.02,
   "intermediate_size": 32,
   "layer_types": [
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
     "full_attention",
     "full_attention"
   ],
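
This config.json change is the saved counterpart of the README edit: `layer_types` is trimmed from 64 entries to 2, one per hidden layer. A quick consistency check along these lines (the local path is an assumption) would confirm the reloaded config matches:

```python
from transformers import AutoConfig

# "." assumes a local checkout of this repository containing the updated
# config.json; adjust the path (or use the hub repo id) as appropriate.
cfg = AutoConfig.from_pretrained(".")
assert len(cfg.layer_types) == cfg.num_hidden_layers == 2
assert set(cfg.layer_types) == {"full_attention"}
```
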