Upload 7 files
Browse files- README.md +102 -3
- added_tokens.json +987 -0
- config.json +26 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +23 -0
- tokenizer.model +3 -0
- tokenizer_config.json +35 -0
README.md
CHANGED
@@ -1,3 +1,102 @@
|
|
1 |
-
---
|
2 |
-
license: mit
|
3 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
---
|
4 |
+
|
5 |
+
|
6 |
+
<div align="center">
|
7 |
+
|
8 |
+
<h3>InstructBioMol: A Multimodal LLM for Biomolecule Understanding and Design</h3>
|
9 |
+
|
10 |
+
<p align="center">
|
11 |
+
<a href="https://arxiv.org/abs/2410.07919">Paper</a> •
|
12 |
+
<a href="https://github.com/HICAI-ZJU/InstructBioMol">Project</a> •
|
13 |
+
<a href="#quick-start">Quickstart</a> •
|
14 |
+
<a href="#citation">Citation</a>
|
15 |
+
</p>
|
16 |
+
</div>
|
17 |
+
|
18 |
+
### Model Description
|
19 |
+
|
20 |
+
InstructBioMol is a multimodal large language model that bridges natural language with biomolecules (proteins and small molecules). It achieves any-to-any alignment between natural language, molecules, and proteins through comprehensive instruction tuning.
|
21 |
+
|
22 |
+
*For detailed information, please refer to our [paper](https://arxiv.org/abs/2410.07919) and [code repository](https://github.com/HICAI-ZJU/InstructBioMol).*
|
23 |
+
### Released Variants
|
24 |
+
|
25 |
+
| Model Name | Stage | Multimodal| Description |
|
26 |
+
|------------|-----------| -------| -------|
|
27 |
+
| [InstructBioMol-base](https://huggingface.co/hicai-zju/InstructBioMol-base) (*This Model*) | Pretraining | ❎| Model continually pretrained on molecular sequences, protein sequences, and scientific literature. |
|
28 |
+
| [InstructBioMol-instruct-stage1](https://huggingface.co/hicai-zju/InstructBioMol-instruct-stage1) | Instruction tuning (stage 1) | ✅ | Stage 1 instruction-tuned model with biomolecular multimodal processing capabilities (e.g., 3D molecules/proteins). |
|
29 |
+
| [InstructBioMol-instruct](https://huggingface.co/hicai-zju/InstructBioMol-instruct) | Instruction tuning (stage 1 and 2) | ✅| Fully instruction-tuned model (stage1 & stage2) with biomolecular multimodal processing capabilities (e.g., 3D molecules/proteins) |
|
30 |
+
### Training Details
|
31 |
+
|
32 |
+
**Base Architecture**: LLaMA-2-7B
|
33 |
+
|
34 |
+
**Training Data**:
|
35 |
+
|
36 |
+
1. Molecular Sequences:
|
37 |
+
|
38 |
+
- Format: SELFIES
|
39 |
+
- Source: PubChem
|
40 |
+
- Size: 100 million (100M) entries
|
41 |
+
|
42 |
+
2. Protein Sequences:
|
43 |
+
- Format: FASTA-like, with each amino-acid residue prefixed by `<p>` (e.g., `<p>M<p>A<p>L<p>W...`).
|
44 |
+
- Source: UniRef50
|
45 |
+
- Size: 59 million (59M) entries
|
46 |
+
|
47 |
+
3. Natural Language Texts:
|
48 |
+
- Source: Abstracts from PubMed, bioRxiv, and ChemRxiv
|
49 |
+
- Size: 6 million (6M) abstracts
|
50 |
+
|
51 |
+
**Training Objective**: Causal language modeling (self-supervised)
|
52 |
+
|
53 |
+
### Quick Start
|
54 |
+
```python
|
55 |
+
from transformers import LlamaForCausalLM, LlamaTokenizer
|
56 |
+
import torch
|
57 |
+
|
58 |
+
model_name = "hicai-zju/InstructBioMol-base"
|
59 |
+
tokenizer = LlamaTokenizer.from_pretrained(model_name)
|
60 |
+
model = LlamaForCausalLM.from_pretrained(model_name, device_map="auto")
|
61 |
+
|
62 |
+
prompt = "<p>M" # protein sequence
|
63 |
+
# prompt = "[C]" # molecule sequence
|
64 |
+
# prompt = 'Scientific' # natural language
|
65 |
+
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
66 |
+
|
67 |
+
with torch.no_grad():
|
68 |
+
    outputs = model.generate(
|
69 |
+
**inputs,
|
70 |
+
max_new_tokens=100,
|
71 |
+
temperature=0.7,
|
72 |
+
top_p=0.9,
|
73 |
+
do_sample=True
|
74 |
+
)
|
75 |
+
|
76 |
+
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
77 |
+
print(generated_text)
|
78 |
+
```
|
79 |
+
|
80 |
+
### Citation
|
81 |
+
|
82 |
+
```bibtex
|
83 |
+
@article{DBLP:journals/corr/abs-2410-07919,
|
84 |
+
author = {Xiang Zhuang and
|
85 |
+
Keyan Ding and
|
86 |
+
Tianwen Lyu and
|
87 |
+
Yinuo Jiang and
|
88 |
+
Xiaotong Li and
|
89 |
+
Zhuoyi Xiang and
|
90 |
+
Zeyuan Wang and
|
91 |
+
Ming Qin and
|
92 |
+
Kehua Feng and
|
93 |
+
Jike Wang and
|
94 |
+
Qiang Zhang and
|
95 |
+
Huajun Chen},
|
96 |
+
title = {InstructBioMol: Advancing Biomolecule Understanding and Design Following
|
97 |
+
Human Instructions},
|
98 |
+
journal = {CoRR},
|
99 |
+
volume = {abs/2410.07919},
|
100 |
+
year = {2024}
|
101 |
+
}
|
102 |
+
```
|
added_tokens.json
ADDED
@@ -0,0 +1,987 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"</FASTA>": 32007,
|
3 |
+
"</MOL>": 32001,
|
4 |
+
"</PROT>": 32003,
|
5 |
+
"</SELFIES>": 32005,
|
6 |
+
"<FASTA>": 32006,
|
7 |
+
"<MOL>": 32000,
|
8 |
+
"<PROT>": 32002,
|
9 |
+
"<SELFIES>": 32004,
|
10 |
+
"<p>A": 32008,
|
11 |
+
"<p>C": 32009,
|
12 |
+
"<p>D": 32010,
|
13 |
+
"<p>E": 32011,
|
14 |
+
"<p>F": 32012,
|
15 |
+
"<p>G": 32013,
|
16 |
+
"<p>H": 32014,
|
17 |
+
"<p>I": 32015,
|
18 |
+
"<p>K": 32016,
|
19 |
+
"<p>L": 32017,
|
20 |
+
"<p>M": 32018,
|
21 |
+
"<p>N": 32019,
|
22 |
+
"<p>P": 32020,
|
23 |
+
"<p>Q": 32021,
|
24 |
+
"<p>R": 32022,
|
25 |
+
"<p>S": 32023,
|
26 |
+
"<p>T": 32024,
|
27 |
+
"<p>V": 32025,
|
28 |
+
"<p>W": 32026,
|
29 |
+
"<p>Y": 32027,
|
30 |
+
"[#Al]": 32463,
|
31 |
+
"[#As+1]": 32509,
|
32 |
+
"[#As]": 32536,
|
33 |
+
"[#B-1]": 32337,
|
34 |
+
"[#B]": 32044,
|
35 |
+
"[#Bi]": 32705,
|
36 |
+
"[#Branch1]": 32094,
|
37 |
+
"[#Branch2]": 32110,
|
38 |
+
"[#C+1]": 32247,
|
39 |
+
"[#C-1]": 32922,
|
40 |
+
"[#C]": 32380,
|
41 |
+
"[#Ce]": 32153,
|
42 |
+
"[#Co]": 32551,
|
43 |
+
"[#Cr]": 32206,
|
44 |
+
"[#Dy]": 32097,
|
45 |
+
"[#Er]": 32128,
|
46 |
+
"[#Eu]": 32517,
|
47 |
+
"[#Fe+1]": 32599,
|
48 |
+
"[#Fe]": 32941,
|
49 |
+
"[#Ga]": 32114,
|
50 |
+
"[#Gd]": 32404,
|
51 |
+
"[#GeH1]": 32732,
|
52 |
+
"[#Ge]": 32312,
|
53 |
+
"[#Hf+1]": 32244,
|
54 |
+
"[#Ho]": 32340,
|
55 |
+
"[#In]": 32357,
|
56 |
+
"[#Ir+1]": 32628,
|
57 |
+
"[#La]": 32291,
|
58 |
+
"[#Lu]": 32405,
|
59 |
+
"[#Mn]": 32583,
|
60 |
+
"[#Mo+1]": 32085,
|
61 |
+
"[#Mo]": 32811,
|
62 |
+
"[#N+1]": 32040,
|
63 |
+
"[#NH1+1]": 32806,
|
64 |
+
"[#N]": 32302,
|
65 |
+
"[#Nb]": 32903,
|
66 |
+
"[#Nd]": 32683,
|
67 |
+
"[#Ni]": 32578,
|
68 |
+
"[#O+1]": 32055,
|
69 |
+
"[#Os+2]": 32199,
|
70 |
+
"[#Os]": 32892,
|
71 |
+
"[#P+1]": 32379,
|
72 |
+
"[#PH1+1]": 32919,
|
73 |
+
"[#P]": 32557,
|
74 |
+
"[#PbH1]": 32211,
|
75 |
+
"[#Pb]": 32845,
|
76 |
+
"[#Pd]": 32746,
|
77 |
+
"[#Re]": 32346,
|
78 |
+
"[#Ring1]": 32755,
|
79 |
+
"[#Ring2]": 32947,
|
80 |
+
"[#Ru]": 32702,
|
81 |
+
"[#S+1]": 32975,
|
82 |
+
"[#S-1]": 32658,
|
83 |
+
"[#SH1]": 32230,
|
84 |
+
"[#S]": 32965,
|
85 |
+
"[#Sb]": 32592,
|
86 |
+
"[#Sc]": 32043,
|
87 |
+
"[#SeH1]": 32185,
|
88 |
+
"[#Se]": 32593,
|
89 |
+
"[#Si+1]": 32799,
|
90 |
+
"[#Si-1]": 32832,
|
91 |
+
"[#SiH1]": 32812,
|
92 |
+
"[#Si]": 32465,
|
93 |
+
"[#Sm]": 32666,
|
94 |
+
"[#Sn]": 32435,
|
95 |
+
"[#Ta+1]": 32283,
|
96 |
+
"[#Ta]": 32684,
|
97 |
+
"[#Tb]": 32507,
|
98 |
+
"[#Tc+1]": 32757,
|
99 |
+
"[#Tc]": 32801,
|
100 |
+
"[#Te]": 32296,
|
101 |
+
"[#Th]": 32563,
|
102 |
+
"[#Ti+1]": 32790,
|
103 |
+
"[#Ti]": 32464,
|
104 |
+
"[#Tl]": 32401,
|
105 |
+
"[#Tm]": 32689,
|
106 |
+
"[#U]": 32257,
|
107 |
+
"[#V+1]": 32896,
|
108 |
+
"[#V]": 32663,
|
109 |
+
"[#W+1]": 32285,
|
110 |
+
"[#WH1]": 32719,
|
111 |
+
"[#W]": 32550,
|
112 |
+
"[#Y]": 32863,
|
113 |
+
"[#Yb]": 32075,
|
114 |
+
"[#Zr+1]": 32745,
|
115 |
+
"[#Zr]": 32184,
|
116 |
+
"[-/Ring1]": 32685,
|
117 |
+
"[-/Ring2]": 32647,
|
118 |
+
"[-\\Ring1]": 32798,
|
119 |
+
"[-\\Ring2]": 32140,
|
120 |
+
"[/Br]": 32707,
|
121 |
+
"[/C@@H1]": 32679,
|
122 |
+
"[/C@@]": 32598,
|
123 |
+
"[/C@H1]": 32278,
|
124 |
+
"[/C@]": 32622,
|
125 |
+
"[/CH1-1]": 32814,
|
126 |
+
"[/C]": 32348,
|
127 |
+
"[/Cl]": 32893,
|
128 |
+
"[/F]": 32381,
|
129 |
+
"[/I]": 32982,
|
130 |
+
"[/N+1]": 32698,
|
131 |
+
"[/N-1]": 32837,
|
132 |
+
"[/NH1+1]": 32237,
|
133 |
+
"[/NH1]": 32147,
|
134 |
+
"[/NH2+1]": 32973,
|
135 |
+
"[/N]": 32101,
|
136 |
+
"[/O-1]": 32152,
|
137 |
+
"[/O]": 32427,
|
138 |
+
"[/P+1]": 32430,
|
139 |
+
"[/P]": 32731,
|
140 |
+
"[/S]": 32098,
|
141 |
+
"[10B]": 32554,
|
142 |
+
"[111Cd]": 32079,
|
143 |
+
"[115Sn]": 32254,
|
144 |
+
"[117Sn]": 32793,
|
145 |
+
"[119Sn]": 32498,
|
146 |
+
"[11B]": 32960,
|
147 |
+
"[11CH3]": 32969,
|
148 |
+
"[121Sb]": 32395,
|
149 |
+
"[123I]": 32107,
|
150 |
+
"[123Sb]": 32359,
|
151 |
+
"[125I]": 32937,
|
152 |
+
"[125Te]": 32699,
|
153 |
+
"[127IH1]": 32688,
|
154 |
+
"[129Xe]": 32660,
|
155 |
+
"[131I]": 32899,
|
156 |
+
"[139La]": 32378,
|
157 |
+
"[13C@@H1]": 32623,
|
158 |
+
"[13C@H1]": 32844,
|
159 |
+
"[13CH1]": 32036,
|
160 |
+
"[13CH2]": 32615,
|
161 |
+
"[13CH3]": 32625,
|
162 |
+
"[13C]": 32095,
|
163 |
+
"[13NH3]": 32154,
|
164 |
+
"[14CH3]": 32362,
|
165 |
+
"[14C]": 32785,
|
166 |
+
"[151Eu]": 32754,
|
167 |
+
"[15NH2]": 32809,
|
168 |
+
"[15OH2]": 32220,
|
169 |
+
"[16OH2]": 32950,
|
170 |
+
"[17OH2]": 32270,
|
171 |
+
"[183W]": 32642,
|
172 |
+
"[18FH1]": 32933,
|
173 |
+
"[18F]": 32039,
|
174 |
+
"[18OH2]": 32908,
|
175 |
+
"[197Au]": 32571,
|
176 |
+
"[197Hg]": 32708,
|
177 |
+
"[1H+1]": 32636,
|
178 |
+
"[1HH1]": 32332,
|
179 |
+
"[203Hg]": 32880,
|
180 |
+
"[203Tl]": 32588,
|
181 |
+
"[205Tl]": 32966,
|
182 |
+
"[210Po]": 32213,
|
183 |
+
"[23Na]": 32559,
|
184 |
+
"[25Mg]": 32493,
|
185 |
+
"[28Si]": 32487,
|
186 |
+
"[29Si]": 32740,
|
187 |
+
"[2HH1]": 32902,
|
188 |
+
"[2H]": 32413,
|
189 |
+
"[30Si]": 32423,
|
190 |
+
"[31Si]": 32879,
|
191 |
+
"[32SH2]": 32365,
|
192 |
+
"[32Si]": 32274,
|
193 |
+
"[33PH3]": 32587,
|
194 |
+
"[33SH2]": 32438,
|
195 |
+
"[35SH2]": 32219,
|
196 |
+
"[36SH2]": 32064,
|
197 |
+
"[39Ar]": 32860,
|
198 |
+
"[39K]": 32376,
|
199 |
+
"[3HH1]": 32087,
|
200 |
+
"[3H]": 32193,
|
201 |
+
"[3He]": 32339,
|
202 |
+
"[45Sc]": 32066,
|
203 |
+
"[4He]": 32276,
|
204 |
+
"[51Cr]": 32475,
|
205 |
+
"[51V]": 32275,
|
206 |
+
"[57Fe]": 32632,
|
207 |
+
"[63Cu]": 32897,
|
208 |
+
"[65Zn]": 32166,
|
209 |
+
"[67Zn]": 32353,
|
210 |
+
"[6Li]": 32631,
|
211 |
+
"[73Ge]": 32226,
|
212 |
+
"[77Se]": 32038,
|
213 |
+
"[79BrH1]": 32239,
|
214 |
+
"[7Li]": 32229,
|
215 |
+
"[87Rb]": 32333,
|
216 |
+
"[89Y]": 32322,
|
217 |
+
"[93Nb]": 32262,
|
218 |
+
"[95Mo]": 32156,
|
219 |
+
"[9Be]": 32792,
|
220 |
+
"[=13CH1]": 32454,
|
221 |
+
"[=13C]": 32834,
|
222 |
+
"[=Ag]": 32148,
|
223 |
+
"[=Al-1]": 32930,
|
224 |
+
"[=AlH1]": 32569,
|
225 |
+
"[=Al]": 32601,
|
226 |
+
"[=AsH1]": 32855,
|
227 |
+
"[=AsH2]": 32843,
|
228 |
+
"[=AsH3]": 32138,
|
229 |
+
"[=As]": 32948,
|
230 |
+
"[=Au]": 32567,
|
231 |
+
"[=B-1]": 32730,
|
232 |
+
"[=BH1-1]": 32558,
|
233 |
+
"[=BH2-1]": 32242,
|
234 |
+
"[=B]": 32936,
|
235 |
+
"[=Ba]": 32410,
|
236 |
+
"[=Bi+1]": 32387,
|
237 |
+
"[=BiH1]": 32343,
|
238 |
+
"[=Bi]": 32407,
|
239 |
+
"[=Branch1]": 32328,
|
240 |
+
"[=Branch2]": 32316,
|
241 |
+
"[=C+1]": 32594,
|
242 |
+
"[=C-1]": 32963,
|
243 |
+
"[=CH0]": 32223,
|
244 |
+
"[=CH1+1]": 32091,
|
245 |
+
"[=CH1-1]": 32214,
|
246 |
+
"[=C]": 32176,
|
247 |
+
"[=Ca]": 32279,
|
248 |
+
"[=Cd]": 32573,
|
249 |
+
"[=Ce]": 32531,
|
250 |
+
"[=CoH1]": 32859,
|
251 |
+
"[=Co]": 32497,
|
252 |
+
"[=CrH2]": 32656,
|
253 |
+
"[=Cr]": 32856,
|
254 |
+
"[=Cu]": 32775,
|
255 |
+
"[=Dy]": 32617,
|
256 |
+
"[=Eu]": 32388,
|
257 |
+
"[=Fe+1]": 32170,
|
258 |
+
"[=Fe]": 32773,
|
259 |
+
"[=Ga]": 32748,
|
260 |
+
"[=Gd]": 32048,
|
261 |
+
"[=GeH1]": 32927,
|
262 |
+
"[=GeH2]": 32661,
|
263 |
+
"[=Ge]": 32547,
|
264 |
+
"[=Hf+2]": 32172,
|
265 |
+
"[=Hf]": 32390,
|
266 |
+
"[=Hg]": 32556,
|
267 |
+
"[=Ho]": 32142,
|
268 |
+
"[=I]": 32676,
|
269 |
+
"[=InH1]": 32668,
|
270 |
+
"[=In]": 32155,
|
271 |
+
"[=IrH1]": 32252,
|
272 |
+
"[=Ir]": 32503,
|
273 |
+
"[=La]": 32641,
|
274 |
+
"[=Lu]": 32260,
|
275 |
+
"[=Mg]": 32253,
|
276 |
+
"[=Mn-1]": 32455,
|
277 |
+
"[=MnH1]": 32417,
|
278 |
+
"[=Mn]": 32957,
|
279 |
+
"[=Mo+2]": 32326,
|
280 |
+
"[=Mo+4]": 32083,
|
281 |
+
"[=MoH1]": 32611,
|
282 |
+
"[=MoH2]": 32263,
|
283 |
+
"[=MoH3]": 32338,
|
284 |
+
"[=Mo]": 32889,
|
285 |
+
"[=N+1]": 32467,
|
286 |
+
"[=N-1]": 32917,
|
287 |
+
"[=NH0]": 32756,
|
288 |
+
"[=NH1+1]": 32373,
|
289 |
+
"[=NH2+1]": 32619,
|
290 |
+
"[=N]": 32366,
|
291 |
+
"[=Nb]": 32741,
|
292 |
+
"[=Ni]": 32335,
|
293 |
+
"[=O+1]": 32425,
|
294 |
+
"[=OH1+1]": 32131,
|
295 |
+
"[=O]": 32294,
|
296 |
+
"[=Os+2]": 32847,
|
297 |
+
"[=Os]": 32568,
|
298 |
+
"[=P+1]": 32912,
|
299 |
+
"[=P-1]": 32687,
|
300 |
+
"[=PH1+1]": 32029,
|
301 |
+
"[=PH1]": 32319,
|
302 |
+
"[=PH2+1]": 32499,
|
303 |
+
"[=PH2]": 32259,
|
304 |
+
"[=PH3]": 32816,
|
305 |
+
"[=P]": 32954,
|
306 |
+
"[=PbH2]": 32466,
|
307 |
+
"[=Pb]": 32585,
|
308 |
+
"[=Pd]": 32839,
|
309 |
+
"[=Pr+1]": 32300,
|
310 |
+
"[=Pr]": 32579,
|
311 |
+
"[=Pt]": 32489,
|
312 |
+
"[=ReH1]": 32144,
|
313 |
+
"[=Re]": 32956,
|
314 |
+
"[=RhH1]": 32310,
|
315 |
+
"[=Rh]": 32779,
|
316 |
+
"[=Ring1]": 32271,
|
317 |
+
"[=Ring2]": 32494,
|
318 |
+
"[=Ring3]": 32422,
|
319 |
+
"[=Ru+1]": 32073,
|
320 |
+
"[=RuH1]": 32907,
|
321 |
+
"[=RuH2]": 32609,
|
322 |
+
"[=Ru]": 32958,
|
323 |
+
"[=S+1]": 32501,
|
324 |
+
"[=S-1]": 32750,
|
325 |
+
"[=S@@]": 32301,
|
326 |
+
"[=SH0]": 32163,
|
327 |
+
"[=SH1+1]": 32772,
|
328 |
+
"[=SH1-1]": 32979,
|
329 |
+
"[=SH1]": 32552,
|
330 |
+
"[=SH2]": 32667,
|
331 |
+
"[=SH4]": 32864,
|
332 |
+
"[=S]": 32768,
|
333 |
+
"[=Sb+1]": 32288,
|
334 |
+
"[=SbH1]": 32045,
|
335 |
+
"[=SbH2]": 32420,
|
336 |
+
"[=SbH3]": 32604,
|
337 |
+
"[=Sb]": 32444,
|
338 |
+
"[=Sc]": 32511,
|
339 |
+
"[=Se+1]": 32106,
|
340 |
+
"[=SeH1]": 32868,
|
341 |
+
"[=SeH2]": 32764,
|
342 |
+
"[=Se]": 32726,
|
343 |
+
"[=Si+1]": 32549,
|
344 |
+
"[=Si+2]": 32174,
|
345 |
+
"[=Si-1]": 32284,
|
346 |
+
"[=SiH1+1]": 32850,
|
347 |
+
"[=SiH1-1]": 32904,
|
348 |
+
"[=SiH1]": 32777,
|
349 |
+
"[=SiH2]": 32492,
|
350 |
+
"[=Si]": 32854,
|
351 |
+
"[=SnH1]": 32510,
|
352 |
+
"[=SnH2]": 32458,
|
353 |
+
"[=Sn]": 32886,
|
354 |
+
"[=Sr]": 32090,
|
355 |
+
"[=TaH1]": 32303,
|
356 |
+
"[=Ta]": 32162,
|
357 |
+
"[=Tb]": 32681,
|
358 |
+
"[=Tc+1]": 32032,
|
359 |
+
"[=Tc+2]": 32612,
|
360 |
+
"[=Tc+3]": 32874,
|
361 |
+
"[=Tc+4]": 32970,
|
362 |
+
"[=Tc+5]": 32697,
|
363 |
+
"[=Tc]": 32076,
|
364 |
+
"[=TeH2]": 32926,
|
365 |
+
"[=Te]": 32158,
|
366 |
+
"[=Th+2]": 32086,
|
367 |
+
"[=Th]": 32400,
|
368 |
+
"[=Ti+1]": 32542,
|
369 |
+
"[=Ti+2]": 32881,
|
370 |
+
"[=Ti]": 32788,
|
371 |
+
"[=Tl]": 32309,
|
372 |
+
"[=Tm]": 32540,
|
373 |
+
"[=U+2]": 32190,
|
374 |
+
"[=UH1]": 32201,
|
375 |
+
"[=U]": 32895,
|
376 |
+
"[=V+2]": 32796,
|
377 |
+
"[=VH1]": 32058,
|
378 |
+
"[=VH2]": 32364,
|
379 |
+
"[=V]": 32232,
|
380 |
+
"[=W-1]": 32050,
|
381 |
+
"[=WH1]": 32829,
|
382 |
+
"[=WH2]": 32961,
|
383 |
+
"[=WH4]": 32727,
|
384 |
+
"[=W]": 32069,
|
385 |
+
"[=Xe]": 32249,
|
386 |
+
"[=YH1]": 32787,
|
387 |
+
"[=Y]": 32654,
|
388 |
+
"[=Yb]": 32885,
|
389 |
+
"[=Zn]": 32134,
|
390 |
+
"[=Zr+2]": 32635,
|
391 |
+
"[=ZrH2]": 32061,
|
392 |
+
"[=Zr]": 32129,
|
393 |
+
"[Ac]": 32603,
|
394 |
+
"[Ag+1]": 32115,
|
395 |
+
"[Ag-1]": 32409,
|
396 |
+
"[AgH1]": 32962,
|
397 |
+
"[Ag]": 32502,
|
398 |
+
"[Al+1]": 32350,
|
399 |
+
"[Al+2]": 32496,
|
400 |
+
"[Al+3]": 32921,
|
401 |
+
"[Al-1]": 32168,
|
402 |
+
"[Al-3]": 32945,
|
403 |
+
"[AlH1+1]": 32857,
|
404 |
+
"[AlH1+2]": 32883,
|
405 |
+
"[AlH1-1]": 32369,
|
406 |
+
"[AlH1]": 32477,
|
407 |
+
"[AlH2+1]": 32597,
|
408 |
+
"[AlH2-1]": 32882,
|
409 |
+
"[AlH2]": 32072,
|
410 |
+
"[AlH3-1]": 32870,
|
411 |
+
"[AlH3]": 32637,
|
412 |
+
"[AlH4-1]": 32840,
|
413 |
+
"[Al]": 32694,
|
414 |
+
"[Am]": 32581,
|
415 |
+
"[Ar]": 32113,
|
416 |
+
"[As+1]": 32393,
|
417 |
+
"[As+3]": 32739,
|
418 |
+
"[As-1]": 32452,
|
419 |
+
"[AsH1+1]": 32068,
|
420 |
+
"[AsH1]": 32914,
|
421 |
+
"[AsH2+1]": 32909,
|
422 |
+
"[AsH2]": 32711,
|
423 |
+
"[AsH3+1]": 32305,
|
424 |
+
"[AsH3]": 32292,
|
425 |
+
"[As]": 32810,
|
426 |
+
"[AtH1]": 32445,
|
427 |
+
"[At]": 32835,
|
428 |
+
"[Au+1]": 32808,
|
429 |
+
"[Au+3]": 32749,
|
430 |
+
"[Au-1]": 32234,
|
431 |
+
"[AuH1]": 32210,
|
432 |
+
"[Au]": 32383,
|
433 |
+
"[B-1]": 32544,
|
434 |
+
"[BH0]": 32728,
|
435 |
+
"[BH1-1]": 32876,
|
436 |
+
"[BH2-1]": 32071,
|
437 |
+
"[BH3-1]": 32938,
|
438 |
+
"[BH4-1]": 32830,
|
439 |
+
"[B]": 32828,
|
440 |
+
"[Ba+2]": 32392,
|
441 |
+
"[Be+2]": 32108,
|
442 |
+
"[Be]": 32621,
|
443 |
+
"[Bh]": 32751,
|
444 |
+
"[Bi+1]": 32122,
|
445 |
+
"[Bi+2]": 32537,
|
446 |
+
"[Bi+3]": 32649,
|
447 |
+
"[Bi-1]": 32721,
|
448 |
+
"[Bi-2]": 32875,
|
449 |
+
"[BiH1]": 32959,
|
450 |
+
"[BiH2+2]": 32256,
|
451 |
+
"[BiH2]": 32710,
|
452 |
+
"[BiH3]": 32672,
|
453 |
+
"[Bi]": 32607,
|
454 |
+
"[Bk]": 32651,
|
455 |
+
"[Br+1]": 32165,
|
456 |
+
"[Br+2]": 32299,
|
457 |
+
"[Br+3]": 32983,
|
458 |
+
"[Br-1]": 32057,
|
459 |
+
"[Br]": 32150,
|
460 |
+
"[Branch1]": 32136,
|
461 |
+
"[Branch2]": 32216,
|
462 |
+
"[Branch3]": 32576,
|
463 |
+
"[C+1]": 32034,
|
464 |
+
"[C-1]": 32709,
|
465 |
+
"[C@@H1]": 32640,
|
466 |
+
"[C@@]": 32074,
|
467 |
+
"[C@H1]": 32179,
|
468 |
+
"[C@]": 32560,
|
469 |
+
"[CH0]": 32250,
|
470 |
+
"[CH1+1]": 32762,
|
471 |
+
"[CH1-1]": 32575,
|
472 |
+
"[CH1]": 32195,
|
473 |
+
"[CH2+1]": 32692,
|
474 |
+
"[CH2-1]": 32384,
|
475 |
+
"[CH3+1]": 32734,
|
476 |
+
"[CH3-1]": 32123,
|
477 |
+
"[C]": 32824,
|
478 |
+
"[Ca+2]": 32474,
|
479 |
+
"[Ca]": 32634,
|
480 |
+
"[Cd+2]": 32093,
|
481 |
+
"[Cd-1]": 32461,
|
482 |
+
"[Cd-2]": 32967,
|
483 |
+
"[Cd]": 32738,
|
484 |
+
"[Ce+3]": 32505,
|
485 |
+
"[Ce+4]": 32231,
|
486 |
+
"[CeH1]": 32389,
|
487 |
+
"[Ce]": 32255,
|
488 |
+
"[Cf]": 32789,
|
489 |
+
"[Cl+1]": 32446,
|
490 |
+
"[Cl+2]": 32696,
|
491 |
+
"[Cl+3]": 32911,
|
492 |
+
"[Cl-1]": 32513,
|
493 |
+
"[ClH0]": 32853,
|
494 |
+
"[ClH1+1]": 32887,
|
495 |
+
"[ClH2+1]": 32297,
|
496 |
+
"[ClH3+2]": 32591,
|
497 |
+
"[Cl]": 32441,
|
498 |
+
"[Cm]": 32733,
|
499 |
+
"[Co+1]": 32541,
|
500 |
+
"[Co+2]": 32356,
|
501 |
+
"[Co+3]": 32315,
|
502 |
+
"[Co-1]": 32187,
|
503 |
+
"[Co-2]": 32724,
|
504 |
+
"[Co-3]": 32180,
|
505 |
+
"[CoH1+1]": 32841,
|
506 |
+
"[CoH1+2]": 32890,
|
507 |
+
"[CoH2]": 32602,
|
508 |
+
"[Co]": 32495,
|
509 |
+
"[Cr+1]": 32512,
|
510 |
+
"[Cr+2]": 32361,
|
511 |
+
"[Cr+3]": 32949,
|
512 |
+
"[Cr+4]": 32119,
|
513 |
+
"[Cr+5]": 32215,
|
514 |
+
"[Cr+6]": 32508,
|
515 |
+
"[Cr-1]": 32295,
|
516 |
+
"[Cr-2]": 32173,
|
517 |
+
"[Cr-3]": 32873,
|
518 |
+
"[CrH1+2]": 32718,
|
519 |
+
"[CrH2]": 32382,
|
520 |
+
"[Cr]": 32747,
|
521 |
+
"[Cs+1]": 32624,
|
522 |
+
"[Cu+1]": 32375,
|
523 |
+
"[Cu+2]": 32081,
|
524 |
+
"[Cu-1]": 32671,
|
525 |
+
"[Cu-2]": 32803,
|
526 |
+
"[CuH1+1]": 32084,
|
527 |
+
"[CuH1]": 32704,
|
528 |
+
"[CuH2-1]": 32778,
|
529 |
+
"[Cu]": 32111,
|
530 |
+
"[Db]": 32935,
|
531 |
+
"[Dy+3]": 32221,
|
532 |
+
"[Dy]": 32526,
|
533 |
+
"[Er+3]": 32327,
|
534 |
+
"[Er]": 32208,
|
535 |
+
"[Es]": 32833,
|
536 |
+
"[Eu+2]": 32955,
|
537 |
+
"[Eu+3]": 32062,
|
538 |
+
"[Eu]": 32898,
|
539 |
+
"[F-1]": 32807,
|
540 |
+
"[F]": 32188,
|
541 |
+
"[Fe+1]": 32877,
|
542 |
+
"[Fe+2]": 32125,
|
543 |
+
"[Fe+3]": 32776,
|
544 |
+
"[Fe+4]": 32453,
|
545 |
+
"[Fe+5]": 32918,
|
546 |
+
"[Fe+6]": 32934,
|
547 |
+
"[Fe-1]": 32046,
|
548 |
+
"[Fe-2]": 32916,
|
549 |
+
"[Fe-3]": 32118,
|
550 |
+
"[Fe-4]": 32673,
|
551 |
+
"[FeH1]": 32659,
|
552 |
+
"[FeH2]": 32737,
|
553 |
+
"[FeH3]": 32416,
|
554 |
+
"[Fe]": 32159,
|
555 |
+
"[Fm]": 32530,
|
556 |
+
"[Ga+3]": 32049,
|
557 |
+
"[Ga-1]": 32334,
|
558 |
+
"[GaH1]": 32700,
|
559 |
+
"[GaH2]": 32209,
|
560 |
+
"[GaH3]": 32402,
|
561 |
+
"[Ga]": 32582,
|
562 |
+
"[Gd+2]": 32434,
|
563 |
+
"[Gd+3]": 32978,
|
564 |
+
"[Gd-4]": 32265,
|
565 |
+
"[Gd]": 32146,
|
566 |
+
"[Ge+4]": 32429,
|
567 |
+
"[GeH1]": 32646,
|
568 |
+
"[GeH2]": 32228,
|
569 |
+
"[GeH3]": 32871,
|
570 |
+
"[GeH4]": 32469,
|
571 |
+
"[Ge]": 32240,
|
572 |
+
"[H+1]": 32054,
|
573 |
+
"[H-1]": 32481,
|
574 |
+
"[HH1]": 32761,
|
575 |
+
"[H]": 32318,
|
576 |
+
"[He]": 32135,
|
577 |
+
"[Hf+2]": 32606,
|
578 |
+
"[Hf+4]": 32342,
|
579 |
+
"[Hf]": 32584,
|
580 |
+
"[Hg+1]": 32849,
|
581 |
+
"[Hg+2]": 32218,
|
582 |
+
"[Hg-1]": 32664,
|
583 |
+
"[Hg-2]": 32515,
|
584 |
+
"[HgH1]": 32984,
|
585 |
+
"[Hg]": 32104,
|
586 |
+
"[Ho+3]": 32644,
|
587 |
+
"[Ho]": 32281,
|
588 |
+
"[Hs]": 32564,
|
589 |
+
"[I+1]": 32894,
|
590 |
+
"[I+2]": 32695,
|
591 |
+
"[I+3]": 32742,
|
592 |
+
"[I-1]": 32753,
|
593 |
+
"[IH0]": 32472,
|
594 |
+
"[IH1+1]": 32082,
|
595 |
+
"[IH1-1]": 32406,
|
596 |
+
"[IH2+1]": 32491,
|
597 |
+
"[I]": 32720,
|
598 |
+
"[In+1]": 32183,
|
599 |
+
"[In+3]": 32268,
|
600 |
+
"[In-1]": 32974,
|
601 |
+
"[InH1]": 32307,
|
602 |
+
"[InH2]": 32451,
|
603 |
+
"[InH3]": 32519,
|
604 |
+
"[In]": 32572,
|
605 |
+
"[Ir+1]": 32331,
|
606 |
+
"[Ir+2]": 32202,
|
607 |
+
"[Ir+3]": 32565,
|
608 |
+
"[Ir-2]": 32433,
|
609 |
+
"[Ir-3]": 32415,
|
610 |
+
"[IrH1+2]": 32528,
|
611 |
+
"[IrH1]": 32650,
|
612 |
+
"[IrH2]": 32780,
|
613 |
+
"[IrH3]": 32878,
|
614 |
+
"[Ir]": 32653,
|
615 |
+
"[K+1]": 32396,
|
616 |
+
"[K]": 32942,
|
617 |
+
"[Kr]": 32706,
|
618 |
+
"[La+3]": 32539,
|
619 |
+
"[La]": 32600,
|
620 |
+
"[Li+1]": 32030,
|
621 |
+
"[Li-1]": 32888,
|
622 |
+
"[Li]": 32287,
|
623 |
+
"[Lr]": 32191,
|
624 |
+
"[Lu+3]": 32358,
|
625 |
+
"[Lu]": 32205,
|
626 |
+
"[Md]": 32320,
|
627 |
+
"[Mg+1]": 32665,
|
628 |
+
"[Mg+2]": 32447,
|
629 |
+
"[Mg]": 32836,
|
630 |
+
"[Mn+1]": 32913,
|
631 |
+
"[Mn+2]": 32324,
|
632 |
+
"[Mn+3]": 32838,
|
633 |
+
"[Mn-2]": 32207,
|
634 |
+
"[MnH1+1]": 32457,
|
635 |
+
"[MnH1]": 32212,
|
636 |
+
"[Mn]": 32932,
|
637 |
+
"[Mo+1]": 32341,
|
638 |
+
"[Mo+2]": 32905,
|
639 |
+
"[Mo+3]": 32562,
|
640 |
+
"[Mo+4]": 32629,
|
641 |
+
"[Mo-2]": 32476,
|
642 |
+
"[MoH1]": 32488,
|
643 |
+
"[MoH2]": 32412,
|
644 |
+
"[Mo]": 32759,
|
645 |
+
"[Mt]": 32139,
|
646 |
+
"[N+1]": 32171,
|
647 |
+
"[N-1]": 32524,
|
648 |
+
"[N@+1]": 32520,
|
649 |
+
"[N@@+1]": 32805,
|
650 |
+
"[N@@H1+1]": 32289,
|
651 |
+
"[N@H1+1]": 32852,
|
652 |
+
"[N@]": 32298,
|
653 |
+
"[NH0]": 32815,
|
654 |
+
"[NH1+1]": 32626,
|
655 |
+
"[NH1-1]": 32121,
|
656 |
+
"[NH1]": 32485,
|
657 |
+
"[NH2+1]": 32946,
|
658 |
+
"[NH2-1]": 32103,
|
659 |
+
"[NH3+1]": 32067,
|
660 |
+
"[NH4+1]": 32910,
|
661 |
+
"[N]": 32349,
|
662 |
+
"[Na+1]": 32831,
|
663 |
+
"[Na]": 32827,
|
664 |
+
"[Nb+2]": 32323,
|
665 |
+
"[Nb+3]": 32428,
|
666 |
+
"[Nb+5]": 32105,
|
667 |
+
"[Nb-1]": 32371,
|
668 |
+
"[Nb-2]": 32813,
|
669 |
+
"[NbH3]": 32538,
|
670 |
+
"[Nb]": 32825,
|
671 |
+
"[Nd+3]": 32545,
|
672 |
+
"[Nd]": 32532,
|
673 |
+
"[Ne]": 32555,
|
674 |
+
"[Ni+1]": 32630,
|
675 |
+
"[Ni+2]": 32783,
|
676 |
+
"[Ni+3]": 32133,
|
677 |
+
"[Ni-1]": 32304,
|
678 |
+
"[Ni-2]": 32680,
|
679 |
+
"[Ni-3]": 32293,
|
680 |
+
"[NiH1]": 32241,
|
681 |
+
"[NiH2]": 32204,
|
682 |
+
"[Ni]": 32372,
|
683 |
+
"[No]": 32804,
|
684 |
+
"[Np]": 32355,
|
685 |
+
"[O+1]": 32151,
|
686 |
+
"[O-1]": 32473,
|
687 |
+
"[O-2]": 32483,
|
688 |
+
"[OH0]": 32729,
|
689 |
+
"[OH1+1]": 32951,
|
690 |
+
"[OH1-1]": 32194,
|
691 |
+
"[OH2+1]": 32063,
|
692 |
+
"[OH3+1]": 32117,
|
693 |
+
"[O]": 32767,
|
694 |
+
"[Os+1]": 32070,
|
695 |
+
"[Os+2]": 32928,
|
696 |
+
"[Os+4]": 32370,
|
697 |
+
"[Os+5]": 32160,
|
698 |
+
"[Os+6]": 32518,
|
699 |
+
"[Os+7]": 32088,
|
700 |
+
"[Os+8]": 32137,
|
701 |
+
"[Os-1]": 32968,
|
702 |
+
"[Os-2]": 32306,
|
703 |
+
"[Os-3]": 32398,
|
704 |
+
"[OsH1-1]": 32648,
|
705 |
+
"[OsH1]": 32823,
|
706 |
+
"[OsH2]": 32716,
|
707 |
+
"[Os]": 32662,
|
708 |
+
"[P+1]": 32198,
|
709 |
+
"[P-1]": 32657,
|
710 |
+
"[P-3]": 32421,
|
711 |
+
"[P@@]": 32437,
|
712 |
+
"[P@]": 32802,
|
713 |
+
"[PH0]": 32460,
|
714 |
+
"[PH1+1]": 32399,
|
715 |
+
"[PH1-1]": 32586,
|
716 |
+
"[PH1]": 32971,
|
717 |
+
"[PH2+1]": 32099,
|
718 |
+
"[PH2-1]": 32374,
|
719 |
+
"[PH2]": 32330,
|
720 |
+
"[PH3+1]": 32196,
|
721 |
+
"[PH3]": 32655,
|
722 |
+
"[PH4+1]": 32675,
|
723 |
+
"[PH4]": 32590,
|
724 |
+
"[P]": 32456,
|
725 |
+
"[Pa]": 32906,
|
726 |
+
"[Pb+2]": 32533,
|
727 |
+
"[PbH1]": 32077,
|
728 |
+
"[PbH2+2]": 32482,
|
729 |
+
"[PbH2]": 32553,
|
730 |
+
"[PbH3]": 32674,
|
731 |
+
"[PbH4]": 32826,
|
732 |
+
"[Pb]": 32525,
|
733 |
+
"[Pd+1]": 32891,
|
734 |
+
"[Pd+2]": 32701,
|
735 |
+
"[Pd-1]": 32821,
|
736 |
+
"[Pd-2]": 32795,
|
737 |
+
"[PdH1+1]": 32053,
|
738 |
+
"[PdH1]": 32385,
|
739 |
+
"[PdH2]": 32923,
|
740 |
+
"[Pd]": 32713,
|
741 |
+
"[Pm]": 32431,
|
742 |
+
"[PoH1]": 32245,
|
743 |
+
"[Po]": 32419,
|
744 |
+
"[Pr+3]": 32596,
|
745 |
+
"[Pr]": 32432,
|
746 |
+
"[Pt+1]": 32360,
|
747 |
+
"[Pt+2]": 32763,
|
748 |
+
"[Pt+4]": 32089,
|
749 |
+
"[Pt-1]": 32872,
|
750 |
+
"[Pt-2]": 32858,
|
751 |
+
"[PtH1+1]": 32161,
|
752 |
+
"[PtH1]": 32822,
|
753 |
+
"[PtH2]": 32470,
|
754 |
+
"[PtH3]": 32181,
|
755 |
+
"[Pt]": 32236,
|
756 |
+
"[Pu]": 32426,
|
757 |
+
"[Rb+1]": 32217,
|
758 |
+
"[Rb]": 32758,
|
759 |
+
"[Re-1]": 32782,
|
760 |
+
"[Re-2]": 32506,
|
761 |
+
"[ReH1]": 32589,
|
762 |
+
"[ReH2]": 32286,
|
763 |
+
"[ReH3]": 32620,
|
764 |
+
"[ReH4]": 32031,
|
765 |
+
"[ReH7]": 32329,
|
766 |
+
"[Re]": 32126,
|
767 |
+
"[Rf]": 32677,
|
768 |
+
"[Rh+1]": 32277,
|
769 |
+
"[Rh+2]": 32862,
|
770 |
+
"[Rh+3]": 32608,
|
771 |
+
"[Rh-1]": 32574,
|
772 |
+
"[Rh-2]": 32233,
|
773 |
+
"[Rh-3]": 32670,
|
774 |
+
"[RhH1+2]": 32391,
|
775 |
+
"[RhH1]": 32869,
|
776 |
+
"[RhH2]": 32272,
|
777 |
+
"[RhH3]": 32120,
|
778 |
+
"[Rh]": 32516,
|
779 |
+
"[Ring1]": 32884,
|
780 |
+
"[Ring2]": 32028,
|
781 |
+
"[Ring3]": 32786,
|
782 |
+
"[Rn]": 32645,
|
783 |
+
"[Ru+1]": 32408,
|
784 |
+
"[Ru+2]": 32321,
|
785 |
+
"[Ru+3]": 32943,
|
786 |
+
"[Ru+4]": 32818,
|
787 |
+
"[Ru+5]": 32566,
|
788 |
+
"[Ru+6]": 32939,
|
789 |
+
"[Ru+8]": 32052,
|
790 |
+
"[Ru-1]": 32141,
|
791 |
+
"[Ru-2]": 32471,
|
792 |
+
"[Ru-3]": 32486,
|
793 |
+
"[Ru-4]": 32227,
|
794 |
+
"[RuH1+1]": 32132,
|
795 |
+
"[RuH1+2]": 32325,
|
796 |
+
"[RuH1+3]": 32504,
|
797 |
+
"[RuH1-1]": 32273,
|
798 |
+
"[RuH1]": 32443,
|
799 |
+
"[RuH2]": 32693,
|
800 |
+
"[RuH3]": 32690,
|
801 |
+
"[RuH4]": 32462,
|
802 |
+
"[Ru]": 32439,
|
803 |
+
"[S+1]": 32638,
|
804 |
+
"[S-1]": 32035,
|
805 |
+
"[S-2]": 32848,
|
806 |
+
"[S@+1]": 32100,
|
807 |
+
"[S@@+1]": 32514,
|
808 |
+
"[S@@]": 32546,
|
809 |
+
"[S@]": 32784,
|
810 |
+
"[SH0]": 32940,
|
811 |
+
"[SH1+1]": 32817,
|
812 |
+
"[SH1-1]": 32127,
|
813 |
+
"[SH1]": 32267,
|
814 |
+
"[SH2+1]": 32521,
|
815 |
+
"[SH2]": 32925,
|
816 |
+
"[SH3+1]": 32186,
|
817 |
+
"[SH3]": 32929,
|
818 |
+
"[SH4]": 32116,
|
819 |
+
"[S]": 32610,
|
820 |
+
"[Sb+1]": 32317,
|
821 |
+
"[Sb+3]": 32377,
|
822 |
+
"[Sb+5]": 32820,
|
823 |
+
"[Sb-1]": 32715,
|
824 |
+
"[SbH1+1]": 32769,
|
825 |
+
"[SbH1]": 32042,
|
826 |
+
"[SbH2+1]": 32580,
|
827 |
+
"[SbH2]": 32459,
|
828 |
+
"[SbH3+1]": 32484,
|
829 |
+
"[SbH4]": 32867,
|
830 |
+
"[Sb]": 32363,
|
831 |
+
"[Sc+3]": 32344,
|
832 |
+
"[Sc]": 32725,
|
833 |
+
"[Se+1]": 32336,
|
834 |
+
"[Se+4]": 32222,
|
835 |
+
"[Se-1]": 32614,
|
836 |
+
"[Se-2]": 32920,
|
837 |
+
"[SeH1+1]": 32145,
|
838 |
+
"[SeH1-1]": 32414,
|
839 |
+
"[SeH1]": 32102,
|
840 |
+
"[SeH2+1]": 32313,
|
841 |
+
"[SeH2]": 32440,
|
842 |
+
"[SeH3+1]": 32842,
|
843 |
+
"[SeH5]": 32723,
|
844 |
+
"[Se]": 32577,
|
845 |
+
"[Sg]": 32595,
|
846 |
+
"[Si+1]": 32041,
|
847 |
+
"[Si+2]": 32235,
|
848 |
+
"[Si+3]": 32866,
|
849 |
+
"[Si+4]": 32703,
|
850 |
+
"[Si-1]": 32770,
|
851 |
+
"[SiH1+1]": 32924,
|
852 |
+
"[SiH1-1]": 32976,
|
853 |
+
"[SiH1]": 32080,
|
854 |
+
"[SiH2+1]": 32771,
|
855 |
+
"[SiH2-1]": 32047,
|
856 |
+
"[SiH2]": 32964,
|
857 |
+
"[SiH3-1]": 32192,
|
858 |
+
"[SiH3]": 32130,
|
859 |
+
"[SiH4-1]": 32752,
|
860 |
+
"[SiH4]": 32682,
|
861 |
+
"[Si]": 32714,
|
862 |
+
"[Sm+3]": 32953,
|
863 |
+
"[Sm]": 32972,
|
864 |
+
"[Sn+1]": 32490,
|
865 |
+
"[Sn+2]": 32981,
|
866 |
+
"[Sn+3]": 32468,
|
867 |
+
"[Sn+4]": 32282,
|
868 |
+
"[Sn-1]": 32411,
|
869 |
+
"[SnH1+1]": 32548,
|
870 |
+
"[SnH1+3]": 32261,
|
871 |
+
"[SnH1]": 32669,
|
872 |
+
"[SnH2+2]": 32143,
|
873 |
+
"[SnH2-1]": 32418,
|
874 |
+
"[SnH2]": 32743,
|
875 |
+
"[SnH3]": 32944,
|
876 |
+
"[SnH4]": 32846,
|
877 |
+
"[Sn]": 32736,
|
878 |
+
"[Sr+2]": 32436,
|
879 |
+
"[Sr]": 32269,
|
880 |
+
"[Ta+2]": 32643,
|
881 |
+
"[Ta+5]": 32224,
|
882 |
+
"[Ta-1]": 32149,
|
883 |
+
"[TaH1]": 32735,
|
884 |
+
"[TaH2]": 32652,
|
885 |
+
"[TaH3]": 32442,
|
886 |
+
"[Ta]": 32712,
|
887 |
+
"[Tb+3]": 32351,
|
888 |
+
"[Tb+4]": 32264,
|
889 |
+
"[Tb]": 32056,
|
890 |
+
"[Tc+1]": 32060,
|
891 |
+
"[Tc+4]": 32368,
|
892 |
+
"[Tc+5]": 32251,
|
893 |
+
"[Tc+6]": 32570,
|
894 |
+
"[Tc+7]": 32065,
|
895 |
+
"[Tc]": 32394,
|
896 |
+
"[Te+1]": 32535,
|
897 |
+
"[Te+4]": 32722,
|
898 |
+
"[TeH1]": 32225,
|
899 |
+
"[TeH2+1]": 32522,
|
900 |
+
"[TeH2]": 32177,
|
901 |
+
"[Te]": 32033,
|
902 |
+
"[Th+2]": 32618,
|
903 |
+
"[Th+4]": 32175,
|
904 |
+
"[ThH1]": 32243,
|
905 |
+
"[ThH2]": 32164,
|
906 |
+
"[Th]": 32109,
|
907 |
+
"[Ti+1]": 32182,
|
908 |
+
"[Ti+2]": 32345,
|
909 |
+
"[Ti+3]": 32200,
|
910 |
+
"[Ti+4]": 32112,
|
911 |
+
"[Ti-1]": 32311,
|
912 |
+
"[Ti-2]": 32478,
|
913 |
+
"[TiH1+1]": 32167,
|
914 |
+
"[TiH1]": 32157,
|
915 |
+
"[Ti]": 32633,
|
916 |
+
"[Tl+1]": 32397,
|
917 |
+
"[Tl+3]": 32800,
|
918 |
+
"[TlH1]": 32258,
|
919 |
+
"[TlH2]": 32819,
|
920 |
+
"[Tl]": 32760,
|
921 |
+
"[Tm+3]": 32527,
|
922 |
+
"[Tm]": 32561,
|
923 |
+
"[U+2]": 32691,
|
924 |
+
"[U+3]": 32197,
|
925 |
+
"[U+4]": 32354,
|
926 |
+
"[UH1]": 32479,
|
927 |
+
"[UH2]": 32952,
|
928 |
+
"[UH3]": 32169,
|
929 |
+
"[U]": 32308,
|
930 |
+
"[V+2]": 32096,
|
931 |
+
"[V+4]": 32915,
|
932 |
+
"[VH1]": 32450,
|
933 |
+
"[VH2]": 32280,
|
934 |
+
"[V]": 32980,
|
935 |
+
"[W+1]": 32248,
|
936 |
+
"[W+2]": 32449,
|
937 |
+
"[WH1]": 32765,
|
938 |
+
"[WH2]": 32900,
|
939 |
+
"[WH3]": 32189,
|
940 |
+
"[W]": 32448,
|
941 |
+
"[XeH1]": 32794,
|
942 |
+
"[Xe]": 32616,
|
943 |
+
"[Y+3]": 32078,
|
944 |
+
"[YH1]": 32347,
|
945 |
+
"[YH2]": 32901,
|
946 |
+
"[Y]": 32352,
|
947 |
+
"[Yb+2]": 32861,
|
948 |
+
"[Yb+3]": 32500,
|
949 |
+
"[YbH2]": 32851,
|
950 |
+
"[Yb]": 32203,
|
951 |
+
"[Zn+1]": 32124,
|
952 |
+
"[Zn+2]": 32290,
|
953 |
+
"[Zn-1]": 32678,
|
954 |
+
"[Zn-2]": 32781,
|
955 |
+
"[ZnH1+1]": 32480,
|
956 |
+
"[ZnH1]": 32534,
|
957 |
+
"[ZnH2]": 32367,
|
958 |
+
"[Zn]": 32977,
|
959 |
+
"[Zr+2]": 32238,
|
960 |
+
"[Zr+3]": 32314,
|
961 |
+
"[Zr+4]": 32613,
|
962 |
+
"[Zr-1]": 32639,
|
963 |
+
"[Zr-2]": 32791,
|
964 |
+
"[Zr-3]": 32774,
|
965 |
+
"[Zr-4]": 32059,
|
966 |
+
"[ZrH1]": 32744,
|
967 |
+
"[ZrH2]": 32037,
|
968 |
+
"[Zr]": 32865,
|
969 |
+
"[\\Br]": 32523,
|
970 |
+
"[\\C@@H1]": 32686,
|
971 |
+
"[\\C@@]": 32266,
|
972 |
+
"[\\C@H1]": 32717,
|
973 |
+
"[\\C@]": 32246,
|
974 |
+
"[\\C]": 32424,
|
975 |
+
"[\\Cl]": 32931,
|
976 |
+
"[\\F]": 32529,
|
977 |
+
"[\\I]": 32386,
|
978 |
+
"[\\N+1]": 32403,
|
979 |
+
"[\\N-1]": 32092,
|
980 |
+
"[\\NH1+1]": 32766,
|
981 |
+
"[\\NH1]": 32051,
|
982 |
+
"[\\NH2+1]": 32627,
|
983 |
+
"[\\N]": 32178,
|
984 |
+
"[\\O-1]": 32605,
|
985 |
+
"[\\O]": 32797,
|
986 |
+
"[\\S]": 32543
|
987 |
+
}
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"bos_token_id": 1,
|
7 |
+
"eos_token_id": 2,
|
8 |
+
"hidden_act": "silu",
|
9 |
+
"hidden_size": 4096,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 11008,
|
12 |
+
"max_position_embeddings": 4096,
|
13 |
+
"model_type": "llama",
|
14 |
+
"num_attention_heads": 32,
|
15 |
+
"num_hidden_layers": 32,
|
16 |
+
"num_key_value_heads": 32,
|
17 |
+
"pad_token_id": 0,
|
18 |
+
"pretraining_tp": 1,
|
19 |
+
"rms_norm_eps": 1e-05,
|
20 |
+
"rope_scaling": null,
|
21 |
+
"tie_word_embeddings": false,
|
22 |
+
"torch_dtype": "float16",
|
23 |
+
"transformers_version": "4.31.0",
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 32985
|
26 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21c8f677d6459255e42c44ad3fdbbf5c94e2e78b04084880e52bbe8a9135e44d
|
3 |
+
size 13493010094
|
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"unk_token": {
|
17 |
+
"content": "<unk>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
}
|
23 |
+
}
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
tokenizer_config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"bos_token": {
|
5 |
+
"__type": "AddedToken",
|
6 |
+
"content": "<s>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"clean_up_tokenization_spaces": false,
|
13 |
+
"eos_token": {
|
14 |
+
"__type": "AddedToken",
|
15 |
+
"content": "</s>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false
|
20 |
+
},
|
21 |
+
"legacy": false,
|
22 |
+
"model_max_length": 1000000000000000019884624838656,
|
23 |
+
"pad_token": null,
|
24 |
+
"padding_side": "right",
|
25 |
+
"sp_model_kwargs": {},
|
26 |
+
"tokenizer_class": "LlamaTokenizer",
|
27 |
+
"unk_token": {
|
28 |
+
"__type": "AddedToken",
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": false,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false
|
34 |
+
}
|
35 |
+
}
|