Upload 11 files
Browse files- .gitattributes +2 -0
- 3rd_party_licenses.txt +48 -0
- License.txt +1 -0
- Readme.txt +10 -0
- genai_config.json +54 -0
- model.onnx +3 -0
- model.onnx_data +3 -0
- quantization_log.txt +45 -0
- quantization_pip.txt +307 -0
- special_tokens_map.json +16 -0
- tokenizer.json +3 -0
- tokenizer_config.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
37 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
3rd_party_licenses.txt
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
1: Name: TensorRT Model Optimizer - Windows
|
2 |
+
Version: 0.19
|
3 |
+
LicenseText: MIT License
|
4 |
+
|
5 |
+
Copyright (c) 2023 MIT HAN Lab
|
6 |
+
|
7 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
8 |
+
of this software and associated documentation files (the "Software"), to deal
|
9 |
+
in the Software without restriction, including without limitation the rights
|
10 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11 |
+
copies of the Software, and to permit persons to whom the Software is
|
12 |
+
furnished to do so, subject to the following conditions:
|
13 |
+
|
14 |
+
The above copyright notice and this permission notice shall be included in all
|
15 |
+
copies or substantial portions of the Software.
|
16 |
+
|
17 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
18 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
19 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
20 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
21 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
22 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23 |
+
SOFTWARE.
|
24 |
+
|
25 |
+
|
26 |
+
2: Name: onnxruntime-genai-directml
|
27 |
+
Version: 0.4.0
|
28 |
+
LicenseText: MIT License
|
29 |
+
|
30 |
+
Copyright (c) Microsoft Corporation.
|
31 |
+
|
32 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
33 |
+
of this software and associated documentation files (the "Software"), to deal
|
34 |
+
in the Software without restriction, including without limitation the rights
|
35 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
36 |
+
copies of the Software, and to permit persons to whom the Software is
|
37 |
+
furnished to do so, subject to the following conditions:
|
38 |
+
|
39 |
+
The above copyright notice and this permission notice shall be included in all
|
40 |
+
copies or substantial portions of the Software.
|
41 |
+
|
42 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
43 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
44 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
45 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
46 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
47 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
48 |
+
SOFTWARE.
|
License.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
GOVERNING TERMS: Use of this model is governed by the NVIDIA Open Model License Agreement (found at https://developer.download.nvidia.com/licenses/nvidia-open-model-license-agreement-june-2024.pdf). ADDITIONAL INFORMATION: Apache License, Version 2.0 (found at https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md).
|
Readme.txt
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
To run inference with this model, follow the steps below:
|
2 |
+
|
3 |
+
1. Install NVIDIA graphics driver R565 or higher.
|
4 |
+
2. Install Python 3.10.11.
|
5 |
+
3. Set up GenAI DirectML:
|
6 |
+
a. Install numpy - pip install numpy==2.1.0
|
7 |
+
b. Install GenAI - pip install onnxruntime-genai-directml==0.5.0
|
8 |
+
4. Download inference script - curl -L https://raw.githubusercontent.com/microsoft/onnxruntime-genai/rel-0.5.0/examples/python/phi3-qa.py -o phi3-qa.py
|
9 |
+
5. Run inference - python phi3-qa.py -m <model-downloaded-path>
|
10 |
+
a. Enter prompt - "What is GenAI?"
|
genai_config.json
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": {
|
3 |
+
"bos_token_id": 2,
|
4 |
+
"context_length": 4096,
|
5 |
+
"decoder": {
|
6 |
+
"session_options": {
|
7 |
+
"log_id": "onnxruntime-genai",
|
8 |
+
"provider_options": [
|
9 |
+
{
|
10 |
+
"dml": {}
|
11 |
+
}
|
12 |
+
]
|
13 |
+
},
|
14 |
+
"filename": "model.onnx",
|
15 |
+
"head_size": 128,
|
16 |
+
"hidden_size": 3072,
|
17 |
+
"inputs": {
|
18 |
+
"input_ids": "input_ids",
|
19 |
+
"attention_mask": "attention_mask",
|
20 |
+
"position_ids": "position_ids",
|
21 |
+
"past_key_names": "past_key_values.%d.key",
|
22 |
+
"past_value_names": "past_key_values.%d.value"
|
23 |
+
},
|
24 |
+
"outputs": {
|
25 |
+
"logits": "logits",
|
26 |
+
"present_key_names": "present.%d.key",
|
27 |
+
"present_value_names": "present.%d.value"
|
28 |
+
},
|
29 |
+
"num_attention_heads": 24,
|
30 |
+
"num_hidden_layers": 32,
|
31 |
+
"num_key_value_heads": 8
|
32 |
+
},
|
33 |
+
"eos_token_id": 3,
|
34 |
+
"pad_token_id": 3,
|
35 |
+
"type": "nemotron",
|
36 |
+
"vocab_size": 256000
|
37 |
+
},
|
38 |
+
"search": {
|
39 |
+
"diversity_penalty": 0.0,
|
40 |
+
"do_sample": false,
|
41 |
+
"early_stopping": true,
|
42 |
+
"length_penalty": 1.0,
|
43 |
+
"max_length": 4096,
|
44 |
+
"min_length": 0,
|
45 |
+
"no_repeat_ngram_size": 0,
|
46 |
+
"num_beams": 1,
|
47 |
+
"num_return_sequences": 1,
|
48 |
+
"past_present_share_buffer": true,
|
49 |
+
"repetition_penalty": 1.0,
|
50 |
+
"temperature": 1.0,
|
51 |
+
"top_k": 1,
|
52 |
+
"top_p": 1.0
|
53 |
+
}
|
54 |
+
}
|
model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e5edb075cb4642ce920845ac7372861217418448ef3604a2c643ef7fa9bc0c2
|
3 |
+
size 394332
|
model.onnx_data
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27dfc997144ec05ae7d374d11ab626b55d0a7184c933d7961065ae6a6650911e
|
3 |
+
size 4498141184
|
quantization_log.txt
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
(modelopt) PS E:\ModelOpt_Windows_Scripts_2\modelopt-windows-scripts\ONNX_PTQ> python quantize_script.py --model_name=nvidia/Nemotron-Mini-4B-Instruct --onnx_path=E:\model_store\genai\nemotron-mini-4b-instruct-fp16-dml-genai\opset_21\model.onnx --output_path="E:\model_store\genai\nemotron-mini-4b-instruct-fp16-dml-genai\opset_21\default_quant_dml_ep_calib\model.onnx"
|
2 |
+
|
3 |
+
--Quantize-Script-- algo=awq_lite, dataset=cnn, calib_size=32, batch_size=1, block_size=128, add-position-ids=True, past-kv=True, rcalib=False, device=cpu, use_zero_point=False
|
4 |
+
|
5 |
+
|
6 |
+
|
7 |
+
--Quantize-Script-- awqlite_alpha_step=0.1, awqlite_fuse_nodes=False, awqlite_run_per_subgraph=False, awqclip_alpha_step=0.05, awqclip_alpha_min=0.5, awqclip_bsz_col=1024, calibration_eps=['dml']
|
8 |
+
|
9 |
+
C:\Users\vrl\miniconda3\envs\modelopt\Lib\site-packages\transformers\models\auto\configuration_auto.py:1002: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
|
10 |
+
warnings.warn(
|
11 |
+
C:\Users\vrl\miniconda3\envs\modelopt\Lib\site-packages\transformers\models\auto\tokenization_auto.py:809: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.
|
12 |
+
warnings.warn(
|
13 |
+
|
14 |
+
--Quantize-Script-- number_of_batched_samples=32, batch-input-ids-list-len=32, batched_attention_mask=32
|
15 |
+
|
16 |
+
|
17 |
+
--Quantize-Script-- number of batched inputs = 32
|
18 |
+
|
19 |
+
INFO:root:
|
20 |
+
Quantizing the model....
|
21 |
+
|
22 |
+
INFO:root:Quantization Mode: int4
|
23 |
+
INFO:root:Finding quantizable weights and augmenting graph output with input activations
|
24 |
+
INFO:root:Augmenting took 0.03900003433227539 seconds
|
25 |
+
INFO:root:Saving the model took 35.37520098686218 seconds
|
26 |
+
2024-11-05 06:08:38.8247274 [W:onnxruntime:, session_state.cc:1168 onnxruntime::VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
|
27 |
+
2024-11-05 06:08:38.8385074 [W:onnxruntime:, session_state.cc:1170 onnxruntime::VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.
|
28 |
+
Getting activation names maps...: 100%|██████████████████████████████████████████████████████| 192/192 [00:00<?, ?it/s]
|
29 |
+
Running AWQ scale search per node...: 100%|██████████████████████████████████████████| 192/192 [05:08<00:00, 1.61s/it]
|
30 |
+
INFO:root:AWQ scale search took 308.7233784198761 seconds
|
31 |
+
Quantizing the weights...: 100%|█████████████████████████████████████████████████████| 192/192 [00:05<00:00, 32.75it/s]
|
32 |
+
INFO:root:Quantizing actual weights took 5.864110231399536 seconds
|
33 |
+
INFO:root:Inserting DQ nodes and input_pre_quant_scale node using quantized weights and scales ...
|
34 |
+
INFO:root:Inserting nodes took 0.1272134780883789 seconds
|
35 |
+
INFO:root:Exporting the quantized graph ...
|
36 |
+
Loading extension modelopt_round_and_pack_ext...
|
37 |
+
|
38 |
+
INFO:root:Exporting took 33.892990589141846 seconds
|
39 |
+
INFO:root:
|
40 |
+
Quantization process took 394.4490396976471 seconds
|
41 |
+
INFO:root:Saving to E:\model_store\genai\nemotron-mini-4b-instruct-fp16-dml-genai\opset_21\default_quant_dml_ep_calib\model.onnx took 33.43196678161621 seconds
|
42 |
+
|
43 |
+
Done
|
44 |
+
|
45 |
+
(modelopt) PS E:\ModelOpt_Windows_Scripts_2\modelopt-windows-scripts\ONNX_PTQ>
|
quantization_pip.txt
ADDED
@@ -0,0 +1,307 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
(modelopt) PS E:\ModelOpt_Windows_Scripts_2\modelopt-windows-scripts\ONNX_PTQ> pip list
|
2 |
+
Package Version Editable project location
|
3 |
+
----------------------------- --------------------------------- -------------------------
|
4 |
+
absl-py 2.1.0
|
5 |
+
accelerate 0.34.2
|
6 |
+
aiohttp 3.9.5
|
7 |
+
aiosignal 1.3.1
|
8 |
+
alabaster 0.7.16
|
9 |
+
annotated-types 0.6.0
|
10 |
+
anyio 4.3.0
|
11 |
+
asttokens 2.4.1
|
12 |
+
astunparse 1.6.3
|
13 |
+
attrs 23.2.0
|
14 |
+
autodoc_pydantic 2.2.0
|
15 |
+
Babel 2.15.0
|
16 |
+
backports.tarfile 1.1.1
|
17 |
+
bandit 1.7.9
|
18 |
+
beautifulsoup4 4.12.3
|
19 |
+
black 24.4.2
|
20 |
+
bleach 6.1.0
|
21 |
+
boto3 1.35.0
|
22 |
+
botocore 1.35.0
|
23 |
+
build 1.2.1
|
24 |
+
cachetools 5.5.0
|
25 |
+
certifi 2024.2.2
|
26 |
+
cffi 1.17.0
|
27 |
+
cfgv 3.4.0
|
28 |
+
chardet 5.2.0
|
29 |
+
charset-normalizer 3.3.2
|
30 |
+
click 8.1.7
|
31 |
+
cloudpickle 3.0.0
|
32 |
+
colorama 0.4.6
|
33 |
+
colored 2.2.4
|
34 |
+
coloredlogs 15.0.1
|
35 |
+
comm 0.2.2
|
36 |
+
contourpy 1.2.1
|
37 |
+
coverage 7.5.1
|
38 |
+
cppimport 22.8.2
|
39 |
+
cuda-python 12.3.0
|
40 |
+
cupy-cuda12x 13.3.0
|
41 |
+
cycler 0.12.1
|
42 |
+
Cython 3.0.10
|
43 |
+
DataProperty 1.0.1
|
44 |
+
datasets 2.19.0
|
45 |
+
decorator 5.1.1
|
46 |
+
defusedxml 0.7.1
|
47 |
+
Deprecated 1.2.14
|
48 |
+
diffusers 0.29.2
|
49 |
+
dill 0.3.8
|
50 |
+
distlib 0.3.8
|
51 |
+
docutils 0.20.1
|
52 |
+
evaluate 0.4.2
|
53 |
+
executing 2.0.1
|
54 |
+
fastjsonschema 2.19.1
|
55 |
+
fastrlock 0.8.2
|
56 |
+
filelock 3.15.4
|
57 |
+
fire 0.6.0
|
58 |
+
flatbuffers 24.3.25
|
59 |
+
fonttools 4.51.0
|
60 |
+
frozenlist 1.4.1
|
61 |
+
fsspec 2024.3.1
|
62 |
+
google-api-core 2.19.1
|
63 |
+
google-api-python-client 2.141.0
|
64 |
+
google-auth 2.34.0
|
65 |
+
google-auth-httplib2 0.2.0
|
66 |
+
googleapis-common-protos 1.63.2
|
67 |
+
GPUtil 1.4.0
|
68 |
+
h11 0.14.0
|
69 |
+
h5py 3.11.0
|
70 |
+
httplib2 0.22.0
|
71 |
+
huggingface-hub 0.24.0
|
72 |
+
humanfriendly 10.0
|
73 |
+
identify 2.5.36
|
74 |
+
idna 3.7
|
75 |
+
imagesize 1.4.1
|
76 |
+
importlib_metadata 7.1.0
|
77 |
+
iniconfig 2.0.0
|
78 |
+
intel-openmp 2021.4.0
|
79 |
+
ipython 8.24.0
|
80 |
+
ipywidgets 8.1.2
|
81 |
+
janus 1.0.0
|
82 |
+
jaraco.classes 3.4.0
|
83 |
+
jaraco.context 5.3.0
|
84 |
+
jaraco.functools 4.0.1
|
85 |
+
jax 0.4.31
|
86 |
+
jaxlib 0.4.31
|
87 |
+
jedi 0.19.1
|
88 |
+
Jinja2 3.1.3
|
89 |
+
jmespath 1.0.1
|
90 |
+
joblib 1.4.2
|
91 |
+
jsonlines 4.0.0
|
92 |
+
jsonschema 4.22.0
|
93 |
+
jsonschema-specifications 2023.12.1
|
94 |
+
jupyter_client 8.6.1
|
95 |
+
jupyter_core 5.7.2
|
96 |
+
jupyterlab_pygments 0.3.0
|
97 |
+
jupyterlab_widgets 3.0.10
|
98 |
+
keyring 25.2.0
|
99 |
+
kiwisolver 1.4.5
|
100 |
+
lm_eval 0.4.2
|
101 |
+
lora 0.3.0
|
102 |
+
lxml 5.2.2
|
103 |
+
Mako 1.3.5
|
104 |
+
markdown-it-py 3.0.0
|
105 |
+
MarkupSafe 2.1.5
|
106 |
+
maskprop 0.1.3
|
107 |
+
matplotlib 3.8.4
|
108 |
+
matplotlib-inline 0.1.7
|
109 |
+
mbstrdecoder 1.1.3
|
110 |
+
mdurl 0.1.2
|
111 |
+
mistune 3.0.2
|
112 |
+
mkl 2021.4.0
|
113 |
+
ml-dtypes 0.4.0
|
114 |
+
more-itertools 10.2.0
|
115 |
+
mpi4py 3.1.6
|
116 |
+
mpmath 1.3.0
|
117 |
+
multidict 6.0.5
|
118 |
+
multiprocess 0.70.16
|
119 |
+
mypy 1.11.2
|
120 |
+
mypy-extensions 1.0.0
|
121 |
+
nbclient 0.10.0
|
122 |
+
nbconvert 7.16.4
|
123 |
+
nbformat 5.10.4
|
124 |
+
nbsphinx 0.9.4
|
125 |
+
networkx 3.3
|
126 |
+
neural_compressor 3.0.1
|
127 |
+
nh3 0.2.17
|
128 |
+
ninja 1.11.1.1
|
129 |
+
nltk 3.8.1
|
130 |
+
nodeenv 1.8.0
|
131 |
+
numexpr 2.10.0
|
132 |
+
numpy 1.26.4
|
133 |
+
nvidia-cublas-cu12 12.5.2.13
|
134 |
+
nvidia-cuda-nvrtc-cu12 12.5.40
|
135 |
+
nvidia-cuda-runtime-cu12 12.4.127
|
136 |
+
nvidia-cudnn-cu12 9.1.1.17
|
137 |
+
nvidia-modelopt 0.12.2.dev106+g2ea947ec.d20240614 D:\__ammo_code__\modelopt
|
138 |
+
onnx 1.16.0
|
139 |
+
onnx-graphsurgeon 0.5.2
|
140 |
+
onnxconverter-common 1.14.0
|
141 |
+
onnxmltools 1.12.0
|
142 |
+
onnxruntime-directml 1.20.0
|
143 |
+
onnxscript 0.1.0.dev20241104
|
144 |
+
opencv-python 4.9.0.80
|
145 |
+
opencv-python-headless 4.10.0.84
|
146 |
+
opt-einsum 3.3.0
|
147 |
+
optimum 1.21.4
|
148 |
+
outcome 1.3.0.post0
|
149 |
+
packaging 24.1
|
150 |
+
pandas 2.2.2
|
151 |
+
pandocfilters 1.5.1
|
152 |
+
parameterized 0.9.0
|
153 |
+
parso 0.8.4
|
154 |
+
pathspec 0.12.1
|
155 |
+
pathvalidate 3.2.0
|
156 |
+
pbr 6.0.0
|
157 |
+
peft 0.13.0
|
158 |
+
pillow 10.2.0
|
159 |
+
pip 23.3.1
|
160 |
+
pkginfo 1.10.0
|
161 |
+
platformdirs 4.2.2
|
162 |
+
pluggy 1.5.0
|
163 |
+
polygraphy 0.49.9
|
164 |
+
portalocker 2.8.2
|
165 |
+
pre-commit 3.7.0
|
166 |
+
prettytable 3.11.0
|
167 |
+
prompt-toolkit 3.0.43
|
168 |
+
proto-plus 1.24.0
|
169 |
+
protobuf 3.20.2
|
170 |
+
psutil 5.9.8
|
171 |
+
PuLP 2.8.0
|
172 |
+
pure-eval 0.2.2
|
173 |
+
py-cpuinfo 9.0.0
|
174 |
+
pyarrow 16.0.0
|
175 |
+
pyarrow-hotfix 0.6
|
176 |
+
pyasn1 0.6.0
|
177 |
+
pyasn1_modules 0.4.0
|
178 |
+
pybind11 2.12.0
|
179 |
+
pycocotools 2.0.8
|
180 |
+
pycparser 2.22
|
181 |
+
pycuda 2024.1.2
|
182 |
+
pydantic 2.7.1
|
183 |
+
pydantic_core 2.18.2
|
184 |
+
pydantic-settings 2.2.1
|
185 |
+
Pygments 2.17.2
|
186 |
+
pynvml 11.5.0
|
187 |
+
pypandoc 1.13
|
188 |
+
pyparsing 3.1.2
|
189 |
+
pyproject-api 1.7.1
|
190 |
+
pyproject_hooks 1.1.0
|
191 |
+
pyreadline3 3.4.1
|
192 |
+
PySocks 1.7.1
|
193 |
+
pytablewriter 1.2.0
|
194 |
+
pytest 8.2.0
|
195 |
+
pytest-asyncio 0.23.6
|
196 |
+
pytest-cov 5.0.0
|
197 |
+
pytest-timeout 2.3.1
|
198 |
+
python-dateutil 2.9.0.post0
|
199 |
+
python-dotenv 1.0.1
|
200 |
+
python-magic 0.4.27
|
201 |
+
pytools 2024.1.13
|
202 |
+
pytz 2024.1
|
203 |
+
pywin32 306
|
204 |
+
pywin32-ctypes 0.2.2
|
205 |
+
PyYAML 6.0.1
|
206 |
+
pyzmq 26.0.3
|
207 |
+
readme_renderer 43.0
|
208 |
+
referencing 0.35.1
|
209 |
+
regex 2024.4.28
|
210 |
+
requests 2.31.0
|
211 |
+
requests-toolbelt 1.0.0
|
212 |
+
rfc3986 2.0.0
|
213 |
+
rich 13.7.1
|
214 |
+
rouge-score 0.1.2
|
215 |
+
rpds-py 0.18.1
|
216 |
+
rsa 4.9
|
217 |
+
ruff 0.6.4
|
218 |
+
s3transfer 0.10.2
|
219 |
+
sacrebleu 2.4.2
|
220 |
+
safetensors 0.4.3
|
221 |
+
schema 0.7.7
|
222 |
+
scikit-cuda 0.5.3
|
223 |
+
scikit-learn 1.5.0
|
224 |
+
scipy 1.13.0
|
225 |
+
seaborn 0.13.2
|
226 |
+
selenium 4.23.1
|
227 |
+
sentencepiece 0.2.0
|
228 |
+
setuptools 68.2.2
|
229 |
+
setuptools-scm 8.1.0
|
230 |
+
six 1.16.0
|
231 |
+
skipy 0.2.0
|
232 |
+
skl2onnx 1.17.0
|
233 |
+
sniffio 1.3.1
|
234 |
+
snowballstemmer 2.2.0
|
235 |
+
sortedcontainers 2.4.0
|
236 |
+
soupsieve 2.5
|
237 |
+
Sphinx 7.2.6
|
238 |
+
sphinx-autobuild 2024.4.16
|
239 |
+
sphinx-copybutton 0.5.2
|
240 |
+
sphinx_inline_tabs 2023.4.21
|
241 |
+
sphinx-rtd-theme 2.0.0
|
242 |
+
sphinx-togglebutton 0.3.2
|
243 |
+
sphinxcontrib-applehelp 1.0.8
|
244 |
+
sphinxcontrib-devhelp 1.0.6
|
245 |
+
sphinxcontrib-htmlhelp 2.0.5
|
246 |
+
sphinxcontrib-jquery 4.1
|
247 |
+
sphinxcontrib-jsmath 1.0.1
|
248 |
+
sphinxcontrib-qthelp 1.0.7
|
249 |
+
sphinxcontrib-serializinghtml 1.1.10
|
250 |
+
sqlitedict 2.1.0
|
251 |
+
stack-data 0.6.3
|
252 |
+
starlette 0.37.2
|
253 |
+
stevedore 5.2.0
|
254 |
+
StrEnum 0.4.15
|
255 |
+
sympy 1.12
|
256 |
+
tabledata 1.3.3
|
257 |
+
tabulate 0.9.0
|
258 |
+
tbb 2021.12.0
|
259 |
+
tcolorpy 0.1.6
|
260 |
+
tensorrt 10.1.0
|
261 |
+
tensorrt-bindings 9.3.0.post12.dev1
|
262 |
+
tensorrt-cu12 10.1.0
|
263 |
+
tensorrt-cu12_bindings 10.1.0
|
264 |
+
tensorrt-cu12_libs 10.1.0
|
265 |
+
termcolor 2.4.0
|
266 |
+
threadpoolctl 3.5.0
|
267 |
+
timm 0.9.16
|
268 |
+
tinycss2 1.3.0
|
269 |
+
tokenizers 0.20.2
|
270 |
+
toml 0.10.2
|
271 |
+
torch 2.3.0+cu118
|
272 |
+
torchaudio 2.3.0+cu118
|
273 |
+
torchprofile 0.0.4
|
274 |
+
torchsurgeon 0.1.2
|
275 |
+
torchvision 0.18.0+cu118
|
276 |
+
tornado 6.4
|
277 |
+
tox 4.17.1
|
278 |
+
tox-current-env 0.0.12
|
279 |
+
tqdm 4.66.2
|
280 |
+
tqdm-multiprocess 0.0.11
|
281 |
+
traitlets 5.14.3
|
282 |
+
transformers 4.46.1
|
283 |
+
trio 0.26.2
|
284 |
+
trio-websocket 0.11.1
|
285 |
+
twine 5.0.0
|
286 |
+
typepy 1.3.2
|
287 |
+
typing_extensions 4.11.0
|
288 |
+
tzdata 2024.1
|
289 |
+
uritemplate 4.1.1
|
290 |
+
urllib3 2.2.1
|
291 |
+
uvicorn 0.29.0
|
292 |
+
virtualenv 20.26.3
|
293 |
+
watchfiles 0.21.0
|
294 |
+
wcwidth 0.2.13
|
295 |
+
webencodings 0.5.1
|
296 |
+
websocket-client 1.8.0
|
297 |
+
websockets 12.0
|
298 |
+
wheel 0.41.2
|
299 |
+
widgetsnbextension 4.0.10
|
300 |
+
word2number 1.1
|
301 |
+
wrapt 1.16.0
|
302 |
+
wsproto 1.2.0
|
303 |
+
xxhash 3.4.1
|
304 |
+
yarl 1.9.4
|
305 |
+
zipp 3.18.1
|
306 |
+
zstandard 0.22.0
|
307 |
+
(modelopt) PS E:\ModelOpt_Windows_Scripts_2\modelopt-windows-scripts\ONNX_PTQ>
|
special_tokens_map.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
}
|
16 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91fd32f29cfeb8fee9535681f551697b066992c9d950d810651801be24f39e93
|
3 |
+
size 34809710
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|