Build
- build/torch25-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch25-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch25-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_v7rnpcck3kry4.abi3.so +3 -0
- build/torch25-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch25-cxx11-cu121-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch25-cxx11-cu121-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch25-cxx11-cu121-x86_64-linux/quantization_eetq/_quantization_eetq_zcfiojfkx55be.abi3.so +3 -0
- build/torch25-cxx11-cu121-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch25-cxx11-cu124-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch25-cxx11-cu124-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch25-cxx11-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_btymam4x7xvs6.abi3.so +3 -0
- build/torch25-cxx11-cu124-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch25-cxx98-cu118-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch25-cxx98-cu118-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch25-cxx98-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_yy3p6bsf622sq.abi3.so +3 -0
- build/torch25-cxx98-cu118-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch25-cxx98-cu121-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch25-cxx98-cu121-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch25-cxx98-cu121-x86_64-linux/quantization_eetq/_quantization_eetq_imijtykkseqze.abi3.so +3 -0
- build/torch25-cxx98-cu121-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch25-cxx98-cu124-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch25-cxx98-cu124-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch25-cxx98-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_4qerj3t7ddiry.abi3.so +3 -0
- build/torch25-cxx98-cu124-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_j23ltbqvrnixg.abi3.so +3 -0
- build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_p5neqtnhdgxv2.abi3.so +3 -0
- build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_idk3dezy35dfk.abi3.so +3 -0
- build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_fpjoxzd7nm2qa.abi3.so +3 -0
- build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_k7mlunxe2ye4s.abi3.so +3 -0
- build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
- build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/__init__.py +3 -0
- build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_ops.py +9 -0
- build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_7m7hz3sbwkaio.abi3.so +3 -0
- build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/custom_ops.py +36 -0
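Each variant directory name encodes the PyTorch version, C++ ABI, CUDA toolkit, CPU architecture, and OS the shared object was compiled against. As a rough illustration of how a loader might pick the matching directory at runtime (a hedged sketch; `guess_build_variant` is hypothetical and the real selection logic lives in the kernel loader, not in these files):

    import platform
    import sys

    import torch

    def guess_build_variant() -> str:
        # Hypothetical helper: derive the directory name from the local torch
        # build, mirroring the naming scheme seen above. Assumes a CUDA build
        # (torch.version.cuda is None on CPU-only installs).
        major, minor = torch.__version__.split(".")[:2]          # "2.6.0+cu124" -> ("2", "6")
        torch_tag = f"torch{major}{minor}"                        # -> "torch26"
        cxx_tag = "cxx11" if torch._C._GLIBCXX_USE_CXX11_ABI else "cxx98"
        cuda_tag = "cu" + torch.version.cuda.replace(".", "")     # "12.4" -> "cu124"
        return f"{torch_tag}-{cxx_tag}-{cuda_tag}-{platform.machine()}-{sys.platform}"

    # On a PyTorch 2.6 / CUDA 12.4 Linux box this yields
    # "torch26-cxx11-cu124-x86_64-linux", matching one directory above.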
build/torch25-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch25-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_v7rnpcck3kry4
+ops = torch.ops._quantization_eetq_v7rnpcck3kry4
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_v7rnpcck3kry4::{op_name}"
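`add_op_namespace_prefix` simply qualifies an op name with the shared object's private namespace; nothing in `_ops.py` calls it. A hypothetical caller might use it when registering an extra implementation for one of these ops, for example a shape-only fake kernel so the op can be traced under torch.compile (sketch assumes torch >= 2.4 and a weight layout of (k, n); neither is confirmed by this diff):

    import torch
    from quantization_eetq._ops import add_op_namespace_prefix

    # Hypothetical: register a meta/"fake" kernel for w8_a16_gemm.
    @torch.library.register_fake(add_op_namespace_prefix("w8_a16_gemm"))
    def _(input, weight, scale):
        # Assumes the output shape is (..., n) with weight of shape (k, n);
        # adjust if the kernel's actual layout differs.
        return input.new_empty(*input.shape[:-1], weight.shape[-1])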
build/torch25-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_v7rnpcck3kry4.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff073c997a4857cf7f88f5ca5b6a0e0aed3feb9739ba4187a3c1ec6fd2f1b64b
+size 28364752
build/torch25-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
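Taken together, the intended flow is: quantize an fp16 weight to int8 once with `quant_weights` (which folds in the `preprocess_weights` layout shuffle unless you ask for the unprocessed tensor), then run weight-int8 / activation-fp16 GEMMs with `w8_a16_gemm`. A minimal usage sketch, assuming `quant_weights` returns the device-ready int8 weight followed by per-output-channel fp16 scales and expects a (k, n) CPU weight, which matches how EETQ's bindings are typically driven but is not spelled out in this diff:

    import torch
    from quantization_eetq import quant_weights, w8_a16_gemm

    # Quantize once, offline: (k, n) fp16 weight on CPU -> int8 weight + scales.
    weight = torch.randn(4096, 4096, dtype=torch.float16)
    qweight, scales = quant_weights(weight, torch.int8, False)

    # Run the weight-only-int8 GEMM with fp16 activations on GPU.
    x = torch.randn(8, 4096, dtype=torch.float16, device="cuda")
    y = w8_a16_gemm(x, qweight.cuda(), scales.cuda())  # fp16 output, shape (8, 4096)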
build/torch25-cxx11-cu121-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch25-cxx11-cu121-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_zcfiojfkx55be
+ops = torch.ops._quantization_eetq_zcfiojfkx55be
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_zcfiojfkx55be::{op_name}"
build/torch25-cxx11-cu121-x86_64-linux/quantization_eetq/_quantization_eetq_zcfiojfkx55be.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93f7cce612f7efed34ccfadc57ca6a17899feba7ba4780f33d4ff8f828171bfc
+size 27919784
build/torch25-cxx11-cu121-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch25-cxx11-cu124-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch25-cxx11-cu124-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_btymam4x7xvs6
+ops = torch.ops._quantization_eetq_btymam4x7xvs6
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_btymam4x7xvs6::{op_name}"
build/torch25-cxx11-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_btymam4x7xvs6.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3e693ee1f395169b985b0dadab379cc5d2be0858ebd0caa3dedc5c6b8bd7a7d
+size 27950768
build/torch25-cxx11-cu124-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch25-cxx98-cu118-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch25-cxx98-cu118-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_yy3p6bsf622sq
+ops = torch.ops._quantization_eetq_yy3p6bsf622sq
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_yy3p6bsf622sq::{op_name}"
build/torch25-cxx98-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_yy3p6bsf622sq.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3c69a2ef282a7d50795a5656514227bbdef8e5153a2bbd2f277c78ea39de4cd
+size 28368280
build/torch25-cxx98-cu118-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch25-cxx98-cu121-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch25-cxx98-cu121-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_imijtykkseqze
+ops = torch.ops._quantization_eetq_imijtykkseqze
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_imijtykkseqze::{op_name}"
build/torch25-cxx98-cu121-x86_64-linux/quantization_eetq/_quantization_eetq_imijtykkseqze.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4111fb3304cba2975f11275f8444505208d4fc4b31da4ff3e3d508a3838ef64
+size 28063536
build/torch25-cxx98-cu121-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch25-cxx98-cu124-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch25-cxx98-cu124-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_4qerj3t7ddiry
+ops = torch.ops._quantization_eetq_4qerj3t7ddiry
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_4qerj3t7ddiry::{op_name}"
build/torch25-cxx98-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_4qerj3t7ddiry.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:933e5f40627ba92496a0fbd14c4a602d0ea81cbb66ec2e4e870e692d87601b3f
+size 28110408
build/torch25-cxx98-cu124-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_j23ltbqvrnixg
+ops = torch.ops._quantization_eetq_j23ltbqvrnixg
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_j23ltbqvrnixg::{op_name}"
build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_j23ltbqvrnixg.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9de877da7bf0e968bb9d307539860efa86b12bcd15ac017acee2377f0241495a
+size 28369152
build/torch26-cxx11-cu118-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_p5neqtnhdgxv2
+ops = torch.ops._quantization_eetq_p5neqtnhdgxv2
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_p5neqtnhdgxv2::{op_name}"
build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_p5neqtnhdgxv2.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20e49f06c5ea24b8f93e1d1200b87ab284cbdbab87495e3ae34a162375b5634e
+size 27951064
build/torch26-cxx11-cu124-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_idk3dezy35dfk
+ops = torch.ops._quantization_eetq_idk3dezy35dfk
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_idk3dezy35dfk::{op_name}"
build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_idk3dezy35dfk.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0da474df2d93afd24827cd9ce04844e32bd0d742cbf60504c14f670e884ddc3c
+size 27998512
build/torch26-cxx11-cu126-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_fpjoxzd7nm2qa
+ops = torch.ops._quantization_eetq_fpjoxzd7nm2qa
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_fpjoxzd7nm2qa::{op_name}"
build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/_quantization_eetq_fpjoxzd7nm2qa.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb594a0a3bcca4af967680d7b5865e6fcbed12bb9d59f8374bf4601516f7bc8
+size 28364528
build/torch26-cxx98-cu118-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_k7mlunxe2ye4s
+ops = torch.ops._quantization_eetq_k7mlunxe2ye4s
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_k7mlunxe2ye4s::{op_name}"
build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/_quantization_eetq_k7mlunxe2ye4s.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:865d3f89b2b8bbe79c7493416999cda076a106f0b3ecb5feb2664a2dee4d0fa5
+size 28114800
build/torch26-cxx98-cu124-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )
build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/__init__.py
ADDED
@@ -0,0 +1,3 @@
+from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
+
+__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_ops.py
ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _quantization_eetq_7m7hz3sbwkaio
+ops = torch.ops._quantization_eetq_7m7hz3sbwkaio
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_quantization_eetq_7m7hz3sbwkaio::{op_name}"
build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/_quantization_eetq_7m7hz3sbwkaio.abi3.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f443eff55bcac99507a8a622333461922f6bcee610cab9334882628e58a54da6
+size 28162256
build/torch26-cxx98-cu126-x86_64-linux/quantization_eetq/custom_ops.py
ADDED
@@ -0,0 +1,36 @@
+from typing import List
+import torch
+
+from ._ops import ops
+
+
+def w8_a16_gemm(
+    input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
+) -> torch.Tensor:
+    return ops.w8_a16_gemm(input, weight, scale)
+
+
+def w8_a16_gemm_(
+    input: torch.Tensor,
+    weight: torch.Tensor,
+    scale: torch.Tensor,
+    output: torch.Tensor,
+    m: int,
+    n: int,
+    k: int,
+) -> torch.Tensor:
+    return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
+
+
+def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
+    return ops.preprocess_weights(origin_weight, is_int4)
+
+
+def quant_weights(
+    origin_weight: torch.Tensor,
+    quant_type: torch.dtype,
+    return_unprocessed_quantized_tensor: bool,
+) -> List[torch.Tensor]:
+    return ops.quant_weights(
+        origin_weight, quant_type, return_unprocessed_quantized_tensor
+    )