{
  "batchers": null,
  "cachers": null,
  "compilers": null,
  "distillers": null,
  "pruners": null,
  "quantizers": "llm-int8",
  "recoverers": null,
  "quant_llm-int8_compute_dtype": "bfloat16",
  "quant_llm-int8_double_quant": false,
  "quant_llm-int8_enable_fp32_cpu_offload": false,
  "quant_llm-int8_has_fp16_weight": false,
  "quant_llm-int8_quant_type": "fp4",
  "quant_llm-int8_threshold": 6.0,
  "quant_llm-int8_weight_bits": 8,
  "max_batch_size": 1,
  "device": "cuda",
  "cache_dir": "/tmp/models/tmpitiogpqm",
  "task": "",
  "save_load_fn": "llm-int8",
  "save_load_fn_args": {},
  "api_key": null
}