Last active
June 8, 2024 16:43
-
-
Save kunato/d298b7565666fda5df02bc3f7a2687c6 to your computer and use it in GitHub Desktop.
basic_convert_to_awq
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from awq import AutoAWQForCausalLM | |
| from transformers import AutoTokenizer | |
| # TODO pip install autoawq==0.2.4 | |
if __name__ == '__main__':
    # Quantize a causal LM to 4-bit AWQ (GEMM kernel) and save the result
    # together with its tokenizer, ready to be loaded for inference.
    # Requires: pip install autoawq==0.2.4
    model_path = 'scb10x/llama-3-typhoon-v1.5x-8b-instruct'
    quant_path = 'llama-3-typhoon-v1.5x-8b-instruct-awq'
    quant_config = {
        "zero_point": True,
        "q_group_size": 128,
        "w_bit": 4,
        "version": "GEMM",
    }

    # Persist the tokenizer into the output directory so quant_path is a
    # self-contained, loadable model folder.
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    tokenizer.save_pretrained(quant_path)

    # Load the full-precision model, run AWQ calibration + quantization,
    # then write the quantized weights next to the tokenizer.
    # NOTE: pass calib_data=<huggingface dataset> to quantize() to control
    # the calibration corpus (e.g. a Thai + English unsupervised mix such
    # as en_wiki + th_wiki) and improve quantization quality.
    model = AutoAWQForCausalLM.from_pretrained(model_path)
    model.quantize(tokenizer, quant_config=quant_config)
    model.save_quantized(quant_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment