Skip to content

Instantly share code, notes, and snippets.

@kunato
Last active June 8, 2024 16:43
Show Gist options
  • Select an option

  • Save kunato/d298b7565666fda5df02bc3f7a2687c6 to your computer and use it in GitHub Desktop.

Select an option

Save kunato/d298b7565666fda5df02bc3f7a2687c6 to your computer and use it in GitHub Desktop.

Revisions

  1. kunato revised this gist Jun 8, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion convert_to_awq.py
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,7 @@
    # Load model
    model = AutoAWQForCausalLM.from_pretrained(model_path)
    # Quantize
    model.quantize(tokenizer, quant_config=quant_config) # add calib_data=huggingface_datasets to optimize how the model get optimized (better tobe Thai & English unsupervised corpus)
    model.quantize(tokenizer, quant_config=quant_config) # add calib_data=huggingface_datasets to optimize how the model get optimized (using Thai & English unsupervised corpus (maybe en_wiki + th_wiki)
    # Save quantized model
    model.save_quantized(quant_path)

  2. kunato revised this gist Jun 8, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion convert_to_awq.py
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,7 @@
    # Load model
    model = AutoAWQForCausalLM.from_pretrained(model_path)
    # Quantize
    model.quantize(tokenizer, quant_config=quant_config) # add calib_data=huggingface_datasets to optimized how the model get optimized
    model.quantize(tokenizer, quant_config=quant_config) # add calib_data=huggingface_datasets to optimize how the model get optimized (better tobe Thai & English unsupervised corpus)
    # Save quantized model
    model.save_quantized(quant_path)

  3. kunato revised this gist Jun 8, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion convert_to_awq.py
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,7 @@
    from awq import AutoAWQForCausalLM
    from transformers import AutoTokenizer

    # TODO pip install autoawq==0.2.4from awq import AutoAWQForCausalLM
    # TODO pip install autoawq==0.2.4

    if __name__ == '__main__':
    model_path = 'scb10x/llama-3-typhoon-v1.5x-8b-instruct'
  4. kunato revised this gist Jun 8, 2024. 1 changed file with 4 additions and 3 deletions.
    7 changes: 4 additions & 3 deletions convert_to_awq.py
    Original file line number Diff line number Diff line change
    @@ -1,7 +1,7 @@
    from awq import AutoAWQForCausalLM
    from transformers import AutoTokenizer

    # TODO pip install autoawq==0.2.4
    # TODO pip install autoawq==0.2.4from awq import AutoAWQForCausalLM

    if __name__ == '__main__':
    model_path = 'scb10x/llama-3-typhoon-v1.5x-8b-instruct'
    @@ -13,6 +13,7 @@
    # Load model
    model = AutoAWQForCausalLM.from_pretrained(model_path)
    # Quantize
    model.quantize(tokenizer, quant_config=quant_config)
    model.quantize(tokenizer, quant_config=quant_config) # add calib_data=huggingface_datasets to optimized how the model get optimized
    # Save quantized model
    model.save_quantized(quant_path)
    model.save_quantized(quant_path)

  5. kunato created this gist Jun 8, 2024.
    18 changes: 18 additions & 0 deletions convert_to_awq.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,18 @@
    from awq import AutoAWQForCausalLM
    from transformers import AutoTokenizer

    # TODO pip install autoawq==0.2.4


    def main():
        """Download the Typhoon v1.5x 8B instruct model, quantize it to 4-bit AWQ, and save it."""
        model_path = 'scb10x/llama-3-typhoon-v1.5x-8b-instruct'
        quant_path = 'llama-3-typhoon-v1.5x-8b-instruct-awq'
        # AWQ settings: 4-bit weights, zero-point quantization, group size 128, GEMM kernel.
        quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }

        # NOTE(review): trust_remote_code=True executes code shipped with the hub repo —
        # acceptable here only because the model source is known/trusted.
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        # Write the tokenizer files into the output directory up front.
        tokenizer.save_pretrained(quant_path)

        # Load the full-precision model, quantize it with the config above,
        # then write the quantized weights next to the tokenizer.
        model = AutoAWQForCausalLM.from_pretrained(model_path)
        model.quantize(tokenizer, quant_config=quant_config)
        model.save_quantized(quant_path)


    if __name__ == '__main__':
        main()