doc: replace a broken example with a working one (#595)

casper-hansen · Aug 28, 2024 · 79258d6
1 parent 97f4a77
commit 79258d6
Showing 1 changed file with 7 additions and 3 deletions.
diff --git a/README.md b/README.md
@@ -125,12 +125,14 @@ Expect this to take 10-15 minutes on smaller 7B models, and around 1 hour for 70
 from awq import AutoAWQForCausalLM
 from transformers import AutoTokenizer
 
-model_path = 'lmsys/vicuna-7b-v1.5'
-quant_path = 'vicuna-7b-v1.5-awq'
+model_path = 'mistralai/Mistral-7B-Instruct-v0.2'
+quant_path = 'mistral-instruct-v0.2-awq'
 quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }
 
 # Load model
-model = AutoAWQForCausalLM.from_pretrained(model_path)
+model = AutoAWQForCausalLM.from_pretrained(
+    model_path, **{"low_cpu_mem_usage": True, "use_cache": False}
+)
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
 # Quantize
@@ -139,6 +141,8 @@ model.quantize(tokenizer, quant_config=quant_config)
 # Save quantized model
 model.save_quantized(quant_path)
 tokenizer.save_pretrained(quant_path)
+
+print(f'Model is quantized and saved at "{quant_path}"')
 ```
 
 </details>