diff --git a/README.md b/README.md
index 1a5891e4dd..a74b952708 100644
--- a/README.md
+++ b/README.md
@@ -65,10 +65,10 @@ Refer to [torchserve docker](docker/README.md) for details.
 #### VLLM Engine
 ```bash
 # Make sure to install torchserve with pip or conda as described above and login with `huggingface-cli login`
-python -m ts.llm_launcher --model_id meta-llama/Meta-Llama-3.1-8B-Instruct --disable_token_auth
+python -m ts.llm_launcher --model_id meta-llama/Llama-3.2-3B-Instruct --disable_token_auth
 
 # Try it out
-curl -X POST -d '{"model":"meta-llama/Meta-Llama-3.1-8B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model/1.0/v1/completions"
+curl -X POST -d '{"model":"meta-llama/Llama-3.2-3B-Instruct", "prompt":"Hello, my name is", "max_tokens": 200}' --header "Content-Type: application/json" "http://localhost:8080/predictions/model/1.0/v1/completions"
 ```
 
 #### TRT-LLM Engine
diff --git a/ts/llm_launcher.py b/ts/llm_launcher.py
index 591eedd474..faf01d8725 100644
--- a/ts/llm_launcher.py
+++ b/ts/llm_launcher.py
@@ -168,8 +168,9 @@ def main(args):
     model_store_path = Path(args.model_store)
     model_store_path.mkdir(parents=True, exist_ok=True)
 
-    if args.engine == "trt_llm":
-        model_snapshot_path = download_model(args.model_id)
+    model_snapshot_path = (
+        download_model(args.model_id) if args.engine == "trt_llm" else None
+    )
 
     with create_mar_file(args, model_snapshot_path):
         if args.engine == "trt_llm":