From 1499fd6cbf1f6a2558d48cc806cb60d39824aee3 Mon Sep 17 00:00:00 2001 From: Markus Hennerbichler Date: Mon, 5 Feb 2024 20:05:52 +0000 Subject: [PATCH] Only warn of rate-limits when using HF endpoint (#58) * Only warn of rate-limits when using HF endpoint Co-authored-by: Luc Georges --- crates/llm-ls/src/adaptors.rs | 10 +++++----- crates/llm-ls/src/main.rs | 6 ++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/crates/llm-ls/src/adaptors.rs b/crates/llm-ls/src/adaptors.rs index c9e8f8d..553fc87 100644 --- a/crates/llm-ls/src/adaptors.rs +++ b/crates/llm-ls/src/adaptors.rs @@ -206,11 +206,11 @@ fn parse_openai_text(text: &str) -> Result, jsonrpc::Error> { } } -const TGI: &str = "tgi"; -const HUGGING_FACE: &str = "huggingface"; -const OLLAMA: &str = "ollama"; -const OPENAI: &str = "openai"; -const DEFAULT_ADAPTOR: &str = HUGGING_FACE; +pub(crate) const TGI: &str = "tgi"; +pub(crate) const HUGGING_FACE: &str = "huggingface"; +pub(crate) const OLLAMA: &str = "ollama"; +pub(crate) const OPENAI: &str = "openai"; +pub(crate) const DEFAULT_ADAPTOR: &str = HUGGING_FACE; fn unknown_adaptor_error(adaptor: Option<&String>) -> jsonrpc::Error { internal_error(format!("Unknown adaptor {:?}", adaptor)) diff --git a/crates/llm-ls/src/main.rs b/crates/llm-ls/src/main.rs index 2640569..318be3b 100644 --- a/crates/llm-ls/src/main.rs +++ b/crates/llm-ls/src/main.rs @@ -26,6 +26,7 @@ mod language_id; const MAX_WARNING_REPEAT: Duration = Duration::from_secs(3_600); pub const NAME: &str = "llm-ls"; pub const VERSION: &str = env!("CARGO_PKG_VERSION"); +const HF_INFERENCE_API_HOSTNAME: &str = "api-inference.huggingface.co"; fn get_position_idx(rope: &Rope, row: usize, col: usize) -> Result { Ok(rope.try_line_to_char(row).map_err(internal_error)? @@ -589,7 +590,7 @@ fn build_url(model: &str) -> String { if model.starts_with("http://") || model.starts_with("https://") { model.to_owned() } else { - format!("https://api-inference.huggingface.co/models/{model}") + format!("https://{HF_INFERENCE_API_HOSTNAME}/models/{model}") } } @@ -618,7 +619,8 @@ impl Backend { "received completion request for {}", params.text_document_position.text_document.uri ); - if params.api_token.is_none() { + let is_using_inference_api = params.adaptor.as_ref().unwrap_or(&adaptors::DEFAULT_ADAPTOR.to_owned()).as_str() == adaptors::HUGGING_FACE; + if params.api_token.is_none() && is_using_inference_api { let now = Instant::now(); let unauthenticated_warn_at = self.unauthenticated_warn_at.read().await; if now.duration_since(*unauthenticated_warn_at) > MAX_WARNING_REPEAT {