Merge branch 'main' into feat/multi_file_context
McPatate committed Feb 19, 2024
2 parents 307ee39 + 0b75e5d commit e04353a
Showing 21 changed files with 687 additions and 771 deletions.
482 changes: 261 additions & 221 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions README.md
@@ -19,12 +19,16 @@ It also makes sure that you are within the context window of the model by tokeni

Gathers information about requests and completions that can enable retraining.

- Note that **llm-ls** does not export any data anywhere (other than setting a user agent when querying the model API), everything is stored in a log file if you set the log level to `info`.
+ Note that **llm-ls** does not export any data anywhere (other than setting a user agent when querying the model API), everything is stored in a log file (`~/.cache/llm_ls/llm-ls.log`) if you set the log level to `info`.

### Completion

**llm-ls** parses the AST of the code to determine if completions should be multi line, single line or empty (no completion).

### Multiple backends

**llm-ls** is compatible with Hugging Face's [Inference API](https://huggingface.co/docs/api-inference/en/index), Hugging Face's [text-generation-inference](https://github.com/huggingface/text-generation-inference), [ollama](https://github.com/ollama/ollama) and OpenAI compatible APIs, like the [python llama.cpp server bindings](https://github.com/abetlen/llama-cpp-python?tab=readme-ov-file#openai-compatible-web-server).

## Compatible extensions

- [x] [llm.nvim](https://github.com/huggingface/llm.nvim)
@@ -38,6 +42,4 @@ Note that **llm-ls** does not export any data anywhere (other than setting a use
- add `suffix_percent` setting that determines the ratio of # of tokens for the prefix vs the suffix in the prompt
- add context window fill percent or change context_window to `max_tokens`
- filter bad suggestions (repetitive, same as below, etc)
- - support for ollama
- - support for llama.cpp
- oltp traces ?
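
The "Multiple backends" section added above maps onto the new `Backend` enum introduced in `crates/custom-types/src/llm_ls.rs` below: it is internally tagged with a `backend` key, so a client picks a backend by sending that tag plus a `url`. A minimal sketch of the resulting wire shapes, assuming the `custom-types` crate and `serde_json` are on hand (the URLs are illustrative placeholders, not defaults shipped by llm-ls):

```rust
use custom_types::llm_ls::Backend;

fn main() -> serde_json::Result<()> {
    // `tag = "backend"` + `rename_all = "lowercase"` yield payloads like these.
    let ollama: Backend =
        serde_json::from_str(r#"{ "backend": "ollama", "url": "http://localhost:11434" }"#)?;
    let openai: Backend =
        serde_json::from_str(r#"{ "backend": "openai", "url": "http://localhost:8000" }"#)?;
    // With no `url`, the huggingface variant falls back to the hosted Inference API.
    let hf: Backend = serde_json::from_str(r#"{ "backend": "huggingface" }"#)?;
    println!("{ollama:?}\n{openai:?}\n{hf:?}");
    Ok(())
}
```
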
14 changes: 14 additions & 0 deletions crates/custom-types/Cargo.toml
@@ -0,0 +1,14 @@
[package]
name = "custom-types"
version = "0.1.0"
edition.workspace = true
license.workspace = true
authors.workspace = true

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
lsp-types = "0.94"
serde = "1"
serde_json = "1"
uuid = "1"
2 changes: 2 additions & 0 deletions crates/custom-types/src/lib.rs
@@ -0,0 +1,2 @@
pub mod llm_ls;
pub mod request;
156 changes: 156 additions & 0 deletions crates/custom-types/src/llm_ls.rs
@@ -0,0 +1,156 @@
use std::{fmt::Display, path::PathBuf};

use lsp_types::TextDocumentPositionParams;
use serde::{Deserialize, Deserializer, Serialize};
use serde_json::{Map, Value};
use uuid::Uuid;

const HF_INFERENCE_API_HOSTNAME: &str = "api-inference.huggingface.co";

#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct AcceptCompletionParams {
    pub request_id: Uuid,
    pub accepted_completion: u32,
    pub shown_completions: Vec<u32>,
}

#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct RejectCompletionParams {
    pub request_id: Uuid,
    pub shown_completions: Vec<u32>,
}

#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Ide {
    Neovim,
    VSCode,
    JetBrains,
    Emacs,
    Jupyter,
    Sublime,
    VisualStudio,
    #[default]
    Unknown,
}

impl Display for Ide {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.serialize(f)
    }
}

fn parse_ide<'de, D>(d: D) -> std::result::Result<Ide, D::Error>
where
    D: Deserializer<'de>,
{
    Option::deserialize(d).map(|b| b.unwrap_or_default())
}

fn parse_url<'de, D>(d: D) -> std::result::Result<String, D::Error>
where
    D: Deserializer<'de>,
{
    Option::deserialize(d).map(|b| b.unwrap_or_else(hf_default_url))
}

fn hf_default_url() -> String {
    format!("https://{HF_INFERENCE_API_HOSTNAME}")
}

#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "lowercase", tag = "backend")]
pub enum Backend {
    HuggingFace {
        #[serde(default = "hf_default_url", deserialize_with = "parse_url")]
        url: String,
    },
    // TODO:
    // LlamaCpp {
    //     url: String,
    // },
    Ollama {
        url: String,
    },
    OpenAi {
        url: String,
    },
    Tgi {
        url: String,
    },
}

impl Default for Backend {
    fn default() -> Self {
        Self::HuggingFace {
            url: hf_default_url(),
        }
    }
}

impl Backend {
    pub fn is_using_inference_api(&self) -> bool {
        match self {
            Self::HuggingFace { url } => url.contains(HF_INFERENCE_API_HOSTNAME),
            _ => false,
        }
    }
}

#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct FimParams {
    pub enabled: bool,
    pub prefix: String,
    pub middle: String,
    pub suffix: String,
}

#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(untagged)]
pub enum TokenizerConfig {
    Local {
        path: PathBuf,
    },
    HuggingFace {
        repository: String,
        api_token: Option<String>,
    },
    Download {
        url: String,
        to: PathBuf,
    },
}

#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GetCompletionsParams {
    #[serde(flatten)]
    pub text_document_position: TextDocumentPositionParams,
    #[serde(default)]
    #[serde(deserialize_with = "parse_ide")]
    pub ide: Ide,
    pub fim: FimParams,
    pub api_token: Option<String>,
    pub model: String,
    #[serde(flatten)]
    pub backend: Backend,
    pub tokens_to_clear: Vec<String>,
    pub tokenizer_config: Option<TokenizerConfig>,
    pub context_window: usize,
    pub tls_skip_verify_insecure: bool,
    #[serde(default)]
    pub request_body: Map<String, Value>,
}

#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Completion {
    pub generated_text: String,
}

#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct GetCompletionsResult {
    pub request_id: Uuid,
    pub completions: Vec<Completion>,
}
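
Taken together, the serde attributes above define the wire format of a `llm-ls/getCompletions` request: the text document position and the backend tag are flattened into the top-level object, while `ide`, the Hugging Face `url`, and `requestBody` may be omitted. A hedged sketch of a payload that should satisfy `GetCompletionsParams` (the model name, file path, and FIM tokens are illustrative values, not defaults from this commit):

```rust
use custom_types::llm_ls::GetCompletionsParams;

fn main() -> serde_json::Result<()> {
    let raw = r#"{
        "textDocument": { "uri": "file:///tmp/main.rs" },
        "position": { "line": 0, "character": 0 },
        "fim": { "enabled": true, "prefix": "<fim_prefix>", "middle": "<fim_middle>", "suffix": "<fim_suffix>" },
        "model": "bigcode/starcoder",
        "backend": "huggingface",
        "tokensToClear": ["<|endoftext|>"],
        "contextWindow": 2048,
        "tlsSkipVerifyInsecure": false
    }"#;
    let params: GetCompletionsParams = serde_json::from_str(raw)?;
    // `ide` was omitted, so `parse_ide` fell back to `Ide::Unknown`.
    println!("ide: {}, model: {}", params.ide, params.model);
    Ok(())
}
```
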
32 changes: 32 additions & 0 deletions crates/custom-types/src/request.rs
@@ -0,0 +1,32 @@
use lsp_types::request::Request;

use crate::llm_ls::{
AcceptCompletionParams, GetCompletionsParams, GetCompletionsResult, RejectCompletionParams,
};

#[derive(Debug)]
pub enum GetCompletions {}

impl Request for GetCompletions {
    type Params = GetCompletionsParams;
    type Result = GetCompletionsResult;
    const METHOD: &'static str = "llm-ls/getCompletions";
}

#[derive(Debug)]
pub enum AcceptCompletion {}

impl Request for AcceptCompletion {
    type Params = AcceptCompletionParams;
    type Result = ();
    const METHOD: &'static str = "llm-ls/acceptCompletion";
}

#[derive(Debug)]
pub enum RejectCompletion {}

impl Request for RejectCompletion {
    type Params = RejectCompletionParams;
    type Result = ();
    const METHOD: &'static str = "llm-ls/rejectCompletion";
}
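
These empty enums follow the `lsp_types::request::Request` convention: each binds a wire method string to its parameter and result types, so client and server agree on custom methods at compile time. A small sketch of using that association generically (the `method_of` helper is hypothetical, not part of this commit):

```rust
use custom_types::request::{AcceptCompletion, GetCompletions, RejectCompletion};
use lsp_types::request::Request;

// Hypothetical helper: recover the wire method name from the request type alone.
fn method_of<R: Request>() -> &'static str {
    R::METHOD
}

fn main() {
    assert_eq!(method_of::<GetCompletions>(), "llm-ls/getCompletions");
    assert_eq!(method_of::<AcceptCompletion>(), "llm-ls/acceptCompletion");
    assert_eq!(method_of::<RejectCompletion>(), "llm-ls/rejectCompletion");
}
```
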
3 changes: 2 additions & 1 deletion crates/llm-ls/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "llm-ls"
-version = "0.4.0"
+version = "0.5.2"
edition = "2021"

[[bin]]
@@ -13,6 +13,7 @@ candle = { version = "0.3", package = "candle-core", default-features = false }
candle-nn = "0.3"
candle-transformers = "0.3"
clap = { version = "4", features = ["derive"] }
+custom-types = { path = "../custom-types" }
futures-util = "0.3"
gitignore = { path = "../gitignore" }
hf-hub = { version = "0.3", features = ["tokio"] }