feat: nvim: inline llm autocomplete
54  .config/nvim/lua/plugins/llm.lua  Normal file
@@ -0,0 +1,54 @@
return {
  "huggingface/llm.nvim",
  config = function()
    local llm = require('llm')
    llm.setup({
      api_token = nil, -- cf Install paragraph
      model = "Qwen3.5-9B", -- the model ID; behavior depends on backend
      backend = "openai", -- backend ID: "huggingface" | "ollama" | "openai" | "tgi"
      url = "http://localhost:8123", -- the http url of the backend
      --tokens_to_clear = { "<|endoftext|>" }, -- tokens to remove from the model's output
      -- parameters added to the request body; any field:value pair set here is passed as-is to the backend
      request_body = {
        parameters = {
          temperature = 0.6,
          top_p = 0.95,
          top_k = 20,
          min_p = 0.0,
          presence_penalty = 0.0,
          repetition_penalty = 1.0,
          max_new_tokens = 2000,
          chat_template_kwargs = '{"enable_thinking": false}', -- JSON booleans are lowercase; "False" is invalid JSON
        },
      },
      -- set this if the model supports fill-in-the-middle
      fim = {
        enabled = true,
        prefix = "<fim_prefix>",
        middle = "<fim_middle>",
        suffix = "<fim_suffix>",
      },
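      -- editor's note: these are StarCoder-style FIM tokens; Qwen coder models
      -- typically use <|fim_prefix|>/<|fim_suffix|>/<|fim_middle|>, so the values
      -- above are worth verifying against the tokenizer of the model configured here.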
      debounce_ms = 100,
      accept_keymap = "<Tab>",
      dismiss_keymap = "<S-Tab>",
      tls_skip_verify_insecure = false,
      -- llm-ls configuration, cf llm-ls section
      --lsp = {
      --  bin_path = nil,
      --  host = nil,
      --  port = nil,
      --  cmd_env = nil, -- or { LLM_LOG_LEVEL = "DEBUG" } to set the log level of llm-ls
      --  version = "0.5.3",
      --},
      tokenizer = {
        repository = "Qwen/Qwen3.5-9B",
        api_token = "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", -- must be a string; see the note after this file
      }, -- cf Tokenizer paragraph
      context_window = 5000, -- max number of tokens for the context window
      enable_suggestions_on_startup = false,
      enable_suggestions_on_files = "*", -- pattern matching syntax to enable suggestions on specific files, either a string or a list of strings
      disable_url_path_completion = false, -- cf Backend
    })
  end
}
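Rather than committing a Hugging Face token in a dotfile, the tokenizer block can read it from the environment. A minimal sketch, assuming the token is exported as HF_TOKEN (the variable name is an assumption):

    tokenizer = {
      repository = "Qwen/Qwen3.5-9B",
      api_token = os.getenv("HF_TOKEN"), -- resolved at startup; nil if the variable is unset
    }, -- cf Tokenizer paragraph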
@@ -1,79 +0,0 @@
return {
  "frankroeder/parrot.nvim",
  dependencies = { 'ibhagwan/fzf-lua', 'nvim-lua/plenary.nvim' },
  -- optionally include "folke/noice.nvim" or "rcarriga/nvim-notify" for beautiful notifications
  config = function()
    require("parrot").setup {
      -- Providers must be explicitly set up to make them available.
      providers = {
        ollama = {
          name = "ollama",
          endpoint = "http://localhost:11434/api/chat",
          api_key = "", -- not required for local Ollama
          --params = {
          --  chat = { temperature = 1.5, top_p = 1, num_ctx = 8192, min_p = 0.05 },
          --  command = { temperature = 1.5, top_p = 1, num_ctx = 8192, min_p = 0.05 },
          --},
          --topic_prompt = [[
          --Summarize the chat above and only provide a short headline of 2 to 3
          --words without any opening phrase like "Sure, here is the summary",
          --"Sure! Here's a short headline summarizing the chat" or anything similar.
          --]],
          --topic = {
          --  model = "llama3.2",
          --  params = { max_tokens = 32 },
          --},
          headers = {
            ["Content-Type"] = "application/json",
          },
          models = {
            "glm-4.7-flash:latest",
          },
          resolve_api_key = function()
            return true
          end,
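          -- Ollama's /api/chat streams newline-delimited JSON chunks; the handler
          -- below decodes each chunk and keeps only message.content.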
          process_stdout = function(response)
            if response:match "message" and response:match "content" then
              local ok, data = pcall(vim.json.decode, response)
              if ok and data.message and data.message.content then
                return data.message.content
              end
            end
          end,
          get_available_models = function(self)
            local Job = require "plenary.job" -- was missing; Job is not a global
            local url = self.endpoint:gsub("chat", "")
            local logger = require "parrot.logger"
            local job = Job:new({
              command = "curl",
              args = { "-H", "Content-Type: application/json", url .. "tags" },
            }):sync()
            local parsed_response = require("parrot.utils").parse_raw_response(job)
            self:process_onexit(parsed_response)
            if parsed_response == "" then
              logger.debug("Ollama server not running on " .. url) -- endpoint_api was undefined
              return {}
            end

            local success, parsed_data = pcall(vim.json.decode, parsed_response)
            if not success then
              logger.error("Ollama - Error parsing JSON: " .. vim.inspect(parsed_data))
              return {}
            end

            if not parsed_data.models then
              logger.error "Ollama - No models found. Please use 'ollama pull' to download one."
              return {}
            end

            local names = {}
            for _, model in ipairs(parsed_data.models) do
              table.insert(names, model.name)
            end

            return names
          end,
        },
      },
    }
  end,
}
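For reference, the model list above comes from Ollama's /api/tags endpoint (the chat endpoint with "chat" stripped). A minimal standalone sketch of the same query, assuming plenary.nvim is installed and Ollama is listening on its default port:

    local Job = require("plenary.job")
    local out = Job:new({
      command = "curl",
      args = { "-s", "-H", "Content-Type: application/json", "http://localhost:11434/api/tags" },
    }):sync() -- sync() returns the collected stdout lines
    local ok, data = pcall(vim.json.decode, table.concat(out, "\n"))
    if ok and data and data.models then
      for _, m in ipairs(data.models) do
        print(m.name) -- e.g. "glm-4.7-flash:latest"
      end
    end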