-- lazy.nvim plugin spec for huggingface/llm.nvim: LLM-powered code completion.
-- Talks to an OpenAI-compatible backend served locally on port 8123.
return {
  "huggingface/llm.nvim",
  config = function()
    local llm = require("llm")
    llm.setup({
      api_token = nil, -- cf Install paragraph
      model = "Qwen3.5-9B", -- the model ID, behavior depends on backend
      backend = "openai", -- backend ID, "huggingface" | "ollama" | "openai" | "tgi"
      url = "http://localhost:8123", -- the http url of the backend
      --tokens_to_clear = { "<|endoftext|>" }, -- tokens to remove from the model's output
      -- parameters that are added to the request body, values are arbitrary, you can set
      -- any field:value pair here, it will be passed as-is to the backend
      request_body = {
        parameters = {
          temperature = 0.6,
          top_p = 0.95,
          top_k = 20,
          min_p = 0.0,
          presence_penalty = 0.0,
          repetition_penalty = 1.0,
          max_new_tokens = 2000,
          -- FIX: must be valid JSON — lowercase `false`, not Python's `False`,
          -- otherwise the backend fails to parse the kwargs string.
          chat_template_kwargs = '{"enable_thinking": false}',
        },
      },
      -- set this if the model supports fill in the middle
      fim = {
        enabled = true,
        prefix = "",
        middle = "",
        suffix = "",
      },
      debounce_ms = 100,
      accept_keymap = "",
      dismiss_keymap = "",
      tls_skip_verify_insecure = false,
      -- llm-ls configuration, cf llm-ls section
      --lsp = {
      --  bin_path = nil,
      --  host = nil,
      --  port = nil,
      --  cmd_env = nil, -- or { LLM_LOG_LEVEL = "DEBUG" } to set the log level of llm-ls
      --  version = "0.5.3",
      --},
      tokenizer = {
        repository = "Qwen/Qwen3.5-9B",
        -- FIX: the original had a bare, unquoted token here. That is an
        -- undeclared-global read that evaluates to nil at runtime (so the
        -- tokenizer auth silently never worked) AND it leaked a real
        -- Hugging Face secret into the config file. Revoke that token and
        -- provide a fresh one via the environment instead of hardcoding it.
        api_token = os.getenv("HF_API_TOKEN"),
      }, -- cf Tokenizer paragraph
      context_window = 5000, -- max number of tokens for the context window
      enable_suggestions_on_startup = false,
      -- pattern matching syntax to enable suggestions on specific files,
      -- either a string or a list of strings
      enable_suggestions_on_files = "*",
      disable_url_path_completion = false, -- cf Backend
    })
  end,
}