-- lazy.nvim plugin spec for huggingface/llm.nvim: LLM-powered code completion.
-- Talks to an OpenAI-compatible backend served locally on port 8123.
return {
  "huggingface/llm.nvim",
  config = function()
    local llm = require("llm")
    llm.setup({
      api_token = nil, -- cf Install paragraph
      model = "Qwen3.5-9B", -- the model ID, behavior depends on backend
      backend = "openai", -- backend ID, "huggingface" | "ollama" | "openai" | "tgi"
      url = "http://localhost:8123", -- the http url of the backend
      --tokens_to_clear = { "<|endoftext|>" }, -- tokens to remove from the model's output
      -- parameters that are added to the request body, values are arbitrary, you can set
      -- any field:value pair here, it will be passed as-is to the backend
      request_body = {
        parameters = {
          temperature = 0.6,
          top_p = 0.95,
          top_k = 20,
          min_p = 0.0,
          presence_penalty = 0.0,
          repetition_penalty = 1.0,
          max_new_tokens = 2000,
          -- FIX: must be valid JSON — lowercase `false`, not Python's `False`,
          -- otherwise the backend fails to parse the kwargs string.
          chat_template_kwargs = '{"enable_thinking": false}',
        },
      },
      -- set this if the model supports fill in the middle
      fim = {
        enabled = true,
        prefix = "",
        middle = "",
        suffix = "",
      },
      debounce_ms = 100,
      accept_keymap = "",
      dismiss_keymap = "",
      tls_skip_verify_insecure = false,
      -- llm-ls configuration, cf llm-ls section
      --lsp = {
      --  bin_path = nil,
      --  host = nil,
      --  port = nil,
      --  cmd_env = nil, -- or { LLM_LOG_LEVEL = "DEBUG" } to set the log level of llm-ls
      --  version = "0.5.3",
      --},
      tokenizer = {
        repository = "Qwen/Qwen3.5-9B",
        -- FIX: the original had a bare, unquoted token here. That is an
        -- undeclared-global read that evaluates to nil at runtime (so the
        -- tokenizer auth silently never worked) AND it leaked a real
        -- Hugging Face secret into the config file. Revoke that token and
        -- provide a fresh one via the environment instead of hardcoding it.
        api_token = os.getenv("HF_API_TOKEN"),
      }, -- cf Tokenizer paragraph
      context_window = 5000, -- max number of tokens for the context window
      enable_suggestions_on_startup = false,
      -- pattern matching syntax to enable suggestions on specific files,
      -- either a string or a list of strings
      enable_suggestions_on_files = "*",
      disable_url_path_completion = false, -- cf Backend
    })
  end,
}