Mirror of https://github.com/home-assistant/core.git, synced 2025-07-10 23:07:09 +00:00
Add config option for controlling Ollama think parameter (#146000)
* Add config option for controlling Ollama think parameter

  Allows enabling or disabling thinking for supported models. Neither setting will display thinking content in the chat; future support for displaying thinking content will require frontend changes for formatting.

* Add thinking strings
Parent: 7ad1e756e7
Commit: e3f7e5706b
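For context on what the new option controls downstream, here is a minimal sketch of a direct call to the Ollama Python client using its `think` keyword (available in recent `ollama` releases). This is not the integration's own code: the host, model name, and prompt are placeholders chosen for illustration.

import asyncio

import ollama


async def ask(think: bool | None) -> None:
    # Assumed local Ollama server; any thinking-capable model works here.
    client = ollama.AsyncClient(host="http://localhost:11434")
    response = await client.chat(
        model="qwen3",  # placeholder model name
        messages=[{"role": "user", "content": "Is the kitchen light on?"}],
        think=think,  # True/False toggles thinking; None leaves the server default
    )
    # Thinking output (if any) is returned separately from the visible reply,
    # which is why enabling the option does not show thinking content in the chat.
    print(response.message.thinking)
    print(response.message.content)


asyncio.run(ask(True))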
@@ -21,6 +21,7 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_TIMEOUT,
     DOMAIN,
 )

@@ -33,6 +34,7 @@ __all__ = [
     "CONF_MODEL",
     "CONF_NUM_CTX",
     "CONF_PROMPT",
+    "CONF_THINK",
     "CONF_URL",
     "DOMAIN",
 ]

@@ -22,6 +22,7 @@ from homeassistant.const import CONF_LLM_HASS_API, CONF_URL
 from homeassistant.core import HomeAssistant
 from homeassistant.helpers import llm
 from homeassistant.helpers.selector import (
+    BooleanSelector,
     NumberSelector,
     NumberSelectorConfig,
     NumberSelectorMode,

@@ -41,10 +42,12 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_KEEP_ALIVE,
     DEFAULT_MAX_HISTORY,
     DEFAULT_MODEL,
     DEFAULT_NUM_CTX,
+    DEFAULT_THINK,
     DEFAULT_TIMEOUT,
     DOMAIN,
     MAX_NUM_CTX,

@@ -280,6 +283,12 @@ def ollama_config_option_schema(
                 min=-1, max=sys.maxsize, step=1, mode=NumberSelectorMode.BOX
             )
         ),
+        vol.Optional(
+            CONF_THINK,
+            description={
+                "suggested_value": options.get("think", DEFAULT_THINK),
+            },
+        ): BooleanSelector(),
     }

@@ -4,6 +4,7 @@ DOMAIN = "ollama"

 CONF_MODEL = "model"
 CONF_PROMPT = "prompt"
+CONF_THINK = "think"

 CONF_KEEP_ALIVE = "keep_alive"
 DEFAULT_KEEP_ALIVE = -1  # seconds. -1 = indefinite, 0 = never

@@ -15,6 +16,7 @@ CONF_NUM_CTX = "num_ctx"
 DEFAULT_NUM_CTX = 8192
 MIN_NUM_CTX = 2048
 MAX_NUM_CTX = 131072
+DEFAULT_THINK = False

 CONF_MAX_HISTORY = "max_history"
 DEFAULT_MAX_HISTORY = 20

@@ -24,6 +24,7 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_KEEP_ALIVE,
     DEFAULT_MAX_HISTORY,
     DEFAULT_NUM_CTX,

@@ -256,6 +257,7 @@ class OllamaConversationEntity(
                 # keep_alive requires specifying unit. In this case, seconds
                 keep_alive=f"{settings.get(CONF_KEEP_ALIVE, DEFAULT_KEEP_ALIVE)}s",
                 options={CONF_NUM_CTX: settings.get(CONF_NUM_CTX, DEFAULT_NUM_CTX)},
+                think=settings.get(CONF_THINK),
             )
         except (ollama.RequestError, ollama.ResponseError) as err:
             _LOGGER.error("Unexpected error talking to Ollama server: %s", err)

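A note on the hunk above: `settings.get(CONF_THINK)` has no fallback, so an entry that predates this option passes `think=None`, which presumably leaves the model's default behaviour untouched; only an explicit True or False stored by the options flow overrides it. A tiny stand-in illustration (the dicts here are hypothetical, not the integration's settings object):

# Hypothetical stand-ins for the stored options, illustrating the fallback:
# an option that was never saved yields None rather than False.
settings_unset: dict[str, bool] = {}
settings_disabled: dict[str, bool] = {"think": False}

assert settings_unset.get("think") is None      # passed as think=None (no override)
assert settings_disabled.get("think") is False  # passed as think=False (explicitly off)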
@@ -30,12 +30,14 @@
           "llm_hass_api": "[%key:common::config_flow::data::llm_hass_api%]",
           "max_history": "Max history messages",
           "num_ctx": "Context window size",
-          "keep_alive": "Keep alive"
+          "keep_alive": "Keep alive",
+          "think": "Think before responding"
         },
         "data_description": {
           "prompt": "Instruct how the LLM should respond. This can be a template.",
           "keep_alive": "Duration in seconds for Ollama to keep model in memory. -1 = indefinite, 0 = never.",
-          "num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities."
+          "num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities.",
+          "think": "If enabled, the LLM will think before responding. This can improve response quality but may increase latency."
         }
       }
     }

@@ -168,6 +168,7 @@ async def test_options(
             ollama.CONF_PROMPT: "test prompt",
             ollama.CONF_MAX_HISTORY: 100,
             ollama.CONF_NUM_CTX: 32768,
+            ollama.CONF_THINK: True,
         },
     )
     await hass.async_block_till_done()

@@ -176,6 +177,7 @@ async def test_options(
         ollama.CONF_PROMPT: "test prompt",
         ollama.CONF_MAX_HISTORY: 100,
         ollama.CONF_NUM_CTX: 32768,
+        ollama.CONF_THINK: True,
     }

@@ -650,3 +650,47 @@ async def test_options(
     assert mock_chat.call_count == 1
     args = mock_chat.call_args.kwargs
     assert args.get("options") == expected_options
+
+
+@pytest.mark.parametrize(
+    "think",
+    [False, True],
+    ids=["no_think", "think"],
+)
+async def test_reasoning_filter(
+    hass: HomeAssistant,
+    mock_config_entry: MockConfigEntry,
+    mock_init_component,
+    think: bool,
+) -> None:
+    """Test that think option is passed correctly to client."""
+
+    agent_id = mock_config_entry.entry_id
+    entry = MockConfigEntry()
+    entry.add_to_hass(hass)
+
+    hass.config_entries.async_update_entry(
+        mock_config_entry,
+        options={
+            ollama.CONF_THINK: think,
+        },
+    )
+
+    with patch(
+        "ollama.AsyncClient.chat",
+        return_value=stream_generator(
+            {"message": {"role": "assistant", "content": "test response"}}
+        ),
+    ) as mock_chat:
+        await conversation.async_converse(
+            hass,
+            "test message",
+            None,
+            Context(),
+            agent_id=agent_id,
+        )
+
+    # Assert called with the expected think value
+    for call in mock_chat.call_args_list:
+        kwargs = call.kwargs
+        assert kwargs.get("think") == think