Add config option for controlling Ollama think parameter (#146000)

* Add config option for controlling Ollama think parameter

Allows enabling or disabling thinking for supported models. Neither option
will display thinking content in the chat. Future support for displaying
thinking content will require frontend changes for formatting.

* Add thinking strings
Author: Ian, 2025-06-03 20:42:16 -07:00 (committed by GitHub)
parent 7ad1e756e7
commit e3f7e5706b
7 changed files with 65 additions and 2 deletions
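For orientation (not part of this change): the new option maps to the think keyword of the Ollama chat API. Below is a minimal sketch of that call using the ollama Python client directly, assuming a client and server recent enough to accept think and a model that supports thinking; the model name is a placeholder.

```python
# Sketch only: what the integration's new option ultimately toggles on the
# Ollama side. Assumes an ollama-python release that supports the `think`
# keyword and a thinking-capable model.
import asyncio

from ollama import AsyncClient


async def main() -> None:
    client = AsyncClient(host="http://localhost:11434")
    response = await client.chat(
        model="qwen3",  # placeholder; any thinking-capable model
        messages=[{"role": "user", "content": "What is 17 * 24?"}],
        think=True,  # set to False to disable thinking for supported models
    )
    # Only the final answer is printed here; any reasoning the model produces
    # is returned separately and, per the commit message, is not displayed in
    # the Home Assistant chat either way.
    print(response.message.content)


asyncio.run(main())
```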


@@ -21,6 +21,7 @@ from .const import (
CONF_MODEL,
CONF_NUM_CTX,
CONF_PROMPT,
CONF_THINK,
DEFAULT_TIMEOUT,
DOMAIN,
)
@@ -33,6 +34,7 @@ __all__ = [
"CONF_MODEL",
"CONF_NUM_CTX",
"CONF_PROMPT",
"CONF_THINK",
"CONF_URL",
"DOMAIN",
]


@@ -22,6 +22,7 @@ from homeassistant.const import CONF_LLM_HASS_API, CONF_URL
from homeassistant.core import HomeAssistant
from homeassistant.helpers import llm
from homeassistant.helpers.selector import (
BooleanSelector,
NumberSelector,
NumberSelectorConfig,
NumberSelectorMode,
@@ -41,10 +42,12 @@ from .const import (
CONF_MODEL,
CONF_NUM_CTX,
CONF_PROMPT,
CONF_THINK,
DEFAULT_KEEP_ALIVE,
DEFAULT_MAX_HISTORY,
DEFAULT_MODEL,
DEFAULT_NUM_CTX,
DEFAULT_THINK,
DEFAULT_TIMEOUT,
DOMAIN,
MAX_NUM_CTX,
@@ -280,6 +283,12 @@ def ollama_config_option_schema(
min=-1, max=sys.maxsize, step=1, mode=NumberSelectorMode.BOX
)
),
vol.Optional(
CONF_THINK,
description={
"suggested_value": options.get("think", DEFAULT_THINK),
},
): BooleanSelector(),
}
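Since the new field is vol.Optional without a default, the key simply stays absent from the saved options until the user sets it. A minimal sketch of that behaviour with plain voluptuous, using vol.Boolean() as a stand-in for Home Assistant's BooleanSelector:

```python
# Sketch only: vol.Boolean() stands in for BooleanSelector; the key name
# mirrors CONF_THINK from const.py in this diff.
import voluptuous as vol

CONF_THINK = "think"

schema = vol.Schema({vol.Optional(CONF_THINK): vol.Boolean()})

print(schema({}))                  # {} -> option never set, key stays absent
print(schema({CONF_THINK: True}))  # {'think': True}
```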


@@ -4,6 +4,7 @@ DOMAIN = "ollama"
CONF_MODEL = "model"
CONF_PROMPT = "prompt"
CONF_THINK = "think"
CONF_KEEP_ALIVE = "keep_alive"
DEFAULT_KEEP_ALIVE = -1 # seconds. -1 = indefinite, 0 = never
@@ -15,6 +16,7 @@ CONF_NUM_CTX = "num_ctx"
DEFAULT_NUM_CTX = 8192
MIN_NUM_CTX = 2048
MAX_NUM_CTX = 131072
DEFAULT_THINK = False
CONF_MAX_HISTORY = "max_history"
DEFAULT_MAX_HISTORY = 20


@@ -24,6 +24,7 @@ from .const import (
CONF_MODEL,
CONF_NUM_CTX,
CONF_PROMPT,
CONF_THINK,
DEFAULT_KEEP_ALIVE,
DEFAULT_MAX_HISTORY,
DEFAULT_NUM_CTX,
@@ -256,6 +257,7 @@ class OllamaConversationEntity(
# keep_alive requires specifying unit. In this case, seconds
keep_alive=f"{settings.get(CONF_KEEP_ALIVE, DEFAULT_KEEP_ALIVE)}s",
options={CONF_NUM_CTX: settings.get(CONF_NUM_CTX, DEFAULT_NUM_CTX)},
think=settings.get(CONF_THINK),
)
except (ollama.RequestError, ollama.ResponseError) as err:
_LOGGER.error("Unexpected error talking to Ollama server: %s", err)
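Illustrative only: for a sample settings dict, the keyword arguments assembled above work out roughly as follows (constant names mirror const.py in this diff; values are made up). Note that settings.get(CONF_THINK) yields None when the option was never configured, so no explicit thinking preference is sent and the behaviour is left to Ollama.

```python
# Sketch of the chat() keyword arguments above for one example settings dict.
CONF_KEEP_ALIVE, DEFAULT_KEEP_ALIVE = "keep_alive", -1
CONF_NUM_CTX, DEFAULT_NUM_CTX = "num_ctx", 8192
CONF_THINK = "think"

settings = {CONF_KEEP_ALIVE: -1, CONF_NUM_CTX: 32768}  # think never set

kwargs = {
    # keep_alive requires a unit, hence the formatted string (seconds)
    "keep_alive": f"{settings.get(CONF_KEEP_ALIVE, DEFAULT_KEEP_ALIVE)}s",
    "options": {CONF_NUM_CTX: settings.get(CONF_NUM_CTX, DEFAULT_NUM_CTX)},
    "think": settings.get(CONF_THINK),  # None -> no explicit preference sent
}
print(kwargs)
# {'keep_alive': '-1s', 'options': {'num_ctx': 32768}, 'think': None}
```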


@@ -30,12 +30,14 @@
"llm_hass_api": "[%key:common::config_flow::data::llm_hass_api%]",
"max_history": "Max history messages",
"num_ctx": "Context window size",
"keep_alive": "Keep alive"
"keep_alive": "Keep alive",
"think": "Think before responding"
},
"data_description": {
"prompt": "Instruct how the LLM should respond. This can be a template.",
"keep_alive": "Duration in seconds for Ollama to keep model in memory. -1 = indefinite, 0 = never.",
"num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities."
"num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities.",
"think": "If enabled, the LLM will think before responding. This can improve response quality but may increase latency."
}
}
}


@@ -168,6 +168,7 @@ async def test_options(
ollama.CONF_PROMPT: "test prompt",
ollama.CONF_MAX_HISTORY: 100,
ollama.CONF_NUM_CTX: 32768,
ollama.CONF_THINK: True,
},
)
await hass.async_block_till_done()
@@ -176,6 +177,7 @@ async def test_options(
ollama.CONF_PROMPT: "test prompt",
ollama.CONF_MAX_HISTORY: 100,
ollama.CONF_NUM_CTX: 32768,
ollama.CONF_THINK: True,
}


@@ -650,3 +650,47 @@ async def test_options(
assert mock_chat.call_count == 1
args = mock_chat.call_args.kwargs
assert args.get("options") == expected_options
@pytest.mark.parametrize(
"think",
[False, True],
ids=["no_think", "think"],
)
async def test_reasoning_filter(
hass: HomeAssistant,
mock_config_entry: MockConfigEntry,
mock_init_component,
think: bool,
) -> None:
"""Test that think option is passed correctly to client."""
agent_id = mock_config_entry.entry_id
entry = MockConfigEntry()
entry.add_to_hass(hass)
hass.config_entries.async_update_entry(
mock_config_entry,
options={
ollama.CONF_THINK: think,
},
)
with patch(
"ollama.AsyncClient.chat",
return_value=stream_generator(
{"message": {"role": "assistant", "content": "test response"}}
),
) as mock_chat:
await conversation.async_converse(
hass,
"test message",
None,
Context(),
agent_id=agent_id,
)
# Assert called with the expected think value
for call in mock_chat.call_args_list:
kwargs = call.kwargs
assert kwargs.get("think") == think
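The stream_generator helper patched in as the chat return value is defined elsewhere in the test module and is not part of this diff. A plausible minimal version, shown only so the test reads standalone, might look like this:

```python
# Assumed shape of the stream_generator test helper (not part of this diff):
# wrap one or more canned chat messages in an async generator, mimicking a
# streaming response from ollama.AsyncClient.chat.
from collections.abc import AsyncGenerator


async def stream_generator(
    response: dict | list[dict],
) -> AsyncGenerator[dict, None]:
    """Yield canned Ollama chat messages as an async stream."""
    if not isinstance(response, list):
        response = [response]
    for msg in response:
        yield msg
```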