Add config option for controlling Ollama think parameter (#146000)

* Add config option for controlling Ollama think parameter

Allows enabling or disabling thinking for supported models. Neither option
displays thinking content in the chat; future support for displaying
thinking content will require frontend changes for formatting.

* Add thinking strings
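
For illustration only (not part of the diff below), a minimal sketch of the request this option ultimately controls, calling the ollama Python client directly. The host URL and the model name "qwen3" are assumptions; the think keyword is the same flag the integration forwards.

import asyncio

from ollama import AsyncClient


async def main() -> None:
    # Assumed local Ollama server; any reasoning-capable model works here.
    client = AsyncClient(host="http://localhost:11434")
    response = await client.chat(
        model="qwen3",  # hypothetical model name
        messages=[{"role": "user", "content": "Why is the sky blue?"}],
        think=True,  # the flag this config option toggles
    )
    # Only the final answer is printed; Home Assistant likewise shows only
    # the message content, not the reasoning trace.
    print(response.message.content)


asyncio.run(main())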
Author: Ian (committed by GitHub)
Date: 2025-06-03 20:42:16 -07:00
Commit: e3f7e5706b (parent 7ad1e756e7)
7 changed files with 65 additions and 2 deletions


@@ -21,6 +21,7 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_TIMEOUT,
     DOMAIN,
 )
@@ -33,6 +34,7 @@ __all__ = [
     "CONF_MODEL",
     "CONF_NUM_CTX",
     "CONF_PROMPT",
+    "CONF_THINK",
     "CONF_URL",
     "DOMAIN",
 ]


@@ -22,6 +22,7 @@ from homeassistant.const import CONF_LLM_HASS_API, CONF_URL
 from homeassistant.core import HomeAssistant
 from homeassistant.helpers import llm
 from homeassistant.helpers.selector import (
+    BooleanSelector,
     NumberSelector,
     NumberSelectorConfig,
     NumberSelectorMode,
@@ -41,10 +42,12 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_KEEP_ALIVE,
     DEFAULT_MAX_HISTORY,
     DEFAULT_MODEL,
     DEFAULT_NUM_CTX,
+    DEFAULT_THINK,
     DEFAULT_TIMEOUT,
     DOMAIN,
     MAX_NUM_CTX,
@@ -280,6 +283,12 @@ def ollama_config_option_schema(
                 min=-1, max=sys.maxsize, step=1, mode=NumberSelectorMode.BOX
             )
         ),
+        vol.Optional(
+            CONF_THINK,
+            description={
+                "suggested_value": options.get("think", DEFAULT_THINK),
+            },
+        ): BooleanSelector(),
     }
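
As an aside (not part of the diff), vol.Optional without a default only stores a value when one is actually submitted. A standalone voluptuous sketch, with a plain bool validator standing in for BooleanSelector, shows that behaviour:

import voluptuous as vol

# Plain-bool stand-in for BooleanSelector, for illustration only.
schema = vol.Schema({vol.Optional("think"): bool})

print(schema({}))               # {} -> key stays absent when nothing is submitted
print(schema({"think": True}))  # {'think': True} once a value is provided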


@@ -4,6 +4,7 @@ DOMAIN = "ollama"
 
 CONF_MODEL = "model"
 CONF_PROMPT = "prompt"
+CONF_THINK = "think"
 
 CONF_KEEP_ALIVE = "keep_alive"
 DEFAULT_KEEP_ALIVE = -1  # seconds. -1 = indefinite, 0 = never
@@ -15,6 +16,7 @@ CONF_NUM_CTX = "num_ctx"
 DEFAULT_NUM_CTX = 8192
 MIN_NUM_CTX = 2048
 MAX_NUM_CTX = 131072
+DEFAULT_THINK = False
 
 CONF_MAX_HISTORY = "max_history"
 DEFAULT_MAX_HISTORY = 20


@@ -24,6 +24,7 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_KEEP_ALIVE,
     DEFAULT_MAX_HISTORY,
     DEFAULT_NUM_CTX,
@@ -256,6 +257,7 @@ class OllamaConversationEntity(
                     # keep_alive requires specifying unit. In this case, seconds
                     keep_alive=f"{settings.get(CONF_KEEP_ALIVE, DEFAULT_KEEP_ALIVE)}s",
                     options={CONF_NUM_CTX: settings.get(CONF_NUM_CTX, DEFAULT_NUM_CTX)},
+                    think=settings.get(CONF_THINK),
                 )
             except (ollama.RequestError, ollama.ResponseError) as err:
                 _LOGGER.error("Unexpected error talking to Ollama server: %s", err)
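
A note on the runtime default (an observation, not part of the diff): settings.get(CONF_THINK) has no fallback, so an entry whose options were never saved passes think=None, presumably leaving the decision to the server-side model default, whereas an explicit False actively disables thinking. A tiny self-contained illustration of the dict behaviour:

CONF_THINK = "think"

options: dict[str, bool] = {}     # options flow never saved
print(options.get(CONF_THINK))    # None -> no explicit preference recorded

options = {CONF_THINK: False}     # user explicitly disabled thinking
print(options.get(CONF_THINK))    # False -> request disables thinking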


@@ -30,12 +30,14 @@
           "llm_hass_api": "[%key:common::config_flow::data::llm_hass_api%]",
           "max_history": "Max history messages",
           "num_ctx": "Context window size",
-          "keep_alive": "Keep alive"
+          "keep_alive": "Keep alive",
+          "think": "Think before responding"
         },
         "data_description": {
           "prompt": "Instruct how the LLM should respond. This can be a template.",
           "keep_alive": "Duration in seconds for Ollama to keep model in memory. -1 = indefinite, 0 = never.",
-          "num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities."
+          "num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities.",
+          "think": "If enabled, the LLM will think before responding. This can improve response quality but may increase latency."
         }
       }
     }


@@ -168,6 +168,7 @@ async def test_options(
             ollama.CONF_PROMPT: "test prompt",
             ollama.CONF_MAX_HISTORY: 100,
             ollama.CONF_NUM_CTX: 32768,
+            ollama.CONF_THINK: True,
         },
     )
     await hass.async_block_till_done()
@@ -176,6 +177,7 @@ async def test_options(
         ollama.CONF_PROMPT: "test prompt",
         ollama.CONF_MAX_HISTORY: 100,
         ollama.CONF_NUM_CTX: 32768,
+        ollama.CONF_THINK: True,
     }


@@ -650,3 +650,47 @@ async def test_options(
     assert mock_chat.call_count == 1
     args = mock_chat.call_args.kwargs
     assert args.get("options") == expected_options
+
+
+@pytest.mark.parametrize(
+    "think",
+    [False, True],
+    ids=["no_think", "think"],
+)
+async def test_reasoning_filter(
+    hass: HomeAssistant,
+    mock_config_entry: MockConfigEntry,
+    mock_init_component,
+    think: bool,
+) -> None:
+    """Test that think option is passed correctly to client."""
+
+    agent_id = mock_config_entry.entry_id
+    entry = MockConfigEntry()
+    entry.add_to_hass(hass)
+
+    hass.config_entries.async_update_entry(
+        mock_config_entry,
+        options={
+            ollama.CONF_THINK: think,
+        },
+    )
+
+    with patch(
+        "ollama.AsyncClient.chat",
+        return_value=stream_generator(
+            {"message": {"role": "assistant", "content": "test response"}}
+        ),
+    ) as mock_chat:
+        await conversation.async_converse(
+            hass,
+            "test message",
+            None,
+            Context(),
+            agent_id=agent_id,
+        )
+
+    # Assert called with the expected think value
+    for call in mock_chat.call_args_list:
+        kwargs = call.kwargs
+        assert kwargs.get("think") == think