Mirror of https://github.com/home-assistant/core.git (synced 2025-07-09 14:27:07 +00:00)
Add config option for controlling Ollama think parameter (#146000)
* Add config option for controlling Ollama think parameter

  Allows enabling or disabling thinking for supported models. Neither option
  will display thinking content in the chat. Future support for displaying
  think content will require frontend changes for formatting.

* Add thinking strings
parent 7ad1e756e7
commit e3f7e5706b
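For orientation, the new option ultimately toggles the `think` keyword that the integration forwards to `ollama.AsyncClient.chat` (see the conversation-entity hunk further down). Below is a minimal standalone sketch of that behaviour, assuming the `ollama` Python package and a locally running Ollama server; the host URL and the model name are illustrative placeholders, not part of this change.

# Sketch only: shows what CONF_THINK controls, outside Home Assistant.
# Assumes a local Ollama server with a reasoning-capable model pulled;
# "deepseek-r1" and the host URL below are placeholders for illustration.
import asyncio

import ollama


async def main() -> None:
    client = ollama.AsyncClient(host="http://localhost:11434")
    for think in (False, True):
        response = await client.chat(
            model="deepseek-r1",
            messages=[{"role": "user", "content": "Why is the sky blue?"}],
            think=think,  # mirrors the new CONF_THINK option
        )
        # With thinking enabled, the reasoning is returned separately from the
        # final answer; this change does not surface it in the chat UI.
        # Attribute access follows recent ollama-python releases.
        print(f"think={think}: {response.message.content}")


asyncio.run(main())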
@@ -21,6 +21,7 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_TIMEOUT,
     DOMAIN,
 )
@@ -33,6 +34,7 @@ __all__ = [
     "CONF_MODEL",
     "CONF_NUM_CTX",
     "CONF_PROMPT",
+    "CONF_THINK",
     "CONF_URL",
     "DOMAIN",
 ]
@@ -22,6 +22,7 @@ from homeassistant.const import CONF_LLM_HASS_API, CONF_URL
 from homeassistant.core import HomeAssistant
 from homeassistant.helpers import llm
 from homeassistant.helpers.selector import (
+    BooleanSelector,
     NumberSelector,
     NumberSelectorConfig,
     NumberSelectorMode,
@@ -41,10 +42,12 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_KEEP_ALIVE,
     DEFAULT_MAX_HISTORY,
     DEFAULT_MODEL,
     DEFAULT_NUM_CTX,
+    DEFAULT_THINK,
     DEFAULT_TIMEOUT,
     DOMAIN,
     MAX_NUM_CTX,
@@ -280,6 +283,12 @@ def ollama_config_option_schema(
                 min=-1, max=sys.maxsize, step=1, mode=NumberSelectorMode.BOX
             )
         ),
+        vol.Optional(
+            CONF_THINK,
+            description={
+                "suggested_value": options.get("think", DEFAULT_THINK),
+            },
+        ): BooleanSelector(),
     }
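As a side note, the new options-flow entry is simply a `BooleanSelector` keyed by `CONF_THINK`. The following is a small, hedged sketch of how such a schema entry validates input when run on its own; the constants mirror the diff, while the empty `options` dict and the standalone `vol.Schema` wrapper are simplified stand-ins rather than the integration's actual schema builder.

# Sketch only: validates a "think" option the way the new schema entry would.
# CONF_THINK/DEFAULT_THINK mirror the diff; the rest is illustrative.
import voluptuous as vol

from homeassistant.helpers.selector import BooleanSelector

CONF_THINK = "think"
DEFAULT_THINK = False
options: dict = {}  # previously stored entry options (empty here)

schema = vol.Schema(
    {
        vol.Optional(
            CONF_THINK,
            description={"suggested_value": options.get(CONF_THINK, DEFAULT_THINK)},
        ): BooleanSelector(),
    }
)

print(schema({CONF_THINK: True}))  # -> {'think': True}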
@@ -4,6 +4,7 @@ DOMAIN = "ollama"
 
 CONF_MODEL = "model"
 CONF_PROMPT = "prompt"
+CONF_THINK = "think"
 
 CONF_KEEP_ALIVE = "keep_alive"
 DEFAULT_KEEP_ALIVE = -1  # seconds. -1 = indefinite, 0 = never
@@ -15,6 +16,7 @@ CONF_NUM_CTX = "num_ctx"
 DEFAULT_NUM_CTX = 8192
 MIN_NUM_CTX = 2048
 MAX_NUM_CTX = 131072
+DEFAULT_THINK = False
 
 CONF_MAX_HISTORY = "max_history"
 DEFAULT_MAX_HISTORY = 20
@@ -24,6 +24,7 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_KEEP_ALIVE,
     DEFAULT_MAX_HISTORY,
     DEFAULT_NUM_CTX,
@@ -256,6 +257,7 @@ class OllamaConversationEntity(
                 # keep_alive requires specifying unit. In this case, seconds
                 keep_alive=f"{settings.get(CONF_KEEP_ALIVE, DEFAULT_KEEP_ALIVE)}s",
                 options={CONF_NUM_CTX: settings.get(CONF_NUM_CTX, DEFAULT_NUM_CTX)},
+                think=settings.get(CONF_THINK),
             )
         except (ollama.RequestError, ollama.ResponseError) as err:
             _LOGGER.error("Unexpected error talking to Ollama server: %s", err)
@@ -30,12 +30,14 @@
           "llm_hass_api": "[%key:common::config_flow::data::llm_hass_api%]",
           "max_history": "Max history messages",
           "num_ctx": "Context window size",
-          "keep_alive": "Keep alive"
+          "keep_alive": "Keep alive",
+          "think": "Think before responding"
         },
         "data_description": {
           "prompt": "Instruct how the LLM should respond. This can be a template.",
           "keep_alive": "Duration in seconds for Ollama to keep model in memory. -1 = indefinite, 0 = never.",
-          "num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities."
+          "num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities.",
+          "think": "If enabled, the LLM will think before responding. This can improve response quality but may increase latency."
         }
       }
     }
@@ -168,6 +168,7 @@ async def test_options(
             ollama.CONF_PROMPT: "test prompt",
             ollama.CONF_MAX_HISTORY: 100,
             ollama.CONF_NUM_CTX: 32768,
+            ollama.CONF_THINK: True,
         },
     )
     await hass.async_block_till_done()
@@ -176,6 +177,7 @@ async def test_options(
         ollama.CONF_PROMPT: "test prompt",
         ollama.CONF_MAX_HISTORY: 100,
         ollama.CONF_NUM_CTX: 32768,
+        ollama.CONF_THINK: True,
     }
@@ -650,3 +650,47 @@ async def test_options(
     assert mock_chat.call_count == 1
     args = mock_chat.call_args.kwargs
     assert args.get("options") == expected_options
+
+
+@pytest.mark.parametrize(
+    "think",
+    [False, True],
+    ids=["no_think", "think"],
+)
+async def test_reasoning_filter(
+    hass: HomeAssistant,
+    mock_config_entry: MockConfigEntry,
+    mock_init_component,
+    think: bool,
+) -> None:
+    """Test that think option is passed correctly to client."""
+
+    agent_id = mock_config_entry.entry_id
+    entry = MockConfigEntry()
+    entry.add_to_hass(hass)
+
+    hass.config_entries.async_update_entry(
+        mock_config_entry,
+        options={
+            ollama.CONF_THINK: think,
+        },
+    )
+
+    with patch(
+        "ollama.AsyncClient.chat",
+        return_value=stream_generator(
+            {"message": {"role": "assistant", "content": "test response"}}
+        ),
+    ) as mock_chat:
+        await conversation.async_converse(
+            hass,
+            "test message",
+            None,
+            Context(),
+            agent_id=agent_id,
+        )
+
+    # Assert called with the expected think value
+    for call in mock_chat.call_args_list:
+        kwargs = call.kwargs
+        assert kwargs.get("think") == think