Mirror of https://github.com/home-assistant/core.git, synced 2025-07-10 23:07:09 +00:00
Add config option for controlling Ollama think parameter (#146000)
* Add config option for controlling Ollama think parameter

  Allows enabling or disabling thinking for supported models. Neither setting will display thinking content in the chat; future support for displaying thinking content will require frontend changes for formatting.

* Add thinking strings
Parent: 7ad1e756e7
Commit: e3f7e5706b
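For context on what the new option controls downstream, here is a minimal sketch of a direct call to the Ollama Python client using its `think` keyword (available in recent `ollama` releases). This is not the integration's own code: the host, model name, and prompt are placeholders chosen for illustration.

import asyncio

import ollama


async def ask(think: bool | None) -> None:
    # Assumed local Ollama server; any thinking-capable model works here.
    client = ollama.AsyncClient(host="http://localhost:11434")
    response = await client.chat(
        model="qwen3",  # placeholder model name
        messages=[{"role": "user", "content": "Is the kitchen light on?"}],
        think=think,  # True/False toggles thinking; None leaves the server default
    )
    # Thinking output (if any) is returned separately from the visible reply,
    # which is why enabling the option does not show thinking content in the chat.
    print(response.message.thinking)
    print(response.message.content)


asyncio.run(ask(True))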
@@ -21,6 +21,7 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_TIMEOUT,
     DOMAIN,
 )

@@ -33,6 +34,7 @@ __all__ = [
     "CONF_MODEL",
     "CONF_NUM_CTX",
     "CONF_PROMPT",
+    "CONF_THINK",
     "CONF_URL",
     "DOMAIN",
 ]

@@ -22,6 +22,7 @@ from homeassistant.const import CONF_LLM_HASS_API, CONF_URL
 from homeassistant.core import HomeAssistant
 from homeassistant.helpers import llm
 from homeassistant.helpers.selector import (
+    BooleanSelector,
     NumberSelector,
     NumberSelectorConfig,
     NumberSelectorMode,

@@ -41,10 +42,12 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_KEEP_ALIVE,
     DEFAULT_MAX_HISTORY,
     DEFAULT_MODEL,
     DEFAULT_NUM_CTX,
+    DEFAULT_THINK,
     DEFAULT_TIMEOUT,
     DOMAIN,
     MAX_NUM_CTX,

@@ -280,6 +283,12 @@ def ollama_config_option_schema(
                 min=-1, max=sys.maxsize, step=1, mode=NumberSelectorMode.BOX
             )
         ),
+        vol.Optional(
+            CONF_THINK,
+            description={
+                "suggested_value": options.get("think", DEFAULT_THINK),
+            },
+        ): BooleanSelector(),
     }

@@ -4,6 +4,7 @@ DOMAIN = "ollama"

 CONF_MODEL = "model"
 CONF_PROMPT = "prompt"
+CONF_THINK = "think"

 CONF_KEEP_ALIVE = "keep_alive"
 DEFAULT_KEEP_ALIVE = -1  # seconds. -1 = indefinite, 0 = never

@@ -15,6 +16,7 @@ CONF_NUM_CTX = "num_ctx"
 DEFAULT_NUM_CTX = 8192
 MIN_NUM_CTX = 2048
 MAX_NUM_CTX = 131072
+DEFAULT_THINK = False

 CONF_MAX_HISTORY = "max_history"
 DEFAULT_MAX_HISTORY = 20

@@ -24,6 +24,7 @@ from .const import (
     CONF_MODEL,
     CONF_NUM_CTX,
     CONF_PROMPT,
+    CONF_THINK,
     DEFAULT_KEEP_ALIVE,
     DEFAULT_MAX_HISTORY,
     DEFAULT_NUM_CTX,

@@ -256,6 +257,7 @@ class OllamaConversationEntity(
                 # keep_alive requires specifying unit. In this case, seconds
                 keep_alive=f"{settings.get(CONF_KEEP_ALIVE, DEFAULT_KEEP_ALIVE)}s",
                 options={CONF_NUM_CTX: settings.get(CONF_NUM_CTX, DEFAULT_NUM_CTX)},
+                think=settings.get(CONF_THINK),
             )
         except (ollama.RequestError, ollama.ResponseError) as err:
             _LOGGER.error("Unexpected error talking to Ollama server: %s", err)

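A note on the hunk above: `settings.get(CONF_THINK)` has no fallback, so an entry that predates this option passes `think=None`, which presumably leaves the model's default behaviour untouched; only an explicit True or False stored by the options flow overrides it. A tiny stand-in illustration (the dicts here are hypothetical, not the integration's settings object):

# Hypothetical stand-ins for the stored options, illustrating the fallback:
# an option that was never saved yields None rather than False.
settings_unset: dict[str, bool] = {}
settings_disabled: dict[str, bool] = {"think": False}

assert settings_unset.get("think") is None      # passed as think=None (no override)
assert settings_disabled.get("think") is False  # passed as think=False (explicitly off)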
@@ -30,12 +30,14 @@
           "llm_hass_api": "[%key:common::config_flow::data::llm_hass_api%]",
           "max_history": "Max history messages",
           "num_ctx": "Context window size",
-          "keep_alive": "Keep alive"
+          "keep_alive": "Keep alive",
+          "think": "Think before responding"
         },
         "data_description": {
           "prompt": "Instruct how the LLM should respond. This can be a template.",
           "keep_alive": "Duration in seconds for Ollama to keep model in memory. -1 = indefinite, 0 = never.",
-          "num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities."
+          "num_ctx": "Maximum number of text tokens the model can process. Lower to reduce Ollama RAM, or increase for a large number of exposed entities.",
+          "think": "If enabled, the LLM will think before responding. This can improve response quality but may increase latency."
         }
       }
     }

@@ -168,6 +168,7 @@ async def test_options(
             ollama.CONF_PROMPT: "test prompt",
             ollama.CONF_MAX_HISTORY: 100,
             ollama.CONF_NUM_CTX: 32768,
+            ollama.CONF_THINK: True,
         },
     )
     await hass.async_block_till_done()

@@ -176,6 +177,7 @@ async def test_options(
         ollama.CONF_PROMPT: "test prompt",
         ollama.CONF_MAX_HISTORY: 100,
         ollama.CONF_NUM_CTX: 32768,
+        ollama.CONF_THINK: True,
     }

@@ -650,3 +650,47 @@ async def test_options(
     assert mock_chat.call_count == 1
     args = mock_chat.call_args.kwargs
     assert args.get("options") == expected_options
+
+
+@pytest.mark.parametrize(
+    "think",
+    [False, True],
+    ids=["no_think", "think"],
+)
+async def test_reasoning_filter(
+    hass: HomeAssistant,
+    mock_config_entry: MockConfigEntry,
+    mock_init_component,
+    think: bool,
+) -> None:
+    """Test that think option is passed correctly to client."""
+
+    agent_id = mock_config_entry.entry_id
+    entry = MockConfigEntry()
+    entry.add_to_hass(hass)
+
+    hass.config_entries.async_update_entry(
+        mock_config_entry,
+        options={
+            ollama.CONF_THINK: think,
+        },
+    )
+
+    with patch(
+        "ollama.AsyncClient.chat",
+        return_value=stream_generator(
+            {"message": {"role": "assistant", "content": "test response"}}
+        ),
+    ) as mock_chat:
+        await conversation.async_converse(
+            hass,
+            "test message",
+            None,
+            Context(),
+            agent_id=agent_id,
+        )
+
+    # Assert called with the expected think value
+    for call in mock_chat.call_args_list:
+        kwargs = call.kwargs
+        assert kwargs.get("think") == think