Split Anthropic entity (#147770)

This commit is contained in:
Paulus Schoutsen 2025-06-30 21:47:44 +02:00 committed by GitHub
parent bf74ba990a
commit 38a7b21052
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 404 additions and 383 deletions

View File

@ -1,69 +1,17 @@
"""Conversation support for Anthropic."""
from collections.abc import AsyncGenerator, Callable, Iterable
import json
from typing import Any, Literal, cast
import anthropic
from anthropic import AsyncStream
from anthropic._types import NOT_GIVEN
from anthropic.types import (
InputJSONDelta,
MessageDeltaUsage,
MessageParam,
MessageStreamEvent,
RawContentBlockDeltaEvent,
RawContentBlockStartEvent,
RawContentBlockStopEvent,
RawMessageDeltaEvent,
RawMessageStartEvent,
RawMessageStopEvent,
RedactedThinkingBlock,
RedactedThinkingBlockParam,
SignatureDelta,
TextBlock,
TextBlockParam,
TextDelta,
ThinkingBlock,
ThinkingBlockParam,
ThinkingConfigDisabledParam,
ThinkingConfigEnabledParam,
ThinkingDelta,
ToolParam,
ToolResultBlockParam,
ToolUseBlock,
ToolUseBlockParam,
Usage,
)
from voluptuous_openapi import convert
from typing import Literal
from homeassistant.components import conversation
from homeassistant.config_entries import ConfigEntry, ConfigSubentry
from homeassistant.const import CONF_LLM_HASS_API, MATCH_ALL
from homeassistant.core import HomeAssistant
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers import device_registry as dr, intent, llm
from homeassistant.helpers import intent
from homeassistant.helpers.entity_platform import AddConfigEntryEntitiesCallback
from . import AnthropicConfigEntry
from .const import (
CONF_CHAT_MODEL,
CONF_MAX_TOKENS,
CONF_PROMPT,
CONF_TEMPERATURE,
CONF_THINKING_BUDGET,
DOMAIN,
LOGGER,
MIN_THINKING_BUDGET,
RECOMMENDED_CHAT_MODEL,
RECOMMENDED_MAX_TOKENS,
RECOMMENDED_TEMPERATURE,
RECOMMENDED_THINKING_BUDGET,
THINKING_MODELS,
)
# Max number of back and forth with the LLM to generate a response
MAX_TOOL_ITERATIONS = 10
from .const import CONF_PROMPT, DOMAIN
from .entity import AnthropicBaseLLMEntity
async def async_setup_entry(
@ -82,253 +30,10 @@ async def async_setup_entry(
)
def _format_tool(
tool: llm.Tool, custom_serializer: Callable[[Any], Any] | None
) -> ToolParam:
"""Format tool specification."""
return ToolParam(
name=tool.name,
description=tool.description or "",
input_schema=convert(tool.parameters, custom_serializer=custom_serializer),
)
def _convert_content(
chat_content: Iterable[conversation.Content],
) -> list[MessageParam]:
"""Transform HA chat_log content into Anthropic API format."""
messages: list[MessageParam] = []
for content in chat_content:
if isinstance(content, conversation.ToolResultContent):
tool_result_block = ToolResultBlockParam(
type="tool_result",
tool_use_id=content.tool_call_id,
content=json.dumps(content.tool_result),
)
if not messages or messages[-1]["role"] != "user":
messages.append(
MessageParam(
role="user",
content=[tool_result_block],
)
)
elif isinstance(messages[-1]["content"], str):
messages[-1]["content"] = [
TextBlockParam(type="text", text=messages[-1]["content"]),
tool_result_block,
]
else:
messages[-1]["content"].append(tool_result_block) # type: ignore[attr-defined]
elif isinstance(content, conversation.UserContent):
# Combine consequent user messages
if not messages or messages[-1]["role"] != "user":
messages.append(
MessageParam(
role="user",
content=content.content,
)
)
elif isinstance(messages[-1]["content"], str):
messages[-1]["content"] = [
TextBlockParam(type="text", text=messages[-1]["content"]),
TextBlockParam(type="text", text=content.content),
]
else:
messages[-1]["content"].append( # type: ignore[attr-defined]
TextBlockParam(type="text", text=content.content)
)
elif isinstance(content, conversation.AssistantContent):
# Combine consequent assistant messages
if not messages or messages[-1]["role"] != "assistant":
messages.append(
MessageParam(
role="assistant",
content=[],
)
)
if content.content:
messages[-1]["content"].append( # type: ignore[union-attr]
TextBlockParam(type="text", text=content.content)
)
if content.tool_calls:
messages[-1]["content"].extend( # type: ignore[union-attr]
[
ToolUseBlockParam(
type="tool_use",
id=tool_call.id,
name=tool_call.tool_name,
input=tool_call.tool_args,
)
for tool_call in content.tool_calls
]
)
else:
# Note: We don't pass SystemContent here as its passed to the API as the prompt
raise TypeError(f"Unexpected content type: {type(content)}")
return messages
async def _transform_stream( # noqa: C901 - This is complex, but better to have it in one place
chat_log: conversation.ChatLog,
result: AsyncStream[MessageStreamEvent],
messages: list[MessageParam],
) -> AsyncGenerator[conversation.AssistantContentDeltaDict]:
"""Transform the response stream into HA format.
A typical stream of responses might look something like the following:
- RawMessageStartEvent with no content
- RawContentBlockStartEvent with an empty ThinkingBlock (if extended thinking is enabled)
- RawContentBlockDeltaEvent with a ThinkingDelta
- RawContentBlockDeltaEvent with a ThinkingDelta
- RawContentBlockDeltaEvent with a ThinkingDelta
- ...
- RawContentBlockDeltaEvent with a SignatureDelta
- RawContentBlockStopEvent
- RawContentBlockStartEvent with a RedactedThinkingBlock (occasionally)
- RawContentBlockStopEvent (RedactedThinkingBlock does not have a delta)
- RawContentBlockStartEvent with an empty TextBlock
- RawContentBlockDeltaEvent with a TextDelta
- RawContentBlockDeltaEvent with a TextDelta
- RawContentBlockDeltaEvent with a TextDelta
- ...
- RawContentBlockStopEvent
- RawContentBlockStartEvent with ToolUseBlock specifying the function name
- RawContentBlockDeltaEvent with a InputJSONDelta
- RawContentBlockDeltaEvent with a InputJSONDelta
- ...
- RawContentBlockStopEvent
- RawMessageDeltaEvent with a stop_reason='tool_use'
- RawMessageStopEvent(type='message_stop')
Each message could contain multiple blocks of the same type.
"""
if result is None:
raise TypeError("Expected a stream of messages")
current_message: MessageParam | None = None
current_block: (
TextBlockParam
| ToolUseBlockParam
| ThinkingBlockParam
| RedactedThinkingBlockParam
| None
) = None
current_tool_args: str
input_usage: Usage | None = None
async for response in result:
LOGGER.debug("Received response: %s", response)
if isinstance(response, RawMessageStartEvent):
if response.message.role != "assistant":
raise ValueError("Unexpected message role")
current_message = MessageParam(role=response.message.role, content=[])
input_usage = response.message.usage
elif isinstance(response, RawContentBlockStartEvent):
if isinstance(response.content_block, ToolUseBlock):
current_block = ToolUseBlockParam(
type="tool_use",
id=response.content_block.id,
name=response.content_block.name,
input="",
)
current_tool_args = ""
elif isinstance(response.content_block, TextBlock):
current_block = TextBlockParam(
type="text", text=response.content_block.text
)
yield {"role": "assistant"}
if response.content_block.text:
yield {"content": response.content_block.text}
elif isinstance(response.content_block, ThinkingBlock):
current_block = ThinkingBlockParam(
type="thinking",
thinking=response.content_block.thinking,
signature=response.content_block.signature,
)
elif isinstance(response.content_block, RedactedThinkingBlock):
current_block = RedactedThinkingBlockParam(
type="redacted_thinking", data=response.content_block.data
)
LOGGER.debug(
"Some of Claudes internal reasoning has been automatically "
"encrypted for safety reasons. This doesnt affect the quality of "
"responses"
)
elif isinstance(response, RawContentBlockDeltaEvent):
if current_block is None:
raise ValueError("Unexpected delta without a block")
if isinstance(response.delta, InputJSONDelta):
current_tool_args += response.delta.partial_json
elif isinstance(response.delta, TextDelta):
text_block = cast(TextBlockParam, current_block)
text_block["text"] += response.delta.text
yield {"content": response.delta.text}
elif isinstance(response.delta, ThinkingDelta):
thinking_block = cast(ThinkingBlockParam, current_block)
thinking_block["thinking"] += response.delta.thinking
elif isinstance(response.delta, SignatureDelta):
thinking_block = cast(ThinkingBlockParam, current_block)
thinking_block["signature"] += response.delta.signature
elif isinstance(response, RawContentBlockStopEvent):
if current_block is None:
raise ValueError("Unexpected stop event without a current block")
if current_block["type"] == "tool_use":
# tool block
tool_args = json.loads(current_tool_args) if current_tool_args else {}
current_block["input"] = tool_args
yield {
"tool_calls": [
llm.ToolInput(
id=current_block["id"],
tool_name=current_block["name"],
tool_args=tool_args,
)
]
}
elif current_block["type"] == "thinking":
# thinking block
LOGGER.debug("Thinking: %s", current_block["thinking"])
if current_message is None:
raise ValueError("Unexpected stop event without a current message")
current_message["content"].append(current_block) # type: ignore[union-attr]
current_block = None
elif isinstance(response, RawMessageDeltaEvent):
if (usage := response.usage) is not None:
chat_log.async_trace(_create_token_stats(input_usage, usage))
if response.delta.stop_reason == "refusal":
raise HomeAssistantError("Potential policy violation detected")
elif isinstance(response, RawMessageStopEvent):
if current_message is not None:
messages.append(current_message)
current_message = None
def _create_token_stats(
input_usage: Usage | None, response_usage: MessageDeltaUsage
) -> dict[str, Any]:
"""Create token stats for conversation agent tracing."""
input_tokens = 0
cached_input_tokens = 0
if input_usage:
input_tokens = input_usage.input_tokens
cached_input_tokens = input_usage.cache_creation_input_tokens or 0
output_tokens = response_usage.output_tokens
return {
"stats": {
"input_tokens": input_tokens,
"cached_input_tokens": cached_input_tokens,
"output_tokens": output_tokens,
}
}
class AnthropicConversationEntity(
conversation.ConversationEntity, conversation.AbstractConversationAgent
conversation.ConversationEntity,
conversation.AbstractConversationAgent,
AnthropicBaseLLMEntity,
):
"""Anthropic conversation agent."""
@ -336,17 +41,7 @@ class AnthropicConversationEntity(
def __init__(self, entry: AnthropicConfigEntry, subentry: ConfigSubentry) -> None:
"""Initialize the agent."""
self.entry = entry
self.subentry = subentry
self._attr_name = subentry.title
self._attr_unique_id = subentry.subentry_id
self._attr_device_info = dr.DeviceInfo(
identifiers={(DOMAIN, subentry.subentry_id)},
name=subentry.title,
manufacturer="Anthropic",
model="Claude",
entry_type=dr.DeviceEntryType.SERVICE,
)
super().__init__(entry, subentry)
if self.subentry.data.get(CONF_LLM_HASS_API):
self._attr_supported_features = (
conversation.ConversationEntityFeature.CONTROL
@ -395,73 +90,6 @@ class AnthropicConversationEntity(
continue_conversation=chat_log.continue_conversation,
)
async def _async_handle_chat_log(
self,
chat_log: conversation.ChatLog,
) -> None:
"""Generate an answer for the chat log."""
options = self.subentry.data
tools: list[ToolParam] | None = None
if chat_log.llm_api:
tools = [
_format_tool(tool, chat_log.llm_api.custom_serializer)
for tool in chat_log.llm_api.tools
]
system = chat_log.content[0]
if not isinstance(system, conversation.SystemContent):
raise TypeError("First message must be a system message")
messages = _convert_content(chat_log.content[1:])
client = self.entry.runtime_data
thinking_budget = options.get(CONF_THINKING_BUDGET, RECOMMENDED_THINKING_BUDGET)
model = options.get(CONF_CHAT_MODEL, RECOMMENDED_CHAT_MODEL)
# To prevent infinite loops, we limit the number of iterations
for _iteration in range(MAX_TOOL_ITERATIONS):
model_args = {
"model": model,
"messages": messages,
"tools": tools or NOT_GIVEN,
"max_tokens": options.get(CONF_MAX_TOKENS, RECOMMENDED_MAX_TOKENS),
"system": system.content,
"stream": True,
}
if model in THINKING_MODELS and thinking_budget >= MIN_THINKING_BUDGET:
model_args["thinking"] = ThinkingConfigEnabledParam(
type="enabled", budget_tokens=thinking_budget
)
else:
model_args["thinking"] = ThinkingConfigDisabledParam(type="disabled")
model_args["temperature"] = options.get(
CONF_TEMPERATURE, RECOMMENDED_TEMPERATURE
)
try:
stream = await client.messages.create(**model_args)
except anthropic.AnthropicError as err:
raise HomeAssistantError(
f"Sorry, I had a problem talking to Anthropic: {err}"
) from err
messages.extend(
_convert_content(
[
content
async for content in chat_log.async_add_delta_content_stream(
self.entity_id,
_transform_stream(chat_log, stream, messages),
)
if not isinstance(content, conversation.AssistantContent)
]
)
)
if not chat_log.unresponded_tool_results:
break
async def _async_entry_update_listener(
self, hass: HomeAssistant, entry: ConfigEntry
) -> None:

View File

@ -0,0 +1,393 @@
"""Base entity for Anthropic."""
from collections.abc import AsyncGenerator, Callable, Iterable
import json
from typing import Any, cast
import anthropic
from anthropic import AsyncStream
from anthropic._types import NOT_GIVEN
from anthropic.types import (
InputJSONDelta,
MessageDeltaUsage,
MessageParam,
MessageStreamEvent,
RawContentBlockDeltaEvent,
RawContentBlockStartEvent,
RawContentBlockStopEvent,
RawMessageDeltaEvent,
RawMessageStartEvent,
RawMessageStopEvent,
RedactedThinkingBlock,
RedactedThinkingBlockParam,
SignatureDelta,
TextBlock,
TextBlockParam,
TextDelta,
ThinkingBlock,
ThinkingBlockParam,
ThinkingConfigDisabledParam,
ThinkingConfigEnabledParam,
ThinkingDelta,
ToolParam,
ToolResultBlockParam,
ToolUseBlock,
ToolUseBlockParam,
Usage,
)
from voluptuous_openapi import convert
from homeassistant.components import conversation
from homeassistant.config_entries import ConfigSubentry
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers import device_registry as dr, llm
from homeassistant.helpers.entity import Entity
from . import AnthropicConfigEntry
from .const import (
CONF_CHAT_MODEL,
CONF_MAX_TOKENS,
CONF_TEMPERATURE,
CONF_THINKING_BUDGET,
DOMAIN,
LOGGER,
MIN_THINKING_BUDGET,
RECOMMENDED_CHAT_MODEL,
RECOMMENDED_MAX_TOKENS,
RECOMMENDED_TEMPERATURE,
RECOMMENDED_THINKING_BUDGET,
THINKING_MODELS,
)
# Max number of back and forth with the LLM to generate a response
MAX_TOOL_ITERATIONS = 10
def _format_tool(
tool: llm.Tool, custom_serializer: Callable[[Any], Any] | None
) -> ToolParam:
"""Format tool specification."""
return ToolParam(
name=tool.name,
description=tool.description or "",
input_schema=convert(tool.parameters, custom_serializer=custom_serializer),
)
def _convert_content(
chat_content: Iterable[conversation.Content],
) -> list[MessageParam]:
"""Transform HA chat_log content into Anthropic API format."""
messages: list[MessageParam] = []
for content in chat_content:
if isinstance(content, conversation.ToolResultContent):
tool_result_block = ToolResultBlockParam(
type="tool_result",
tool_use_id=content.tool_call_id,
content=json.dumps(content.tool_result),
)
if not messages or messages[-1]["role"] != "user":
messages.append(
MessageParam(
role="user",
content=[tool_result_block],
)
)
elif isinstance(messages[-1]["content"], str):
messages[-1]["content"] = [
TextBlockParam(type="text", text=messages[-1]["content"]),
tool_result_block,
]
else:
messages[-1]["content"].append(tool_result_block) # type: ignore[attr-defined]
elif isinstance(content, conversation.UserContent):
# Combine consequent user messages
if not messages or messages[-1]["role"] != "user":
messages.append(
MessageParam(
role="user",
content=content.content,
)
)
elif isinstance(messages[-1]["content"], str):
messages[-1]["content"] = [
TextBlockParam(type="text", text=messages[-1]["content"]),
TextBlockParam(type="text", text=content.content),
]
else:
messages[-1]["content"].append( # type: ignore[attr-defined]
TextBlockParam(type="text", text=content.content)
)
elif isinstance(content, conversation.AssistantContent):
# Combine consequent assistant messages
if not messages or messages[-1]["role"] != "assistant":
messages.append(
MessageParam(
role="assistant",
content=[],
)
)
if content.content:
messages[-1]["content"].append( # type: ignore[union-attr]
TextBlockParam(type="text", text=content.content)
)
if content.tool_calls:
messages[-1]["content"].extend( # type: ignore[union-attr]
[
ToolUseBlockParam(
type="tool_use",
id=tool_call.id,
name=tool_call.tool_name,
input=tool_call.tool_args,
)
for tool_call in content.tool_calls
]
)
else:
# Note: We don't pass SystemContent here as its passed to the API as the prompt
raise TypeError(f"Unexpected content type: {type(content)}")
return messages
async def _transform_stream( # noqa: C901 - This is complex, but better to have it in one place
chat_log: conversation.ChatLog,
result: AsyncStream[MessageStreamEvent],
messages: list[MessageParam],
) -> AsyncGenerator[conversation.AssistantContentDeltaDict]:
"""Transform the response stream into HA format.
A typical stream of responses might look something like the following:
- RawMessageStartEvent with no content
- RawContentBlockStartEvent with an empty ThinkingBlock (if extended thinking is enabled)
- RawContentBlockDeltaEvent with a ThinkingDelta
- RawContentBlockDeltaEvent with a ThinkingDelta
- RawContentBlockDeltaEvent with a ThinkingDelta
- ...
- RawContentBlockDeltaEvent with a SignatureDelta
- RawContentBlockStopEvent
- RawContentBlockStartEvent with a RedactedThinkingBlock (occasionally)
- RawContentBlockStopEvent (RedactedThinkingBlock does not have a delta)
- RawContentBlockStartEvent with an empty TextBlock
- RawContentBlockDeltaEvent with a TextDelta
- RawContentBlockDeltaEvent with a TextDelta
- RawContentBlockDeltaEvent with a TextDelta
- ...
- RawContentBlockStopEvent
- RawContentBlockStartEvent with ToolUseBlock specifying the function name
- RawContentBlockDeltaEvent with a InputJSONDelta
- RawContentBlockDeltaEvent with a InputJSONDelta
- ...
- RawContentBlockStopEvent
- RawMessageDeltaEvent with a stop_reason='tool_use'
- RawMessageStopEvent(type='message_stop')
Each message could contain multiple blocks of the same type.
"""
if result is None:
raise TypeError("Expected a stream of messages")
current_message: MessageParam | None = None
current_block: (
TextBlockParam
| ToolUseBlockParam
| ThinkingBlockParam
| RedactedThinkingBlockParam
| None
) = None
current_tool_args: str
input_usage: Usage | None = None
async for response in result:
LOGGER.debug("Received response: %s", response)
if isinstance(response, RawMessageStartEvent):
if response.message.role != "assistant":
raise ValueError("Unexpected message role")
current_message = MessageParam(role=response.message.role, content=[])
input_usage = response.message.usage
elif isinstance(response, RawContentBlockStartEvent):
if isinstance(response.content_block, ToolUseBlock):
current_block = ToolUseBlockParam(
type="tool_use",
id=response.content_block.id,
name=response.content_block.name,
input="",
)
current_tool_args = ""
elif isinstance(response.content_block, TextBlock):
current_block = TextBlockParam(
type="text", text=response.content_block.text
)
yield {"role": "assistant"}
if response.content_block.text:
yield {"content": response.content_block.text}
elif isinstance(response.content_block, ThinkingBlock):
current_block = ThinkingBlockParam(
type="thinking",
thinking=response.content_block.thinking,
signature=response.content_block.signature,
)
elif isinstance(response.content_block, RedactedThinkingBlock):
current_block = RedactedThinkingBlockParam(
type="redacted_thinking", data=response.content_block.data
)
LOGGER.debug(
"Some of Claudes internal reasoning has been automatically "
"encrypted for safety reasons. This doesnt affect the quality of "
"responses"
)
elif isinstance(response, RawContentBlockDeltaEvent):
if current_block is None:
raise ValueError("Unexpected delta without a block")
if isinstance(response.delta, InputJSONDelta):
current_tool_args += response.delta.partial_json
elif isinstance(response.delta, TextDelta):
text_block = cast(TextBlockParam, current_block)
text_block["text"] += response.delta.text
yield {"content": response.delta.text}
elif isinstance(response.delta, ThinkingDelta):
thinking_block = cast(ThinkingBlockParam, current_block)
thinking_block["thinking"] += response.delta.thinking
elif isinstance(response.delta, SignatureDelta):
thinking_block = cast(ThinkingBlockParam, current_block)
thinking_block["signature"] += response.delta.signature
elif isinstance(response, RawContentBlockStopEvent):
if current_block is None:
raise ValueError("Unexpected stop event without a current block")
if current_block["type"] == "tool_use":
# tool block
tool_args = json.loads(current_tool_args) if current_tool_args else {}
current_block["input"] = tool_args
yield {
"tool_calls": [
llm.ToolInput(
id=current_block["id"],
tool_name=current_block["name"],
tool_args=tool_args,
)
]
}
elif current_block["type"] == "thinking":
# thinking block
LOGGER.debug("Thinking: %s", current_block["thinking"])
if current_message is None:
raise ValueError("Unexpected stop event without a current message")
current_message["content"].append(current_block) # type: ignore[union-attr]
current_block = None
elif isinstance(response, RawMessageDeltaEvent):
if (usage := response.usage) is not None:
chat_log.async_trace(_create_token_stats(input_usage, usage))
if response.delta.stop_reason == "refusal":
raise HomeAssistantError("Potential policy violation detected")
elif isinstance(response, RawMessageStopEvent):
if current_message is not None:
messages.append(current_message)
current_message = None
def _create_token_stats(
input_usage: Usage | None, response_usage: MessageDeltaUsage
) -> dict[str, Any]:
"""Create token stats for conversation agent tracing."""
input_tokens = 0
cached_input_tokens = 0
if input_usage:
input_tokens = input_usage.input_tokens
cached_input_tokens = input_usage.cache_creation_input_tokens or 0
output_tokens = response_usage.output_tokens
return {
"stats": {
"input_tokens": input_tokens,
"cached_input_tokens": cached_input_tokens,
"output_tokens": output_tokens,
}
}
class AnthropicBaseLLMEntity(Entity):
"""Anthropic base LLM entity."""
def __init__(self, entry: AnthropicConfigEntry, subentry: ConfigSubentry) -> None:
"""Initialize the entity."""
self.entry = entry
self.subentry = subentry
self._attr_name = subentry.title
self._attr_unique_id = subentry.subentry_id
self._attr_device_info = dr.DeviceInfo(
identifiers={(DOMAIN, subentry.subentry_id)},
name=subentry.title,
manufacturer="Anthropic",
model="Claude",
entry_type=dr.DeviceEntryType.SERVICE,
)
async def _async_handle_chat_log(
self,
chat_log: conversation.ChatLog,
) -> None:
"""Generate an answer for the chat log."""
options = self.subentry.data
tools: list[ToolParam] | None = None
if chat_log.llm_api:
tools = [
_format_tool(tool, chat_log.llm_api.custom_serializer)
for tool in chat_log.llm_api.tools
]
system = chat_log.content[0]
if not isinstance(system, conversation.SystemContent):
raise TypeError("First message must be a system message")
messages = _convert_content(chat_log.content[1:])
client = self.entry.runtime_data
thinking_budget = options.get(CONF_THINKING_BUDGET, RECOMMENDED_THINKING_BUDGET)
model = options.get(CONF_CHAT_MODEL, RECOMMENDED_CHAT_MODEL)
# To prevent infinite loops, we limit the number of iterations
for _iteration in range(MAX_TOOL_ITERATIONS):
model_args = {
"model": model,
"messages": messages,
"tools": tools or NOT_GIVEN,
"max_tokens": options.get(CONF_MAX_TOKENS, RECOMMENDED_MAX_TOKENS),
"system": system.content,
"stream": True,
}
if model in THINKING_MODELS and thinking_budget >= MIN_THINKING_BUDGET:
model_args["thinking"] = ThinkingConfigEnabledParam(
type="enabled", budget_tokens=thinking_budget
)
else:
model_args["thinking"] = ThinkingConfigDisabledParam(type="disabled")
model_args["temperature"] = options.get(
CONF_TEMPERATURE, RECOMMENDED_TEMPERATURE
)
try:
stream = await client.messages.create(**model_args)
except anthropic.AnthropicError as err:
raise HomeAssistantError(
f"Sorry, I had a problem talking to Anthropic: {err}"
) from err
messages.extend(
_convert_content(
[
content
async for content in chat_log.async_add_delta_content_stream(
self.entity_id,
_transform_stream(chat_log, stream, messages),
)
if not isinstance(content, conversation.AssistantContent)
]
)
)
if not chat_log.unresponded_tool_results:
break

View File

@ -316,7 +316,7 @@ async def test_conversation_agent(
assert agent.supported_languages == "*"
@patch("homeassistant.components.anthropic.conversation.llm.AssistAPI._async_get_tools")
@patch("homeassistant.components.anthropic.entity.llm.AssistAPI._async_get_tools")
@pytest.mark.parametrize(
("tool_call_json_parts", "expected_call_tool_args"),
[
@ -430,7 +430,7 @@ async def test_function_call(
)
@patch("homeassistant.components.anthropic.conversation.llm.AssistAPI._async_get_tools")
@patch("homeassistant.components.anthropic.entity.llm.AssistAPI._async_get_tools")
async def test_function_exception(
mock_get_tools,
hass: HomeAssistant,
@ -760,7 +760,7 @@ async def test_redacted_thinking(
assert chat_log.content[2].content == "How can I help you today?"
@patch("homeassistant.components.anthropic.conversation.llm.AssistAPI._async_get_tools")
@patch("homeassistant.components.anthropic.entity.llm.AssistAPI._async_get_tools")
async def test_extended_thinking_tool_call(
mock_get_tools,
hass: HomeAssistant,