core/homeassistant/components/anthropic/entity.py
"""Base entity for Anthropic."""
import base64
from collections.abc import AsyncGenerator, Callable, Iterable
from dataclasses import dataclass, field
import json
from mimetypes import guess_file_type
from pathlib import Path
from typing import Any
import anthropic
from anthropic import AsyncStream
from anthropic.types import (
Base64ImageSourceParam,
Base64PDFSourceParam,
CitationsDelta,
CitationsWebSearchResultLocation,
CitationWebSearchResultLocationParam,
ContentBlockParam,
DocumentBlockParam,
ImageBlockParam,
InputJSONDelta,
MessageDeltaUsage,
MessageParam,
MessageStreamEvent,
RawContentBlockDeltaEvent,
RawContentBlockStartEvent,
RawContentBlockStopEvent,
RawMessageDeltaEvent,
RawMessageStartEvent,
RawMessageStopEvent,
RedactedThinkingBlock,
RedactedThinkingBlockParam,
ServerToolUseBlock,
ServerToolUseBlockParam,
SignatureDelta,
TextBlock,
TextBlockParam,
TextCitation,
TextCitationParam,
TextDelta,
ThinkingBlock,
ThinkingBlockParam,
ThinkingConfigDisabledParam,
ThinkingConfigEnabledParam,
ThinkingDelta,
ToolChoiceAnyParam,
ToolChoiceAutoParam,
ToolChoiceToolParam,
ToolParam,
ToolResultBlockParam,
ToolUnionParam,
ToolUseBlock,
ToolUseBlockParam,
Usage,
WebSearchTool20250305Param,
WebSearchToolRequestErrorParam,
WebSearchToolResultBlock,
WebSearchToolResultBlockParam,
WebSearchToolResultError,
)
from anthropic.types.message_create_params import MessageCreateParamsStreaming
import voluptuous as vol
from voluptuous_openapi import convert
from homeassistant.components import conversation
from homeassistant.config_entries import ConfigSubentry
from homeassistant.core import HomeAssistant
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers import device_registry as dr, llm
from homeassistant.helpers.entity import Entity
from homeassistant.util import slugify
from . import AnthropicConfigEntry
from .const import (
CONF_CHAT_MODEL,
CONF_MAX_TOKENS,
CONF_TEMPERATURE,
CONF_THINKING_BUDGET,
CONF_WEB_SEARCH,
CONF_WEB_SEARCH_CITY,
CONF_WEB_SEARCH_COUNTRY,
CONF_WEB_SEARCH_MAX_USES,
CONF_WEB_SEARCH_REGION,
CONF_WEB_SEARCH_TIMEZONE,
CONF_WEB_SEARCH_USER_LOCATION,
DOMAIN,
LOGGER,
MIN_THINKING_BUDGET,
NON_THINKING_MODELS,
RECOMMENDED_CHAT_MODEL,
RECOMMENDED_MAX_TOKENS,
RECOMMENDED_TEMPERATURE,
RECOMMENDED_THINKING_BUDGET,
)
# Max number of back-and-forth exchanges with the LLM to generate a response
MAX_TOOL_ITERATIONS = 10
def _format_tool(
tool: llm.Tool, custom_serializer: Callable[[Any], Any] | None
) -> ToolParam:
"""Format tool specification."""
return ToolParam(
name=tool.name,
description=tool.description or "",
input_schema=convert(tool.parameters, custom_serializer=custom_serializer),
)
@dataclass(slots=True)
class CitationDetails:
"""Citation details for a content part."""
index: int = 0
"""Start position of the text."""
length: int = 0
"""Length of the relevant data."""
citations: list[TextCitationParam] = field(default_factory=list)
"""Citations for the content part."""
@dataclass(slots=True)
class ContentDetails:
"""Native data for AssistantContent."""
citation_details: list[CitationDetails] = field(default_factory=list)
def has_content(self) -> bool:
"""Check if there is any content."""
return any(detail.length > 0 for detail in self.citation_details)
def has_citations(self) -> bool:
"""Check if there are any citations."""
return any(detail.citations for detail in self.citation_details)
def add_citation_detail(self) -> None:
"""Add a new citation detail."""
if not self.citation_details or self.citation_details[-1].length > 0:
self.citation_details.append(
CitationDetails(
index=self.citation_details[-1].index
+ self.citation_details[-1].length
if self.citation_details
else 0
)
)
def add_citation(self, citation: TextCitation) -> None:
"""Add a citation to the current detail."""
if not self.citation_details:
self.citation_details.append(CitationDetails())
citation_param: TextCitationParam | None = None
if isinstance(citation, CitationsWebSearchResultLocation):
citation_param = CitationWebSearchResultLocationParam(
type="web_search_result_location",
title=citation.title,
url=citation.url,
cited_text=citation.cited_text,
encrypted_index=citation.encrypted_index,
)
if citation_param:
self.citation_details[-1].citations.append(citation_param)
def delete_empty(self) -> None:
"""Delete empty citation details."""
self.citation_details = [
detail for detail in self.citation_details if detail.citations
]
def _convert_content(
chat_content: Iterable[conversation.Content],
) -> list[MessageParam]:
"""Transform HA chat_log content into Anthropic API format."""
messages: list[MessageParam] = []
for content in chat_content:
if isinstance(content, conversation.ToolResultContent):
if content.tool_name == "web_search":
tool_result_block: ContentBlockParam = WebSearchToolResultBlockParam(
type="web_search_tool_result",
tool_use_id=content.tool_call_id,
content=content.tool_result["content"]
if "content" in content.tool_result
else WebSearchToolRequestErrorParam(
type="web_search_tool_result_error",
error_code=content.tool_result.get("error_code", "unavailable"), # type: ignore[typeddict-item]
),
)
external_tool = True
else:
tool_result_block = ToolResultBlockParam(
type="tool_result",
tool_use_id=content.tool_call_id,
content=json.dumps(content.tool_result),
)
external_tool = False
if not messages or messages[-1]["role"] != (
"assistant" if external_tool else "user"
):
messages.append(
MessageParam(
role="assistant" if external_tool else "user",
content=[tool_result_block],
)
)
elif isinstance(messages[-1]["content"], str):
messages[-1]["content"] = [
TextBlockParam(type="text", text=messages[-1]["content"]),
tool_result_block,
]
else:
messages[-1]["content"].append(tool_result_block) # type: ignore[attr-defined]
elif isinstance(content, conversation.UserContent):
# Combine consecutive user messages
if not messages or messages[-1]["role"] != "user":
messages.append(
MessageParam(
role="user",
content=content.content,
)
)
elif isinstance(messages[-1]["content"], str):
messages[-1]["content"] = [
TextBlockParam(type="text", text=messages[-1]["content"]),
TextBlockParam(type="text", text=content.content),
]
else:
messages[-1]["content"].append( # type: ignore[attr-defined]
TextBlockParam(type="text", text=content.content)
)
elif isinstance(content, conversation.AssistantContent):
# Combine consecutive assistant messages
if not messages or messages[-1]["role"] != "assistant":
messages.append(
MessageParam(
role="assistant",
content=[],
)
)
if isinstance(content.native, ThinkingBlock):
messages[-1]["content"].append( # type: ignore[union-attr]
ThinkingBlockParam(
type="thinking",
thinking=content.thinking_content or "",
signature=content.native.signature,
)
)
elif isinstance(content.native, RedactedThinkingBlock):
redacted_thinking_block = RedactedThinkingBlockParam(
type="redacted_thinking",
data=content.native.data,
)
if isinstance(messages[-1]["content"], str):
messages[-1]["content"] = [
TextBlockParam(type="text", text=messages[-1]["content"]),
redacted_thinking_block,
]
else:
messages[-1]["content"].append( # type: ignore[attr-defined]
redacted_thinking_block
)
if content.content:
current_index = 0
for detail in (
content.native.citation_details
if isinstance(content.native, ContentDetails)
else [CitationDetails(length=len(content.content))]
):
if detail.index > current_index:
# Add text block for any text without citations
messages[-1]["content"].append( # type: ignore[union-attr]
TextBlockParam(
type="text",
text=content.content[current_index : detail.index],
)
)
messages[-1]["content"].append( # type: ignore[union-attr]
TextBlockParam(
type="text",
text=content.content[
detail.index : detail.index + detail.length
],
citations=detail.citations,
)
if detail.citations
else TextBlockParam(
type="text",
text=content.content[
detail.index : detail.index + detail.length
],
)
)
current_index = detail.index + detail.length
if current_index < len(content.content):
# Add text block for any remaining text without citations
messages[-1]["content"].append( # type: ignore[union-attr]
TextBlockParam(
type="text",
text=content.content[current_index:],
)
)
if content.tool_calls:
messages[-1]["content"].extend( # type: ignore[union-attr]
[
ServerToolUseBlockParam(
type="server_tool_use",
id=tool_call.id,
name="web_search",
input=tool_call.tool_args,
)
if tool_call.external and tool_call.tool_name == "web_search"
else ToolUseBlockParam(
type="tool_use",
id=tool_call.id,
name=tool_call.tool_name,
input=tool_call.tool_args,
)
for tool_call in content.tool_calls
]
)
else:
# Note: We don't pass SystemContent here, as it's passed to the API as the system prompt
raise TypeError(f"Unexpected content type: {type(content)}")
return messages
async def _transform_stream( # noqa: C901 - This is complex, but better to have it in one place
chat_log: conversation.ChatLog,
stream: AsyncStream[MessageStreamEvent],
output_tool: str | None = None,
) -> AsyncGenerator[
conversation.AssistantContentDeltaDict | conversation.ToolResultContentDeltaDict
]:
"""Transform the response stream into HA format.
A typical stream of responses might look something like the following:
- RawMessageStartEvent with no content
- RawContentBlockStartEvent with an empty ThinkingBlock (if extended thinking is enabled)
- RawContentBlockDeltaEvent with a ThinkingDelta
- RawContentBlockDeltaEvent with a ThinkingDelta
- RawContentBlockDeltaEvent with a ThinkingDelta
- ...
- RawContentBlockDeltaEvent with a SignatureDelta
- RawContentBlockStopEvent
- RawContentBlockStartEvent with a RedactedThinkingBlock (occasionally)
- RawContentBlockStopEvent (RedactedThinkingBlock does not have a delta)
- RawContentBlockStartEvent with an empty TextBlock
- RawContentBlockDeltaEvent with a TextDelta
- RawContentBlockDeltaEvent with a TextDelta
- RawContentBlockDeltaEvent with a TextDelta
- ...
- RawContentBlockStopEvent
- RawContentBlockStartEvent with ToolUseBlock specifying the function name
- RawContentBlockDeltaEvent with an InputJSONDelta
- RawContentBlockDeltaEvent with an InputJSONDelta
- ...
- RawContentBlockStopEvent
- RawMessageDeltaEvent with a stop_reason='tool_use'
- RawMessageStopEvent(type='message_stop')
Each message could contain multiple blocks of the same type.
"""
if stream is None:
raise TypeError("Expected a stream of messages")
current_tool_block: ToolUseBlockParam | ServerToolUseBlockParam | None = None
current_tool_args: str
content_details = ContentDetails()
content_details.add_citation_detail()
input_usage: Usage | None = None
has_native = False
first_block: bool
async for response in stream:
LOGGER.debug("Received response: %s", response)
if isinstance(response, RawMessageStartEvent):
if response.message.role != "assistant":
raise ValueError("Unexpected message role")
input_usage = response.message.usage
first_block = True
elif isinstance(response, RawContentBlockStartEvent):
if isinstance(response.content_block, ToolUseBlock):
current_tool_block = ToolUseBlockParam(
type="tool_use",
id=response.content_block.id,
name=response.content_block.name,
input="",
)
current_tool_args = ""
if response.content_block.name == output_tool:
if first_block or content_details.has_content():
if content_details.has_citations():
content_details.delete_empty()
yield {"native": content_details}
content_details = ContentDetails()
content_details.add_citation_detail()
yield {"role": "assistant"}
has_native = False
first_block = False
elif isinstance(response.content_block, TextBlock):
if ( # Do not start a new assistant content just for citations; concatenate consecutive blocks with citations instead.
first_block
or (
not content_details.has_citations()
and response.content_block.citations is None
and content_details.has_content()
)
):
if content_details.has_citations():
content_details.delete_empty()
yield {"native": content_details}
content_details = ContentDetails()
yield {"role": "assistant"}
has_native = False
first_block = False
content_details.add_citation_detail()
if response.content_block.text:
content_details.citation_details[-1].length += len(
response.content_block.text
)
yield {"content": response.content_block.text}
elif isinstance(response.content_block, ThinkingBlock):
if first_block or has_native:
if content_details.has_citations():
content_details.delete_empty()
yield {"native": content_details}
content_details = ContentDetails()
content_details.add_citation_detail()
yield {"role": "assistant"}
has_native = False
first_block = False
elif isinstance(response.content_block, RedactedThinkingBlock):
LOGGER.debug(
"Some of Claudes internal reasoning has been automatically "
"encrypted for safety reasons. This doesnt affect the quality of "
"responses"
)
if has_native:
if content_details.has_citations():
content_details.delete_empty()
yield {"native": content_details}
content_details = ContentDetails()
content_details.add_citation_detail()
yield {"role": "assistant"}
has_native = False
first_block = False
yield {"native": response.content_block}
has_native = True
elif isinstance(response.content_block, ServerToolUseBlock):
current_tool_block = ServerToolUseBlockParam(
type="server_tool_use",
id=response.content_block.id,
name=response.content_block.name,
input="",
)
current_tool_args = ""
elif isinstance(response.content_block, WebSearchToolResultBlock):
if content_details.has_citations():
content_details.delete_empty()
yield {"native": content_details}
content_details = ContentDetails()
content_details.add_citation_detail()
yield {
"role": "tool_result",
"tool_call_id": response.content_block.tool_use_id,
"tool_name": "web_search",
"tool_result": {
"type": "web_search_tool_result_error",
"error_code": response.content_block.content.error_code,
}
if isinstance(
response.content_block.content, WebSearchToolResultError
)
else {
"content": [
{
"type": "web_search_result",
"encrypted_content": block.encrypted_content,
"page_age": block.page_age,
"title": block.title,
"url": block.url,
}
for block in response.content_block.content
]
},
}
first_block = True
elif isinstance(response, RawContentBlockDeltaEvent):
if isinstance(response.delta, InputJSONDelta):
if (
current_tool_block is not None
and current_tool_block["name"] == output_tool
):
content_details.citation_details[-1].length += len(
response.delta.partial_json
)
yield {"content": response.delta.partial_json}
else:
current_tool_args += response.delta.partial_json
elif isinstance(response.delta, TextDelta):
content_details.citation_details[-1].length += len(response.delta.text)
yield {"content": response.delta.text}
elif isinstance(response.delta, ThinkingDelta):
yield {"thinking_content": response.delta.thinking}
elif isinstance(response.delta, SignatureDelta):
yield {
"native": ThinkingBlock(
type="thinking",
thinking="",
signature=response.delta.signature,
)
}
has_native = True
elif isinstance(response.delta, CitationsDelta):
content_details.add_citation(response.delta.citation)
elif isinstance(response, RawContentBlockStopEvent):
if current_tool_block is not None:
if current_tool_block["name"] == output_tool:
current_tool_block = None
continue
tool_args = json.loads(current_tool_args) if current_tool_args else {}
current_tool_block["input"] = tool_args
yield {
"tool_calls": [
llm.ToolInput(
id=current_tool_block["id"],
tool_name=current_tool_block["name"],
tool_args=tool_args,
external=current_tool_block["type"] == "server_tool_use",
)
]
}
current_tool_block = None
elif isinstance(response, RawMessageDeltaEvent):
if (usage := response.usage) is not None:
chat_log.async_trace(_create_token_stats(input_usage, usage))
if response.delta.stop_reason == "refusal":
raise HomeAssistantError("Potential policy violation detected")
elif isinstance(response, RawMessageStopEvent):
if content_details.has_citations():
content_details.delete_empty()
yield {"native": content_details}
content_details = ContentDetails()
content_details.add_citation_detail()
def _create_token_stats(
input_usage: Usage | None, response_usage: MessageDeltaUsage
) -> dict[str, Any]:
"""Create token stats for conversation agent tracing."""
input_tokens = 0
cached_input_tokens = 0
if input_usage:
input_tokens = input_usage.input_tokens
cached_input_tokens = input_usage.cache_creation_input_tokens or 0
output_tokens = response_usage.output_tokens
return {
"stats": {
"input_tokens": input_tokens,
"cached_input_tokens": cached_input_tokens,
"output_tokens": output_tokens,
}
}
class AnthropicBaseLLMEntity(Entity):
"""Anthropic base LLM entity."""
_attr_has_entity_name = True
_attr_name = None
def __init__(self, entry: AnthropicConfigEntry, subentry: ConfigSubentry) -> None:
"""Initialize the entity."""
self.entry = entry
self.subentry = subentry
self._attr_unique_id = subentry.subentry_id
self._attr_device_info = dr.DeviceInfo(
identifiers={(DOMAIN, subentry.subentry_id)},
name=subentry.title,
manufacturer="Anthropic",
model="Claude",
entry_type=dr.DeviceEntryType.SERVICE,
)
async def _async_handle_chat_log(
self,
chat_log: conversation.ChatLog,
structure_name: str | None = None,
structure: vol.Schema | None = None,
) -> None:
"""Generate an answer for the chat log."""
options = self.subentry.data
system = chat_log.content[0]
if not isinstance(system, conversation.SystemContent):
raise TypeError("First message must be a system message")
messages = _convert_content(chat_log.content[1:])
model = options.get(CONF_CHAT_MODEL, RECOMMENDED_CHAT_MODEL)
model_args = MessageCreateParamsStreaming(
model=model,
messages=messages,
max_tokens=options.get(CONF_MAX_TOKENS, RECOMMENDED_MAX_TOKENS),
system=system.content,
stream=True,
)
thinking_budget = options.get(CONF_THINKING_BUDGET, RECOMMENDED_THINKING_BUDGET)
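# Extended thinking is only enabled for models that support it and when the
# configured budget meets the minimum.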
if (
not model.startswith(tuple(NON_THINKING_MODELS))
and thinking_budget >= MIN_THINKING_BUDGET
):
model_args["thinking"] = ThinkingConfigEnabledParam(
type="enabled", budget_tokens=thinking_budget
)
else:
model_args["thinking"] = ThinkingConfigDisabledParam(type="disabled")
model_args["temperature"] = options.get(
CONF_TEMPERATURE, RECOMMENDED_TEMPERATURE
)
tools: list[ToolUnionParam] = []
if chat_log.llm_api:
tools = [
_format_tool(tool, chat_log.llm_api.custom_serializer)
for tool in chat_log.llm_api.tools
]
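# The server-side web search tool is added when enabled in the subentry options;
# an approximate user location can be supplied to localize results.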
if options.get(CONF_WEB_SEARCH):
web_search = WebSearchTool20250305Param(
name="web_search",
type="web_search_20250305",
max_uses=options.get(CONF_WEB_SEARCH_MAX_USES),
)
if options.get(CONF_WEB_SEARCH_USER_LOCATION):
web_search["user_location"] = {
"type": "approximate",
"city": options.get(CONF_WEB_SEARCH_CITY, ""),
"region": options.get(CONF_WEB_SEARCH_REGION, ""),
"country": options.get(CONF_WEB_SEARCH_COUNTRY, ""),
"timezone": options.get(CONF_WEB_SEARCH_TIMEZONE, ""),
}
tools.append(web_search)
# Handle attachments by adding them to the last user message
last_content = chat_log.content[-1]
if last_content.role == "user" and last_content.attachments:
last_message = messages[-1]
if last_message["role"] != "user":
raise HomeAssistantError(
"Last message must be a user message to add attachments"
)
if isinstance(last_message["content"], str):
last_message["content"] = [
TextBlockParam(type="text", text=last_message["content"])
]
last_message["content"].extend( # type: ignore[union-attr]
await async_prepare_files_for_prompt(
self.hass, [(a.path, a.mime_type) for a in last_content.attachments]
)
)
if structure and structure_name:
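# The structure name is slugified so it can be used as a tool name; structured
# output is implemented by asking the model to call this tool.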
structure_name = slugify(structure_name)
if model_args["thinking"]["type"] == "disabled":
if not tools:
# Simplest case: no tools and no extended thinking
# Add a tool and force its use
model_args["tool_choice"] = ToolChoiceToolParam(
type="tool",
name=structure_name,
)
else:
# Second case: tools present but no extended thinking
# Allow the model to use any tool, but not a plain text response
# The model should pick the right tool based on its description
model_args["tool_choice"] = ToolChoiceAnyParam(
type="any",
)
else:
# Extended thinking is enabled. With extended thinking, we cannot
# force tool use or disable text responses, so we add a hint to the
# system prompt instead. With extended thinking, the model should be
# smart enough to use the tool.
model_args["tool_choice"] = ToolChoiceAutoParam(
type="auto",
)
if isinstance(model_args["system"], str):
model_args["system"] = [
TextBlockParam(type="text", text=model_args["system"])
]
model_args["system"].append( # type: ignore[union-attr]
TextBlockParam(
type="text",
text=f"Claude MUST use the '{structure_name}' tool to provide the final answer instead of plain text.",
)
)
tools.append(
ToolParam(
name=structure_name,
description="Use this tool to reply to the user",
input_schema=convert(
structure,
custom_serializer=chat_log.llm_api.custom_serializer
if chat_log.llm_api
else llm.selector_serializer,
),
)
)
if tools:
model_args["tools"] = tools
client = self.entry.runtime_data
# To prevent infinite loops, we limit the number of iterations
for _iteration in range(MAX_TOOL_ITERATIONS):
try:
stream = await client.messages.create(**model_args)
messages.extend(
_convert_content(
[
content
async for content in chat_log.async_add_delta_content_stream(
self.entity_id,
_transform_stream(
chat_log,
stream,
output_tool=structure_name if structure else None,
),
)
]
)
)
except anthropic.AnthropicError as err:
raise HomeAssistantError(
f"Sorry, I had a problem talking to Anthropic: {err}"
) from err
if not chat_log.unresponded_tool_results:
break
async def async_prepare_files_for_prompt(
hass: HomeAssistant, files: list[tuple[Path, str | None]]
) -> Iterable[ImageBlockParam | DocumentBlockParam]:
"""Append files to a prompt.
Caller needs to ensure that the files are allowed.
"""
def append_files_to_content() -> Iterable[ImageBlockParam | DocumentBlockParam]:
content: list[ImageBlockParam | DocumentBlockParam] = []
for file_path, mime_type in files:
if not file_path.exists():
raise HomeAssistantError(f"`{file_path}` does not exist")
if mime_type is None:
mime_type = guess_file_type(file_path)[0]
if not mime_type or not mime_type.startswith(("image/", "application/pdf")):
raise HomeAssistantError(
"Only images and PDF are supported by the Anthropic API,"
f"`{file_path}` is not an image file or PDF"
)
if mime_type == "image/jpg":
mime_type = "image/jpeg"
base64_file = base64.b64encode(file_path.read_bytes()).decode("utf-8")
if mime_type.startswith("image/"):
content.append(
ImageBlockParam(
type="image",
source=Base64ImageSourceParam(
type="base64",
media_type=mime_type, # type: ignore[typeddict-item]
data=base64_file,
),
)
)
elif mime_type.startswith("application/pdf"):
content.append(
DocumentBlockParam(
type="document",
source=Base64PDFSourceParam(
type="base64",
media_type=mime_type, # type: ignore[typeddict-item]
data=base64_file,
),
)
)
return content
return await hass.async_add_executor_job(append_files_to_content)