From 85d2e3d006be81b29adf1ac03c0c156cc395cc49 Mon Sep 17 00:00:00 2001 From: John Karabudak Date: Sun, 30 Mar 2025 18:00:40 -0230 Subject: [PATCH] Fix LLM to speed up prefill (#141156) * fix: two minor LLM changes to speed up prefill - moved the current date/time to the end of the prompt - started sorting all entities by last_changed * addressed PR comments * fixed tests * reduced scope of try/catch in LLM prompt * addressed more PR comments * fixed Anthropic test * addressed another PR comment * fixed remainder of tests --- .../components/conversation/chat_log.py | 73 ++++++++++++------- homeassistant/helpers/llm.py | 3 +- .../snapshots/test_conversation.ambr | 2 +- .../snapshots/test_conversation.ambr | 6 +- tests/helpers/test_llm.py | 54 +++++++------- 5 files changed, 79 insertions(+), 59 deletions(-) diff --git a/homeassistant/components/conversation/chat_log.py b/homeassistant/components/conversation/chat_log.py index cb7b8dd22f7..9ffcc7fc0d5 100644 --- a/homeassistant/components/conversation/chat_log.py +++ b/homeassistant/components/conversation/chat_log.py @@ -354,6 +354,35 @@ class ChatLog: if self.delta_listener: self.delta_listener(self, asdict(tool_result)) + async def _async_expand_prompt_template( + self, + llm_context: llm.LLMContext, + prompt: str, + language: str, + user_name: str | None = None, + ) -> str: + try: + return template.Template(prompt, self.hass).async_render( + { + "ha_name": self.hass.config.location_name, + "user_name": user_name, + "llm_context": llm_context, + }, + parse_result=False, + ) + except TemplateError as err: + LOGGER.error("Error rendering prompt: %s", err) + intent_response = intent.IntentResponse(language=language) + intent_response.async_set_error( + intent.IntentResponseErrorCode.UNKNOWN, + "Sorry, I had a problem with my template", + ) + raise ConverseError( + "Error rendering prompt", + conversation_id=self.conversation_id, + response=intent_response, + ) from err + async def async_update_llm_data( self, conversing_domain: str, @@ -409,38 +438,28 @@ class ChatLog: ): user_name = user.name - try: - prompt_parts = [ - template.Template( - llm.BASE_PROMPT - + (user_llm_prompt or llm.DEFAULT_INSTRUCTIONS_PROMPT), - self.hass, - ).async_render( - { - "ha_name": self.hass.config.location_name, - "user_name": user_name, - "llm_context": llm_context, - }, - parse_result=False, - ) - ] - - except TemplateError as err: - LOGGER.error("Error rendering prompt: %s", err) - intent_response = intent.IntentResponse(language=user_input.language) - intent_response.async_set_error( - intent.IntentResponseErrorCode.UNKNOWN, - "Sorry, I had a problem with my template", + prompt_parts = [] + prompt_parts.append( + await self._async_expand_prompt_template( + llm_context, + (user_llm_prompt or llm.DEFAULT_INSTRUCTIONS_PROMPT), + user_input.language, + user_name, ) - raise ConverseError( - "Error rendering prompt", - conversation_id=self.conversation_id, - response=intent_response, - ) from err + ) if llm_api: prompt_parts.append(llm_api.api_prompt) + prompt_parts.append( + await self._async_expand_prompt_template( + llm_context, + llm.BASE_PROMPT, + user_input.language, + user_name, + ) + ) + if extra_system_prompt := ( # Take new system prompt if one was given user_input.extra_system_prompt or self.extra_system_prompt diff --git a/homeassistant/helpers/llm.py b/homeassistant/helpers/llm.py index 7f6fe22ec70..aa6b3dc2cbf 100644 --- a/homeassistant/helpers/llm.py +++ b/homeassistant/helpers/llm.py @@ -9,6 +9,7 @@ from datetime import timedelta from decimal import Decimal from enum import Enum from functools import cache, partial +from operator import attrgetter from typing import Any, cast import slugify as unicode_slug @@ -496,7 +497,7 @@ def _get_exposed_entities( CALENDAR_DOMAIN: {}, } - for state in hass.states.async_all(): + for state in sorted(hass.states.async_all(), key=attrgetter("name")): if not async_should_expose(hass, assistant, state.entity_id): continue diff --git a/tests/components/anthropic/snapshots/test_conversation.ambr b/tests/components/anthropic/snapshots/test_conversation.ambr index c0ed986f002..ea4ce5a980d 100644 --- a/tests/components/anthropic/snapshots/test_conversation.ambr +++ b/tests/components/anthropic/snapshots/test_conversation.ambr @@ -3,11 +3,11 @@ list([ dict({ 'content': ''' - Current time is 16:00:00. Today's date is 2024-06-03. You are a voice assistant for Home Assistant. Answer questions about the world truthfully. Answer in plain text. Keep it simple and to the point. Only if the user wants to control a device, tell them to expose entities to their voice assistant in Home Assistant. + Current time is 16:00:00. Today's date is 2024-06-03. ''', 'role': 'system', }), diff --git a/tests/components/google_generative_ai_conversation/snapshots/test_conversation.ambr b/tests/components/google_generative_ai_conversation/snapshots/test_conversation.ambr index ec98bdd6529..2376bf51cdc 100644 --- a/tests/components/google_generative_ai_conversation/snapshots/test_conversation.ambr +++ b/tests/components/google_generative_ai_conversation/snapshots/test_conversation.ambr @@ -6,7 +6,7 @@ tuple( ), dict({ - 'config': GenerateContentConfig(http_options=None, system_instruction="Current time is 05:00:00. Today's date is 2024-05-24.\nYou are a voice assistant for Home Assistant.\nAnswer questions about the world truthfully.\nAnswer in plain text. Keep it simple and to the point.\nOnly if the user wants to control a device, tell them to expose entities to their voice assistant in Home Assistant.", temperature=1.0, top_p=0.95, top_k=64.0, candidate_count=None, max_output_tokens=150, stop_sequences=None, response_logprobs=None, logprobs=None, presence_penalty=None, frequency_penalty=None, seed=None, response_mime_type=None, response_schema=None, routing_config=None, safety_settings=[SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=)], tools=[Tool(function_declarations=[FunctionDeclaration(response=None, description='Test function', name='test_tool', parameters=Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties={'param1': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description='Test parameters', enum=None, format=None, items=Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=), max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=), 'param2': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=None), 'param3': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties={'json': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=)}, property_ordering=None, required=[], type=)}, property_ordering=None, required=[], type=))], retrieval=None, google_search=None, google_search_retrieval=None, code_execution=None)], tool_config=None, labels=None, cached_content=None, response_modalities=None, media_resolution=None, speech_config=None, audio_timestamp=None, automatic_function_calling=AutomaticFunctionCallingConfig(disable=True, maximum_remote_calls=None, ignore_call_history=None), thinking_config=None), + 'config': GenerateContentConfig(http_options=None, system_instruction="You are a voice assistant for Home Assistant.\nAnswer questions about the world truthfully.\nAnswer in plain text. Keep it simple and to the point.\nOnly if the user wants to control a device, tell them to expose entities to their voice assistant in Home Assistant.\nCurrent time is 05:00:00. Today's date is 2024-05-24.", temperature=1.0, top_p=0.95, top_k=64.0, candidate_count=None, max_output_tokens=150, stop_sequences=None, response_logprobs=None, logprobs=None, presence_penalty=None, frequency_penalty=None, seed=None, response_mime_type=None, response_schema=None, routing_config=None, safety_settings=[SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=)], tools=[Tool(function_declarations=[FunctionDeclaration(response=None, description='Test function', name='test_tool', parameters=Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties={'param1': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description='Test parameters', enum=None, format=None, items=Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=), max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=), 'param2': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=None), 'param3': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties={'json': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=)}, property_ordering=None, required=[], type=)}, property_ordering=None, required=[], type=))], retrieval=None, google_search=None, google_search_retrieval=None, code_execution=None)], tool_config=None, labels=None, cached_content=None, response_modalities=None, media_resolution=None, speech_config=None, audio_timestamp=None, automatic_function_calling=AutomaticFunctionCallingConfig(disable=True, maximum_remote_calls=None, ignore_call_history=None), thinking_config=None), 'history': list([ ]), 'model': 'models/gemini-2.0-flash', @@ -39,7 +39,7 @@ tuple( ), dict({ - 'config': GenerateContentConfig(http_options=None, system_instruction="Current time is 05:00:00. Today's date is 2024-05-24.\nYou are a voice assistant for Home Assistant.\nAnswer questions about the world truthfully.\nAnswer in plain text. Keep it simple and to the point.\nOnly if the user wants to control a device, tell them to expose entities to their voice assistant in Home Assistant.", temperature=1.0, top_p=0.95, top_k=64.0, candidate_count=None, max_output_tokens=150, stop_sequences=None, response_logprobs=None, logprobs=None, presence_penalty=None, frequency_penalty=None, seed=None, response_mime_type=None, response_schema=None, routing_config=None, safety_settings=[SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=)], tools=[Tool(function_declarations=[FunctionDeclaration(response=None, description='Test function', name='test_tool', parameters=None)], retrieval=None, google_search=None, google_search_retrieval=None, code_execution=None)], tool_config=None, labels=None, cached_content=None, response_modalities=None, media_resolution=None, speech_config=None, audio_timestamp=None, automatic_function_calling=AutomaticFunctionCallingConfig(disable=True, maximum_remote_calls=None, ignore_call_history=None), thinking_config=None), + 'config': GenerateContentConfig(http_options=None, system_instruction="You are a voice assistant for Home Assistant.\nAnswer questions about the world truthfully.\nAnswer in plain text. Keep it simple and to the point.\nOnly if the user wants to control a device, tell them to expose entities to their voice assistant in Home Assistant.\nCurrent time is 05:00:00. Today's date is 2024-05-24.", temperature=1.0, top_p=0.95, top_k=64.0, candidate_count=None, max_output_tokens=150, stop_sequences=None, response_logprobs=None, logprobs=None, presence_penalty=None, frequency_penalty=None, seed=None, response_mime_type=None, response_schema=None, routing_config=None, safety_settings=[SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=)], tools=[Tool(function_declarations=[FunctionDeclaration(response=None, description='Test function', name='test_tool', parameters=None)], retrieval=None, google_search=None, google_search_retrieval=None, code_execution=None)], tool_config=None, labels=None, cached_content=None, response_modalities=None, media_resolution=None, speech_config=None, audio_timestamp=None, automatic_function_calling=AutomaticFunctionCallingConfig(disable=True, maximum_remote_calls=None, ignore_call_history=None), thinking_config=None), 'history': list([ ]), 'model': 'models/gemini-2.0-flash', @@ -72,7 +72,7 @@ tuple( ), dict({ - 'config': GenerateContentConfig(http_options=None, system_instruction="Current time is 05:00:00. Today's date is 2024-05-24.\nYou are a voice assistant for Home Assistant.\nAnswer questions about the world truthfully.\nAnswer in plain text. Keep it simple and to the point.\nOnly if the user wants to control a device, tell them to expose entities to their voice assistant in Home Assistant.", temperature=1.0, top_p=0.95, top_k=64.0, candidate_count=None, max_output_tokens=150, stop_sequences=None, response_logprobs=None, logprobs=None, presence_penalty=None, frequency_penalty=None, seed=None, response_mime_type=None, response_schema=None, routing_config=None, safety_settings=[SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=)], tools=[Tool(function_declarations=[FunctionDeclaration(response=None, description='Test function', name='test_tool', parameters=Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties={'param1': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description='Test parameters', enum=None, format=None, items=Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=), max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=), 'param2': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=None), 'param3': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties={'json': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=)}, property_ordering=None, required=[], type=)}, property_ordering=None, required=[], type=))], retrieval=None, google_search=None, google_search_retrieval=None, code_execution=None), Tool(function_declarations=None, retrieval=None, google_search=GoogleSearch(), google_search_retrieval=None, code_execution=None)], tool_config=None, labels=None, cached_content=None, response_modalities=None, media_resolution=None, speech_config=None, audio_timestamp=None, automatic_function_calling=AutomaticFunctionCallingConfig(disable=True, maximum_remote_calls=None, ignore_call_history=None), thinking_config=None), + 'config': GenerateContentConfig(http_options=None, system_instruction="You are a voice assistant for Home Assistant.\nAnswer questions about the world truthfully.\nAnswer in plain text. Keep it simple and to the point.\nOnly if the user wants to control a device, tell them to expose entities to their voice assistant in Home Assistant.\nCurrent time is 05:00:00. Today's date is 2024-05-24.", temperature=1.0, top_p=0.95, top_k=64.0, candidate_count=None, max_output_tokens=150, stop_sequences=None, response_logprobs=None, logprobs=None, presence_penalty=None, frequency_penalty=None, seed=None, response_mime_type=None, response_schema=None, routing_config=None, safety_settings=[SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=), SafetySetting(method=None, category=, threshold=)], tools=[Tool(function_declarations=[FunctionDeclaration(response=None, description='Test function', name='test_tool', parameters=Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties={'param1': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description='Test parameters', enum=None, format=None, items=Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=), max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=), 'param2': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=None), 'param3': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties={'json': Schema(example=None, pattern=None, default=None, max_length=None, title=None, min_length=None, min_properties=None, max_properties=None, any_of=None, description=None, enum=None, format=None, items=None, max_items=None, maximum=None, min_items=None, minimum=None, nullable=None, properties=None, property_ordering=None, required=None, type=)}, property_ordering=None, required=[], type=)}, property_ordering=None, required=[], type=))], retrieval=None, google_search=None, google_search_retrieval=None, code_execution=None), Tool(function_declarations=None, retrieval=None, google_search=GoogleSearch(), google_search_retrieval=None, code_execution=None)], tool_config=None, labels=None, cached_content=None, response_modalities=None, media_resolution=None, speech_config=None, audio_timestamp=None, automatic_function_calling=AutomaticFunctionCallingConfig(disable=True, maximum_remote_calls=None, ignore_call_history=None), thinking_config=None), 'history': list([ ]), 'model': 'models/gemini-2.0-flash', diff --git a/tests/helpers/test_llm.py b/tests/helpers/test_llm.py index 19ada407550..26c357c4b0a 100644 --- a/tests/helpers/test_llm.py +++ b/tests/helpers/test_llm.py @@ -576,6 +576,10 @@ async def test_assist_api_prompt( ) ) exposed_entities_prompt = """An overview of the areas and the devices in this smart home: +- names: '1' + domain: light + state: unavailable + areas: Test Area 2 - names: Kitchen domain: light state: 'on' @@ -590,18 +594,6 @@ async def test_assist_api_prompt( domain: light state: unavailable areas: Test Area, Alternative name -- names: Test Service - domain: light - state: unavailable - areas: Test Area, Alternative name -- names: Test Service - domain: light - state: unavailable - areas: Test Area, Alternative name -- names: Test Service - domain: light - state: unavailable - areas: Test Area, Alternative name - names: Test Device 2 domain: light state: unavailable @@ -614,16 +606,27 @@ async def test_assist_api_prompt( domain: light state: unavailable areas: Test Area 2 -- names: Unnamed Device +- names: Test Service domain: light state: unavailable - areas: Test Area 2 -- names: '1' + areas: Test Area, Alternative name +- names: Test Service + domain: light + state: unavailable + areas: Test Area, Alternative name +- names: Test Service + domain: light + state: unavailable + areas: Test Area, Alternative name +- names: Unnamed Device domain: light state: unavailable areas: Test Area 2 """ stateless_exposed_entities_prompt = """An overview of the areas and the devices in this smart home: +- names: '1' + domain: light + areas: Test Area 2 - names: Kitchen domain: light - names: Living Room @@ -632,15 +635,6 @@ async def test_assist_api_prompt( - names: Test Device, my test light domain: light areas: Test Area, Alternative name -- names: Test Service - domain: light - areas: Test Area, Alternative name -- names: Test Service - domain: light - areas: Test Area, Alternative name -- names: Test Service - domain: light - areas: Test Area, Alternative name - names: Test Device 2 domain: light areas: Test Area 2 @@ -650,10 +644,16 @@ async def test_assist_api_prompt( - names: Test Device 4 domain: light areas: Test Area 2 -- names: Unnamed Device +- names: Test Service domain: light - areas: Test Area 2 -- names: '1' + areas: Test Area, Alternative name +- names: Test Service + domain: light + areas: Test Area, Alternative name +- names: Test Service + domain: light + areas: Test Area, Alternative name +- names: Unnamed Device domain: light areas: Test Area 2 """