From bb9f5342598c8b1afdc6a08ebc40f17c780aa7da Mon Sep 17 00:00:00 2001
From: Artur Pragacz <49985303+arturpragacz@users.noreply.github.com>
Date: Tue, 15 Oct 2024 16:14:31 +0200
Subject: [PATCH] Improve intent recognition in default conversation agent
 (#124282)

Use the same logic for custom sentences.
Prefer higher quality (longer) names.
---
 .../components/conversation/default_agent.py  | 77 +++++++++++--------
 .../conversation/test_default_agent.py        | 39 ++++++++--
 2 files changed, 76 insertions(+), 40 deletions(-)

diff --git a/homeassistant/components/conversation/default_agent.py b/homeassistant/components/conversation/default_agent.py
index b607ac1d41f..6b5cef89fd6 100644
--- a/homeassistant/components/conversation/default_agent.py
+++ b/homeassistant/components/conversation/default_agent.py
@@ -16,6 +16,7 @@ from hassil.expression import Expression, ListReference, Sequence
 from hassil.intents import Intents, SlotList, TextSlotList, WildcardSlotList
 from hassil.recognize import (
     MISSING_ENTITY,
+    MatchEntity,
     RecognizeResult,
     UnmatchedTextEntity,
     recognize_all,
@@ -561,9 +562,10 @@ class DefaultAgent(ConversationEntity):
         language: str,
     ) -> RecognizeResult | None:
         """Search intents for a strict match to user input."""
-        custom_result: RecognizeResult | None = None
-        name_result: RecognizeResult | None = None
+        custom_found = False
+        name_found = False
         best_results: list[RecognizeResult] = []
+        best_name_quality: int | None = None
         best_text_chunks_matched: int | None = None
         for result in recognize_all(
             user_input.text,
@@ -572,37 +574,52 @@ class DefaultAgent(ConversationEntity):
             intent_context=intent_context,
             language=language,
         ):
-            # User intents have highest priority
-            if (result.intent_metadata is not None) and result.intent_metadata.get(
-                METADATA_CUSTOM_SENTENCE
-            ):
-                if (custom_result is None) or (
-                    result.text_chunks_matched > custom_result.text_chunks_matched
-                ):
-                    custom_result = result
+            # Prioritize user intents
+            is_custom = (
+                result.intent_metadata is not None
+                and result.intent_metadata.get(METADATA_CUSTOM_SENTENCE)
+            )
 
-                # Clear builtin results
-                best_results = []
-                name_result = None
+            if custom_found and not is_custom:
                 continue
 
-            # Prioritize results with a "name" slot, but still prefer ones with
-            # more literal text matched.
-            if (
-                ("name" in result.entities)
-                and (not result.entities["name"].is_wildcard)
-                and (
-                    (name_result is None)
-                    or (result.text_chunks_matched > name_result.text_chunks_matched)
-                )
-            ):
-                name_result = result
+            if not custom_found and is_custom:
+                custom_found = True
+                # Clear builtin results
+                name_found = False
+                best_results = []
+                best_name_quality = None
+                best_text_chunks_matched = None
 
+            # Prioritize results with a "name" slot
+            name = result.entities.get("name")
+            is_name = name and not name.is_wildcard
+
+            if name_found and not is_name:
+                continue
+
+            if not name_found and is_name:
+                name_found = True
+                # Clear non-name results
+                best_results = []
+                best_text_chunks_matched = None
+
+            if is_name:
+                # Prioritize results with a better "name" slot
+                name_quality = len(cast(MatchEntity, name).value.split())
+                if (best_name_quality is None) or (name_quality > best_name_quality):
+                    best_name_quality = name_quality
+                    # Clear worse name results
+                    best_results = []
+                    best_text_chunks_matched = None
+                elif name_quality < best_name_quality:
+                    continue
+
+            # Prioritize results with more literal text
+            # This causes wildcards to match last.
             if (best_text_chunks_matched is None) or (
                 result.text_chunks_matched > best_text_chunks_matched
             ):
-                # Only overwrite if more literal text was matched.
-                # This causes wildcards to match last.
                 best_results = [result]
                 best_text_chunks_matched = result.text_chunks_matched
             elif result.text_chunks_matched == best_text_chunks_matched:
@@ -610,14 +627,6 @@ class DefaultAgent(ConversationEntity):
                 # We will resolve the ambiguity below.
                 best_results.append(result)
 
-        if custom_result is not None:
-            # Prioritize user intents
-            return custom_result
-
-        if name_result is not None:
-            # Prioritize matches with entity names above area names
-            return name_result
-
         if best_results:
             # Successful strict match
             return best_results[0]
diff --git a/tests/components/conversation/test_default_agent.py b/tests/components/conversation/test_default_agent.py
index 8eef4215fd3..9c62f3b8345 100644
--- a/tests/components/conversation/test_default_agent.py
+++ b/tests/components/conversation/test_default_agent.py
@@ -14,6 +14,7 @@ import yaml
 from homeassistant.components import conversation, cover, media_player
 from homeassistant.components.conversation import default_agent
 from homeassistant.components.conversation.const import DATA_DEFAULT_ENTITY
+from homeassistant.components.conversation.default_agent import METADATA_CUSTOM_SENTENCE
 from homeassistant.components.conversation.models import ConversationInput
 from homeassistant.components.cover import SERVICE_OPEN_COVER
 from homeassistant.components.homeassistant.exposed_entities import (
@@ -2551,13 +2552,15 @@ async def test_light_area_same_name(
     device_registry.async_update_device(device.id, area_id=kitchen_area.id)
 
     kitchen_light = entity_registry.async_get_or_create(
-        "light", "demo", "1234", original_name="kitchen light"
+        "light", "demo", "1234", original_name="light in the kitchen"
     )
     entity_registry.async_update_entity(
         kitchen_light.entity_id, area_id=kitchen_area.id
     )
     hass.states.async_set(
-        kitchen_light.entity_id, "off", attributes={ATTR_FRIENDLY_NAME: "kitchen light"}
+        kitchen_light.entity_id,
+        "off",
+        attributes={ATTR_FRIENDLY_NAME: "light in the kitchen"},
     )
 
     ceiling_light = entity_registry.async_get_or_create(
@@ -2570,12 +2573,19 @@ async def test_light_area_same_name(
         ceiling_light.entity_id, "off", attributes={ATTR_FRIENDLY_NAME: "ceiling light"}
     )
 
+    bathroom_light = entity_registry.async_get_or_create(
+        "light", "demo", "9012", original_name="light"
+    )
+    hass.states.async_set(
+        bathroom_light.entity_id, "off", attributes={ATTR_FRIENDLY_NAME: "light"}
+    )
+
     calls = async_mock_service(hass, LIGHT_DOMAIN, "turn_on")
 
     await hass.services.async_call(
         "conversation",
         "process",
-        {conversation.ATTR_TEXT: "turn on kitchen light"},
+        {conversation.ATTR_TEXT: "turn on light in the kitchen"},
     )
     await hass.async_block_till_done()
 
@@ -2592,7 +2602,10 @@ async def test_custom_sentences_priority(
     hass_admin_user: MockUser,
     snapshot: SnapshotAssertion,
 ) -> None:
-    """Test that user intents from custom_sentences have priority over builtin intents/sentences."""
+    """Test that user intents from custom_sentences have priority over builtin intents/sentences.
+
+    Also test that they follow proper selection logic.
+    """
     with tempfile.NamedTemporaryFile(
         mode="w+",
         encoding="utf-8",
@@ -2605,7 +2618,11 @@ async def test_custom_sentences_priority(
             {
                 "language": "en",
                 "intents": {
-                    "CustomIntent": {"data": [{"sentences": ["turn on the lamp"]}]}
+                    "CustomIntent": {"data": [{"sentences": ["turn on <name>"]}]},
+                    "WorseCustomIntent": {
+                        "data": [{"sentences": ["turn on the lamp"]}]
+                    },
+                    "FakeCustomIntent": {"data": [{"sentences": ["turn on <name>"]}]},
                 },
             },
             custom_sentences_file,
@@ -2622,11 +2639,21 @@ async def test_custom_sentences_priority(
             "intent_script",
             {
                 "intent_script": {
-                    "CustomIntent": {"speech": {"text": "custom response"}}
+                    "CustomIntent": {"speech": {"text": "custom response"}},
+                    "WorseCustomIntent": {"speech": {"text": "worse custom response"}},
+                    "FakeCustomIntent": {"speech": {"text": "fake custom response"}},
                 }
             },
         )
 
+        # Fake intent not being custom
+        intents = (
+            await conversation.async_get_agent(hass).async_get_or_load_intents(
+                hass.config.language
+            )
+        ).intents.intents
+        intents["FakeCustomIntent"].data[0].metadata[METADATA_CUSTOM_SENTENCE] = False
+
         # Ensure that a "lamp" exists so that we can verify the custom intent
         # overrides the builtin sentence.
         hass.states.async_set("light.lamp", "off")