Pass prompt as system_instruction for Gemini 1.5 models (#120147)

2025-07-17 10:17:09 +00:00 · 2024-06-22 03:35:48 -07:00 · 2024-06-22 03:35:48 -07:00 · ad1f0db5a4
commit ad1f0db5a4
parent 57eb8dab6a
4 changed files with 253 additions and 141 deletions
--- a/homeassistant/components/google_generative_ai_conversation/conversation.py
+++ b/homeassistant/components/google_generative_ai_conversation/conversation.py
@ -161,10 +161,14 @@ class GoogleGenerativeAIConversationEntity(
        self, user_input: conversation.ConversationInput
    ) -> conversation.ConversationResult:
        """Process a sentence."""
-        intent_response = intent.IntentResponse(language=user_input.language)
-        llm_api: llm.APIInstance | None = None
-        tools: list[dict[str, Any]] | None = None
-        user_name: str | None = None
+        result = conversation.ConversationResult(
+            response=intent.IntentResponse(language=user_input.language),
+            conversation_id=user_input.conversation_id
+            if user_input.conversation_id in self.history
+            else ulid.ulid_now(),
+        )
+        assert result.conversation_id
+
        llm_context = llm.LLMContext(
            platform=DOMAIN,
            context=user_input.context,
@ -173,7 +177,8 @@ class GoogleGenerativeAIConversationEntity(
            assistant=conversation.DOMAIN,
            device_id=user_input.device_id,
        )
-
+        llm_api: llm.APIInstance | None = None
+        tools: list[dict[str, Any]] | None = None
        if self.entry.options.get(CONF_LLM_HASS_API):
            try:
                llm_api = await llm.async_get_api(
@ -183,17 +188,33 @@ class GoogleGenerativeAIConversationEntity(
                )
            except HomeAssistantError as err:
                LOGGER.error("Error getting LLM API: %s", err)
-                intent_response.async_set_error(
+                result.response.async_set_error(
                    intent.IntentResponseErrorCode.UNKNOWN,
                    f"Error preparing LLM API: {err}",
                )
-                return conversation.ConversationResult(
-                    response=intent_response, conversation_id=user_input.conversation_id
-                )
+                return result
            tools = [_format_tool(tool) for tool in llm_api.tools]

+        try:
+            prompt = await self._async_render_prompt(user_input, llm_api, llm_context)
+        except TemplateError as err:
+            LOGGER.error("Error rendering prompt: %s", err)
+            result.response.async_set_error(
+                intent.IntentResponseErrorCode.UNKNOWN,
+                f"Sorry, I had a problem with my template: {err}",
+            )
+            return result
+
+        model_name = self.entry.options.get(CONF_CHAT_MODEL, RECOMMENDED_CHAT_MODEL)
+        # Gemini 1.0 doesn't support system_instruction while 1.5 does.
+        # Assume future versions will support it (if not, the request fails with a
+        # clear message at which point we can fix).
+        supports_system_instruction = (
+            "gemini-1.0" not in model_name and "gemini-pro" not in model_name
+        )
+
        model = genai.GenerativeModel(
-            model_name=self.entry.options.get(CONF_CHAT_MODEL, RECOMMENDED_CHAT_MODEL),
+            model_name=model_name,
            generation_config={
                "temperature": self.entry.options.get(
                    CONF_TEMPERATURE, RECOMMENDED_TEMPERATURE
@ -219,69 +240,25 @@ class GoogleGenerativeAIConversationEntity(
                ),
            },
            tools=tools or None,
+            system_instruction=prompt if supports_system_instruction else None,
        )

-        if user_input.conversation_id in self.history:
-            conversation_id = user_input.conversation_id
-            messages = self.history[conversation_id]
-        else:
-            conversation_id = ulid.ulid_now()
-            messages = [{}, {"role": "model", "parts": "Ok"}]
-
-        if (
-            user_input.context
-            and user_input.context.user_id
-            and (
-                user := await self.hass.auth.async_get_user(user_input.context.user_id)
-            )
-        ):
-            user_name = user.name
-
-        try:
-            if llm_api:
-                api_prompt = llm_api.api_prompt
-            else:
-                api_prompt = llm.async_render_no_api_prompt(self.hass)
-
-            prompt = "\n".join(
-                (
-                    template.Template(
-                        llm.BASE_PROMPT
-                        + self.entry.options.get(
-                            CONF_PROMPT, llm.DEFAULT_INSTRUCTIONS_PROMPT
-                        ),
-                        self.hass,
-                    ).async_render(
-                        {
-                            "ha_name": self.hass.config.location_name,
-                            "user_name": user_name,
-                            "llm_context": llm_context,
-                        },
-                        parse_result=False,
-                    ),
-                    api_prompt,
-                )
-            )
-
-        except TemplateError as err:
-            LOGGER.error("Error rendering prompt: %s", err)
-            intent_response.async_set_error(
-                intent.IntentResponseErrorCode.UNKNOWN,
-                f"Sorry, I had a problem with my template: {err}",
-            )
-            return conversation.ConversationResult(
-                response=intent_response, conversation_id=conversation_id
-            )
-
-        # Make a copy, because we attach it to the trace event.
-        messages = [
-            {"role": "user", "parts": prompt},
-            *messages[1:],
-        ]
+        messages = self.history.get(result.conversation_id, [])
+        if not supports_system_instruction:
+            if not messages:
+                messages = [{}, {"role": "model", "parts": "Ok"}]
+            messages[0] = {"role": "user", "parts": prompt}

        LOGGER.debug("Input: '%s' with history: %s", user_input.text, messages)
        trace.async_conversation_trace_append(
-            trace.ConversationTraceEventType.AGENT_DETAIL, {"messages": messages}
+            trace.ConversationTraceEventType.AGENT_DETAIL,
+            {
+                # Make a copy to attach it to the trace event.
+                "messages": messages[:]
+                if supports_system_instruction
+                else messages[2:],
+                "prompt": prompt,
+            },
        )

        chat = model.start_chat(history=messages)
@ -307,24 +284,20 @@ class GoogleGenerativeAIConversationEntity(
                        f"Sorry, I had a problem talking to Google Generative AI: {err}"
                    )

-                intent_response.async_set_error(
+                result.response.async_set_error(
                    intent.IntentResponseErrorCode.UNKNOWN,
                    error,
                )
-                return conversation.ConversationResult(
-                    response=intent_response, conversation_id=conversation_id
-                )
+                return result

            LOGGER.debug("Response: %s", chat_response.parts)
            if not chat_response.parts:
-                intent_response.async_set_error(
+                result.response.async_set_error(
                    intent.IntentResponseErrorCode.UNKNOWN,
                    "Sorry, I had a problem getting a response from Google Generative AI.",
                )
-                return conversation.ConversationResult(
-                    response=intent_response, conversation_id=conversation_id
-                )
-            self.history[conversation_id] = chat.history
+                return result
+            self.history[result.conversation_id] = chat.history
            function_calls = [
                part.function_call for part in chat_response.parts if part.function_call
            ]
@ -355,9 +328,48 @@ class GoogleGenerativeAIConversationEntity(
                )
            chat_request = protos.Content(parts=tool_responses)

-        intent_response.async_set_speech(
+        result.response.async_set_speech(
            " ".join([part.text.strip() for part in chat_response.parts if part.text])
        )
-        return conversation.ConversationResult(
-            response=intent_response, conversation_id=conversation_id
+        return result
+
+    async def _async_render_prompt(  # Build the complete prompt text for one conversation turn.
+        self,
+        user_input: conversation.ConversationInput,
+        llm_api: llm.APIInstance | None,
+        llm_context: llm.LLMContext,
+    ) -> str:  # Rendered instructions template and the API prompt, joined by a newline.
+        user_name: str | None = None  # Stays None unless a user is resolved below.
+        if (
+            user_input.context
+            and user_input.context.user_id
+            and (
+                user := await self.hass.auth.async_get_user(user_input.context.user_id)
+            )
+        ):
+            user_name = user.name
+
+        if llm_api:  # An LLM API instance supplies its own prompt text.
+            api_prompt = llm_api.api_prompt
+        else:  # No API selected: use the llm helper's "no API" prompt instead.
+            api_prompt = llm.async_render_no_api_prompt(self.hass)
+
+        return "\n".join(
+            (
+                template.Template(
+                    llm.BASE_PROMPT  # Always prepended to the configured or default instructions.
+                    + self.entry.options.get(
+                        CONF_PROMPT, llm.DEFAULT_INSTRUCTIONS_PROMPT
+                    ),
+                    self.hass,
+                ).async_render(  # NOTE(review): the caller catches TemplateError around this method; presumably raised here.
+                    {  # Variables made available to the instructions template.
+                        "ha_name": self.hass.config.location_name,
+                        "user_name": user_name,
+                        "llm_context": llm_context,
+                    },
+                    parse_result=False,  # Presumably keeps the output as a plain string; TODO confirm.
+                ),
+                api_prompt,
+            )
         )
--- a/homeassistant/helpers/llm.py
+++ b/homeassistant/helpers/llm.py
@ -43,6 +43,7 @@ BASE_PROMPT = (
 )

 DEFAULT_INSTRUCTIONS_PROMPT = """You are a voice assistant for Home Assistant.
+Answer questions about the world truthfully.
 Answer in plain text. Keep it simple and to the point.
 """

--- a/tests/components/google_generative_ai_conversation/snapshots/test_conversation.ambr
+++ b/tests/components/google_generative_ai_conversation/snapshots/test_conversation.ambr
@ -1,5 +1,5 @@
 # serializer version: 1
-# name: test_chat_history
+# name: test_chat_history[models/gemini-1.0-pro-False]
  list([
    tuple(
      '',
@ -12,13 +12,14 @@
          'top_k': 64,
          'top_p': 0.95,
        }),
-        'model_name': 'models/gemini-1.5-flash-latest',
+        'model_name': 'models/gemini-1.0-pro',
        'safety_settings': dict({
          'DANGEROUS': 'BLOCK_MEDIUM_AND_ABOVE',
          'HARASSMENT': 'BLOCK_MEDIUM_AND_ABOVE',
          'HATE': 'BLOCK_MEDIUM_AND_ABOVE',
          'SEXUAL': 'BLOCK_MEDIUM_AND_ABOVE',
        }),
+        'system_instruction': None,
        'tools': None,
      }),
    ),
@ -32,6 +33,7 @@
            'parts': '''
              Current time is 05:00:00. Today's date is 2024-05-24.
              You are a voice assistant for Home Assistant.
+              Answer questions about the world truthfully.
              Answer in plain text. Keep it simple and to the point.
              Only if the user wants to control a device, tell them to edit the AI configuration and allow access to Home Assistant.
            ''',
@ -63,13 +65,14 @@
          'top_k': 64,
          'top_p': 0.95,
        }),
-        'model_name': 'models/gemini-1.5-flash-latest',
+        'model_name': 'models/gemini-1.0-pro',
        'safety_settings': dict({
          'DANGEROUS': 'BLOCK_MEDIUM_AND_ABOVE',
          'HARASSMENT': 'BLOCK_MEDIUM_AND_ABOVE',
          'HATE': 'BLOCK_MEDIUM_AND_ABOVE',
          'SEXUAL': 'BLOCK_MEDIUM_AND_ABOVE',
        }),
+        'system_instruction': None,
        'tools': None,
      }),
    ),
@ -83,6 +86,7 @@
            'parts': '''
              Current time is 05:00:00. Today's date is 2024-05-24.
              You are a voice assistant for Home Assistant.
+              Answer questions about the world truthfully.
              Answer in plain text. Keep it simple and to the point.
              Only if the user wants to control a device, tell them to edit the AI configuration and allow access to Home Assistant.
            ''',
@ -113,6 +117,108 @@
    ),
  ])
 # ---
+# name: test_chat_history[models/gemini-1.5-pro-True]
+  list([
+    tuple(
+      '',
+      tuple(
+      ),
+      dict({
+        'generation_config': dict({
+          'max_output_tokens': 150,
+          'temperature': 1.0,
+          'top_k': 64,
+          'top_p': 0.95,
+        }),
+        'model_name': 'models/gemini-1.5-pro',
+        'safety_settings': dict({
+          'DANGEROUS': 'BLOCK_MEDIUM_AND_ABOVE',
+          'HARASSMENT': 'BLOCK_MEDIUM_AND_ABOVE',
+          'HATE': 'BLOCK_MEDIUM_AND_ABOVE',
+          'SEXUAL': 'BLOCK_MEDIUM_AND_ABOVE',
+        }),
+        'system_instruction': '''
+          Current time is 05:00:00. Today's date is 2024-05-24.
+          You are a voice assistant for Home Assistant.
+          Answer questions about the world truthfully.
+          Answer in plain text. Keep it simple and to the point.
+          Only if the user wants to control a device, tell them to edit the AI configuration and allow access to Home Assistant.
+        ''',
+        'tools': None,
+      }),
+    ),
+    tuple(
+      '().start_chat',
+      tuple(
+      ),
+      dict({
+        'history': list([
+        ]),
+      }),
+    ),
+    tuple(
+      '().start_chat().send_message_async',
+      tuple(
+        '1st user request',
+      ),
+      dict({
+      }),
+    ),
+    tuple(
+      '',
+      tuple(
+      ),
+      dict({
+        'generation_config': dict({
+          'max_output_tokens': 150,
+          'temperature': 1.0,
+          'top_k': 64,
+          'top_p': 0.95,
+        }),
+        'model_name': 'models/gemini-1.5-pro',
+        'safety_settings': dict({
+          'DANGEROUS': 'BLOCK_MEDIUM_AND_ABOVE',
+          'HARASSMENT': 'BLOCK_MEDIUM_AND_ABOVE',
+          'HATE': 'BLOCK_MEDIUM_AND_ABOVE',
+          'SEXUAL': 'BLOCK_MEDIUM_AND_ABOVE',
+        }),
+        'system_instruction': '''
+          Current time is 05:00:00. Today's date is 2024-05-24.
+          You are a voice assistant for Home Assistant.
+          Answer questions about the world truthfully.
+          Answer in plain text. Keep it simple and to the point.
+          Only if the user wants to control a device, tell them to edit the AI configuration and allow access to Home Assistant.
+        ''',
+        'tools': None,
+      }),
+    ),
+    tuple(
+      '().start_chat',
+      tuple(
+      ),
+      dict({
+        'history': list([
+          dict({
+            'parts': '1st user request',
+            'role': 'user',
+          }),
+          dict({
+            'parts': '1st model response',
+            'role': 'model',
+          }),
+        ]),
+      }),
+    ),
+    tuple(
+      '().start_chat().send_message_async',
+      tuple(
+        '2nd user request',
+      ),
+      dict({
+      }),
+    ),
+  ])
+# ---
 # name: test_default_prompt[config_entry_options0-None]
  list([
    tuple(
@ -133,6 +239,13 @@
          'HATE': 'BLOCK_MEDIUM_AND_ABOVE',
          'SEXUAL': 'BLOCK_MEDIUM_AND_ABOVE',
        }),
+        'system_instruction': '''
+          Current time is 05:00:00. Today's date is 2024-05-24.
+          You are a voice assistant for Home Assistant.
+          Answer questions about the world truthfully.
+          Answer in plain text. Keep it simple and to the point.
+          <no_api_prompt>
+        ''',
        'tools': None,
      }),
    ),
@ -142,19 +255,6 @@
      ),
      dict({
        'history': list([
-          dict({
-            'parts': '''
-              Current time is 05:00:00. Today's date is 2024-05-24.
-              You are a voice assistant for Home Assistant.
-              Answer in plain text. Keep it simple and to the point.
-              <no_api_prompt>
-            ''',
-            'role': 'user',
-          }),
-          dict({
-            'parts': 'Ok',
-            'role': 'model',
-          }),
        ]),
      }),
    ),
@ -188,6 +288,13 @@
          'HATE': 'BLOCK_MEDIUM_AND_ABOVE',
          'SEXUAL': 'BLOCK_MEDIUM_AND_ABOVE',
        }),
+        'system_instruction': '''
+          Current time is 05:00:00. Today's date is 2024-05-24.
+          You are a voice assistant for Home Assistant.
+          Answer questions about the world truthfully.
+          Answer in plain text. Keep it simple and to the point.
+          <no_api_prompt>
+        ''',
        'tools': None,
      }),
    ),
@ -197,19 +304,6 @@
      ),
      dict({
        'history': list([
-          dict({
-            'parts': '''
-              Current time is 05:00:00. Today's date is 2024-05-24.
-              You are a voice assistant for Home Assistant.
-              Answer in plain text. Keep it simple and to the point.
-              <no_api_prompt>
-            ''',
-            'role': 'user',
-          }),
-          dict({
-            'parts': 'Ok',
-            'role': 'model',
-          }),
        ]),
      }),
    ),
@ -243,6 +337,13 @@
          'HATE': 'BLOCK_MEDIUM_AND_ABOVE',
          'SEXUAL': 'BLOCK_MEDIUM_AND_ABOVE',
        }),
+        'system_instruction': '''
+          Current time is 05:00:00. Today's date is 2024-05-24.
+          You are a voice assistant for Home Assistant.
+          Answer questions about the world truthfully.
+          Answer in plain text. Keep it simple and to the point.
+          <api_prompt>
+        ''',
        'tools': None,
      }),
    ),
@ -252,19 +353,6 @@
      ),
      dict({
        'history': list([
-          dict({
-            'parts': '''
-              Current time is 05:00:00. Today's date is 2024-05-24.
-              You are a voice assistant for Home Assistant.
-              Answer in plain text. Keep it simple and to the point.
-              <api_prompt>
-            ''',
-            'role': 'user',
-          }),
-          dict({
-            'parts': 'Ok',
-            'role': 'model',
-          }),
        ]),
      }),
    ),
@ -298,6 +386,13 @@
          'HATE': 'BLOCK_MEDIUM_AND_ABOVE',
          'SEXUAL': 'BLOCK_MEDIUM_AND_ABOVE',
        }),
+        'system_instruction': '''
+          Current time is 05:00:00. Today's date is 2024-05-24.
+          You are a voice assistant for Home Assistant.
+          Answer questions about the world truthfully.
+          Answer in plain text. Keep it simple and to the point.
+          <api_prompt>
+        ''',
        'tools': None,
      }),
    ),
@ -307,19 +402,6 @@
      ),
      dict({
        'history': list([
-          dict({
-            'parts': '''
-              Current time is 05:00:00. Today's date is 2024-05-24.
-              You are a voice assistant for Home Assistant.
-              Answer in plain text. Keep it simple and to the point.
-              <api_prompt>
-            ''',
-            'role': 'user',
-          }),
-          dict({
-            'parts': 'Ok',
-            'role': 'model',
-          }),
        ]),
      }),
    ),
--- a/tests/components/google_generative_ai_conversation/test_conversation.py
+++ b/tests/components/google_generative_ai_conversation/test_conversation.py
@ -12,6 +12,9 @@ import voluptuous as vol

 from homeassistant.components import conversation
 from homeassistant.components.conversation import trace
+from homeassistant.components.google_generative_ai_conversation.const import (
+    CONF_CHAT_MODEL,
+)
 from homeassistant.components.google_generative_ai_conversation.conversation import (
    _escape_decode,
 )
@ -99,13 +102,22 @@ async def test_default_prompt(
    assert mock_get_tools.called == (CONF_LLM_HASS_API in config_entry_options)


+@pytest.mark.parametrize(
+    ("model_name", "supports_system_instruction"),
+    [("models/gemini-1.5-pro", True), ("models/gemini-1.0-pro", False)],
+)
 async def test_chat_history(
    hass: HomeAssistant,
    mock_config_entry: MockConfigEntry,
    mock_init_component,
+    model_name: str,
+    supports_system_instruction: bool,
    snapshot: SnapshotAssertion,
 ) -> None:
    """Test that the agent keeps track of the chat history."""
+    hass.config_entries.async_update_entry(
+        mock_config_entry, options={CONF_CHAT_MODEL: model_name}
+    )
    with patch("google.generativeai.GenerativeModel") as mock_model:
        mock_chat = AsyncMock()
        mock_model.return_value.start_chat.return_value = mock_chat
@ -115,9 +127,14 @@ async def test_chat_history(
        mock_part.function_call = None
        mock_part.text = "1st model response"
        chat_response.parts = [mock_part]
-        mock_chat.history = [
-            {"role": "user", "parts": "prompt"},
-            {"role": "model", "parts": "Ok"},
+        if supports_system_instruction:
+            mock_chat.history = []
+        else:
+            mock_chat.history = [
+                {"role": "user", "parts": "prompt"},
+                {"role": "model", "parts": "Ok"},
+            ]
+        mock_chat.history += [
            {"role": "user", "parts": "1st user request"},
            {"role": "model", "parts": "1st model response"},
        ]
@ -256,7 +273,7 @@ async def test_function_call(
    ]
    # AGENT_DETAIL event contains the raw prompt passed to the model
    detail_event = trace_events[1]
-    assert "Answer in plain text" in detail_event["data"]["messages"][0]["parts"]
+    assert "Answer in plain text" in detail_event["data"]["prompt"]


@patch(
@ -492,9 +509,9 @@ async def test_template_variables(
    ), result
    assert (
        "The user name is Test User."
-        in mock_model.mock_calls[1][2]["history"][0]["parts"]
+        in mock_model.mock_calls[0][2]["system_instruction"]
    )
-    assert "The user id is 12345." in mock_model.mock_calls[1][2]["history"][0]["parts"]
+    assert "The user id is 12345." in mock_model.mock_calls[0][2]["system_instruction"]


 async def test_conversation_agent(