Allow arbitrary Gemini attachments (#138751)

* Gemini: Allow arbitrary attachments

  This lets me use Gemini to extract information from PDFs, HTML, or other files.

* Gemini: Only add deprecation warning when deprecated parameter has a value

* Gemini: Use Files.upload() for both images and other files

  This simplifies the code. Within the Google client, this takes a different codepath (it uploads images as a file instead of re-saving them into inline bytes). I think that's a feature (it's probably more efficient?).

* Gemini: Deduplicate filenames
2025-07-24 13:47:35 +00:00 · 2025-02-23 19:11:38 -05:00 · 2025-02-23 19:11:38 -05:00 · 580c6f2684
commit 580c6f2684
parent d62c18c225
5 changed files with 59 additions and 50 deletions
--- a/homeassistant/components/google_generative_ai_conversation/init.py
+++ b/homeassistant/components/google_generative_ai_conversation/init.py
@ -2,12 +2,10 @@
 from __future__ import annotations
 import mimetypes
 from pathlib import Path
 from google import genai  # type: ignore[attr-defined]
 from google.genai.errors import APIError, ClientError
 from PIL import Image
 from requests.exceptions import Timeout
 import voluptuous as vol
@ -26,6 +24,7 @@ from homeassistant.exceptions import (
    HomeAssistantError,
 )
 from homeassistant.helpers import config_validation as cv
 from homeassistant.helpers.issue_registry import IssueSeverity, async_create_issue
 from homeassistant.helpers.typing import ConfigType
 from .const import (
@ -38,6 +37,7 @@ from .const import (
 SERVICE_GENERATE_CONTENT = "generate_content"
 CONF_IMAGE_FILENAME = "image_filename"
 CONF_FILENAMES = "filenames"
 CONFIG_SCHEMA = cv.config_entry_only_config_schema(DOMAIN)
 PLATFORMS = (Platform.CONVERSATION,)
@ -50,31 +50,43 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
    async def generate_content(call: ServiceCall) -> ServiceResponse:
        """Generate content from text and optionally images."""
        if call.data[CONF_IMAGE_FILENAME]:
            # Deprecated in 2025.3, to remove in 2025.9
            async_create_issue(
                hass,
                DOMAIN,
                "deprecated_image_filename_parameter",
                breaks_in_ha_version="2025.9.0",
                is_fixable=False,
                severity=IssueSeverity.WARNING,
                translation_key="deprecated_image_filename_parameter",
            )
        prompt_parts = [call.data[CONF_PROMPT]]
        def append_images_to_prompt():
            """Validate every requested image path and append the opened image.

            For each entry of ``call.data[CONF_IMAGE_FILENAME]`` the path must be
            allowed by ``hass.config.is_allowed_path``, must exist on disk, and
            must have a guessed MIME type starting with ``image``; otherwise a
            ``HomeAssistantError`` is raised. Accepted files are opened with
            ``Image.open`` and appended to ``prompt_parts``.
            """
            for image_filename in call.data[CONF_IMAGE_FILENAME]:
                # Access check comes first, before the filesystem is touched.
                if not hass.config.is_allowed_path(image_filename):
                    raise HomeAssistantError(
                        f"Cannot read `{image_filename}`, no access to path; "
                        "`allowlist_external_dirs` may need to be adjusted in "
                        "`configuration.yaml`"
                    )

                if not Path(image_filename).exists():
                    raise HomeAssistantError(f"`{image_filename}` does not exist")

                # guess_type returns (type, encoding); only the type matters here.
                guessed_type = mimetypes.guess_type(image_filename)[0]
                if not (guessed_type is not None and guessed_type.startswith("image")):
                    raise HomeAssistantError(f"`{image_filename}` is not an image")

                prompt_parts.append(Image.open(image_filename))
        await hass.async_add_executor_job(append_images_to_prompt)
        config_entry: GoogleGenerativeAIConfigEntry = hass.config_entries.async_entries(
            DOMAIN
        )[0]
        client = config_entry.runtime_data
        def append_files_to_prompt():
            """Upload every requested attachment and append it to the prompt.

            Combines the deprecated ``image_filename`` list with ``filenames``,
            drops duplicate paths, and for each remaining path checks that it is
            allowed by ``hass.config.is_allowed_path`` and exists on disk before
            passing it to ``client.files.upload``. The upload result is appended
            to ``prompt_parts``.

            Raises:
                HomeAssistantError: If a path is not allowed or does not exist.
            """
            image_filenames = call.data[CONF_IMAGE_FILENAME]
            filenames = call.data[CONF_FILENAMES]
            # dict.fromkeys de-duplicates while keeping first-seen order, so the
            # attachments reach the model in the order the caller listed them
            # (a plain set() would iterate in arbitrary order).
            for filename in dict.fromkeys(image_filenames + filenames):
                if not hass.config.is_allowed_path(filename):
                    raise HomeAssistantError(
                        f"Cannot read `{filename}`, no access to path; "
                        "`allowlist_external_dirs` may need to be adjusted in "
                        "`configuration.yaml`"
                    )
                if not Path(filename).exists():
                    raise HomeAssistantError(f"`{filename}` does not exist")
                prompt_parts.append(client.files.upload(file=filename))
        await hass.async_add_executor_job(append_files_to_prompt)
        try:
            response = await client.aio.models.generate_content(
                model=RECOMMENDED_CHAT_MODEL, contents=prompt_parts
@ -105,6 +117,9 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
                vol.Optional(CONF_IMAGE_FILENAME, default=[]): vol.All(
                    cv.ensure_list, [cv.string]
                ),
                vol.Optional(CONF_FILENAMES, default=[]): vol.All(
                    cv.ensure_list, [cv.string]
                ),
            }
        ),
        supports_response=SupportsResponse.ONLY,
--- a/homeassistant/components/google_generative_ai_conversation/services.yaml
+++ b/homeassistant/components/google_generative_ai_conversation/services.yaml
@ -9,3 +9,8 @@ generate_content:
      required: false
      selector:
        object:
    filenames:
      required: false
      selector:
        text:
          multiple: true
--- a/homeassistant/components/google_generative_ai_conversation/strings.json
+++ b/homeassistant/components/google_generative_ai_conversation/strings.json
@ -56,10 +56,21 @@
        },
        "image_filename": {
          "name": "Image filename",
-          "description": "Images",
+          "description": "Deprecated. Use filenames instead.",
          "example": "/config/www/image.jpg"
        },
        "filenames": {
          "name": "Attachment filenames",
          "description": "Attachments to add to the prompt (images, PDFs, etc)",
          "example": "/config/www/image.jpg"
        }
      }
    }
  },
  "issues": {
    "deprecated_image_filename_parameter": {
      "title": "Deprecated 'image_filename' parameter",
      "description": "The 'image_filename' parameter in Google Generative AI actions is deprecated. Please edit scripts and automations to use 'filenames' intead."
    }
  }
 }
--- a/tests/components/google_generative_ai_conversation/snapshots/test_init.ambr
+++ b/tests/components/google_generative_ai_conversation/snapshots/test_init.ambr
@ -8,7 +8,8 @@
      dict({
        'contents': list([
          'Describe this image from my doorbell camera',
-          b'image bytes',
+          b'some file',
          b'some file',
        ]),
        'model': 'models/gemini-2.0-flash',
      }),
--- a/tests/components/google_generative_ai_conversation/test_init.py
+++ b/tests/components/google_generative_ai_conversation/test_init.py
@ -66,8 +66,8 @@ async def test_generate_content_service_with_image(
            ),
        ) as mock_generate,
        patch(
-            "homeassistant.components.google_generative_ai_conversation.Image.open",
+            "google.genai.files.Files.upload",
-            return_value=b"image bytes",
+            return_value=b"some file",
        ),
        patch("pathlib.Path.exists", return_value=True),
        patch.object(hass.config, "is_allowed_path", return_value=True),
@ -77,7 +77,7 @@ async def test_generate_content_service_with_image(
            "generate_content",
            {
                "prompt": "Describe this image from my doorbell camera",
-                "image_filename": "doorbell_snapshot.jpg",
+                "filenames": ["doorbell_snapshot.jpg", "context.txt", "context.txt"],
            },
            blocking=True,
            return_response=True,
@ -161,7 +161,7 @@ async def test_generate_content_service_with_image_not_allowed_path(
            "generate_content",
            {
                "prompt": "Describe this image from my doorbell camera",
-                "image_filename": "doorbell_snapshot.jpg",
+                "filenames": "doorbell_snapshot.jpg",
            },
            blocking=True,
            return_response=True,
@ -186,30 +186,7 @@ async def test_generate_content_service_with_image_not_exists(
            "generate_content",
            {
                "prompt": "Describe this image from my doorbell camera",
-                "image_filename": "doorbell_snapshot.jpg",
+                "filenames": "doorbell_snapshot.jpg",
            },
            blocking=True,
            return_response=True,
        )
@pytest.mark.usefixtures("mock_init_component")
 async def test_generate_content_service_with_non_image(hass: HomeAssistant) -> None:
    """Test generate content service with a non image."""
    with (
        patch("pathlib.Path.exists", return_value=True),
        patch.object(hass.config, "is_allowed_path", return_value=True),
        patch("pathlib.Path.exists", return_value=True),
        pytest.raises(
            HomeAssistantError, match="`doorbell_snapshot.mp4` is not an image"
        ),
    ):
        await hass.services.async_call(
            "google_generative_ai_conversation",
            "generate_content",
            {
                "prompt": "Describe this image from my doorbell camera",
                "image_filename": "doorbell_snapshot.mp4",
            },
            blocking=True,
            return_response=True,