From e19f1788647d47a6de34a6a51d29db738465534f Mon Sep 17 00:00:00 2001 From: Franck Nijhof Date: Thu, 12 Jun 2025 13:55:26 +0200 Subject: [PATCH] Make duplicate issue detection more strict (#146633) --- .github/workflows/detect-duplicate-issues.yml | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/.github/workflows/detect-duplicate-issues.yml b/.github/workflows/detect-duplicate-issues.yml index 509868541fd..b01a0d68352 100644 --- a/.github/workflows/detect-duplicate-issues.yml +++ b/.github/workflows/detect-duplicate-issues.yml @@ -133,12 +133,18 @@ jobs: // Build search query for issues with any of the current integration labels const labelQueries = integrationLabels.map(label => `label:"${label}"`); + + // Calculate date 6 months ago + const sixMonthsAgo = new Date(); + sixMonthsAgo.setMonth(sixMonthsAgo.getMonth() - 6); + const dateFilter = `created:>=${sixMonthsAgo.toISOString().split('T')[0]}`; + let searchQuery; if (labelQueries.length === 1) { - searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue ${labelQueries[0]}`; + searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue ${labelQueries[0]} ${dateFilter}`; } else { - searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue (${labelQueries.join(' OR ')})`; + searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue (${labelQueries.join(' OR ')}) ${dateFilter}`; } console.log(`Search query: ${searchQuery}`); @@ -227,29 +233,34 @@ jobs: if: steps.extract.outputs.should_continue == 'true' && steps.fetch_similar.outputs.has_similar == 'true' uses: actions/ai-inference@v1.1.0 with: - model: openai/gpt-4o-mini + model: openai/gpt-4o system-prompt: | - You are a Home Assistant issue duplicate detector. Your task is to identify potential duplicate issues based on their content. + You are a Home Assistant issue duplicate detector. Your task is to identify TRUE DUPLICATES - issues that report the EXACT SAME problem, not just similar or related issues. + + CRITICAL: An issue is ONLY a duplicate if: + - It describes the SAME problem with the SAME root cause + - Issues about the same integration but different problems are NOT duplicates + - Issues with similar symptoms but different causes are NOT duplicates Important considerations: - Open issues are more relevant than closed ones for duplicate detection - Recently updated issues may indicate ongoing work or discussion - Issues with more comments are generally more relevant and active - - Higher comment count often indicates community engagement and importance - Older closed issues might be resolved differently than newer approaches - Consider the time between issues - very old issues may have different contexts Rules: - 1. Compare the current issue with the provided similar issues + 1. ONLY mark as duplicate if the issues describe IDENTICAL problems 2. Look for issues that report the same problem or request the same functionality - 3. Consider different wording but same underlying issue as duplicates + 3. Different error messages = NOT a duplicate (even if same integration) 4. For CLOSED issues, only mark as duplicate if they describe the EXACT same problem - 5. For OPEN issues, use a lower threshold (70%+ similarity) + 5. For OPEN issues, use a lower threshold (90%+ similarity) 6. Prioritize issues with higher comment counts as they indicate more activity/relevance - 7. Return ONLY a JSON array of issue numbers that are potential duplicates - 8. If no duplicates are found, return an empty array: [] - 9. Maximum 5 potential duplicates, prioritize open issues with comments - 10. Consider the age of issues - prefer recent duplicates over very old ones + 7. When in doubt, do NOT mark as duplicate + 8. Return ONLY a JSON array of issue numbers that are duplicates + 9. If no duplicates are found, return an empty array: [] + 10. Maximum 5 potential duplicates, prioritize open issues with comments + 11. Consider the age of issues - prefer recent duplicates over very old ones Example response format: [1234, 5678, 9012] @@ -259,10 +270,10 @@ jobs: Title: ${{ steps.extract.outputs.current_title }} Body: ${{ steps.extract.outputs.current_body }} - Similar issues to compare against (each includes state, creation date, last update, and comment count): + Other issues to compare against (each includes state, creation date, last update, and comment count): ${{ steps.fetch_similar.outputs.similar_issues }} - Analyze these issues and identify which ones are potential duplicates of the current issue. Consider their state (open/closed), how recently they were updated, and their comment count (higher = more relevant). + Analyze these issues and identify which ones describe IDENTICAL problems and thus are duplicates of the current issue. When sorting them, consider their state (open/closed), how recently they were updated, and their comment count (higher = more relevant). max-tokens: 100