mirror of
https://github.com/home-assistant/core.git
synced 2025-07-28 23:57:06 +00:00
Add duplicate issue detection using GitHub AI models (#146487)
This commit is contained in:
parent
9ee45518e9
commit
4a50f4ffc1
374
.github/workflows/detect-duplicate-issues.yml
vendored
Normal file
374
.github/workflows/detect-duplicate-issues.yml
vendored
Normal file
@ -0,0 +1,374 @@
|
|||||||
|
# Runs whenever a label is added to an issue. The steps of the single job
# decide whether that label should trigger a duplicate search.
name: Auto-detect duplicate issues

# yamllint disable-line rule:truthy
on:
  issues:
    types:
      - labeled

# Token scopes requested for this workflow: write access to issues and read
# access to models.
permissions:
  issues: write
  models: read

jobs:
  detect-duplicates:
    runs-on: ubuntu-latest

    steps:
|
||||||
|
- name: Check if integration label was added and extract details
  id: extract
  uses: actions/github-script@v7.0.1
  with:
    script: |
      // Gatekeeper for the rest of the job.
      //
      // Sets the `should_continue` output to 'false' and stops when:
      //   - the event carries no label,
      //   - the added label does not start with `integration:`,
      //   - the issue already has the `potential-duplicate` label,
      //   - the issue cannot be fetched, or
      //   - a previous duplicate-detection comment from this workflow exists.
      // Otherwise sets `should_continue` to 'true' together with
      // `current_number`, `current_title`, `current_body`, `current_url`
      // and `integration_labels` (a JSON array of label names).

      // Debug: Log the event payload
      console.log('Event name:', context.eventName);
      console.log('Event action:', context.payload.action);
      console.log('Event payload keys:', Object.keys(context.payload));

      // Check the specific label that was added
      const addedLabel = context.payload.label;
      if (!addedLabel) {
        console.log('No label found in labeled event payload');
        core.setOutput('should_continue', 'false');
        return;
      }

      console.log(`Label added: ${addedLabel.name}`);

      if (!addedLabel.name.startsWith('integration:')) {
        console.log('Added label is not an integration label, skipping duplicate detection');
        core.setOutput('should_continue', 'false');
        return;
      }

      console.log(`Integration label added: ${addedLabel.name}`);

      const issueRef = {
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.payload.issue.number
      };

      let currentIssue;
      let integrationLabels = [];

      try {
        // Re-fetch the issue instead of relying on the event payload.
        const issue = await github.rest.issues.get(issueRef);
        currentIssue = issue.data;

        // Check if potential-duplicate label already exists
        const hasPotentialDuplicateLabel = currentIssue.labels
          .some(label => label.name === 'potential-duplicate');

        if (hasPotentialDuplicateLabel) {
          console.log('Issue already has potential-duplicate label, skipping duplicate detection');
          core.setOutput('should_continue', 'false');
          return;
        }

        integrationLabels = currentIssue.labels
          .filter(label => label.name.startsWith('integration:'))
          .map(label => label.name);
      } catch (error) {
        // core.error() takes (message, annotationProperties). The error text
        // has to be part of the message itself, otherwise it is never logged.
        core.error(`Failed to fetch issue #${issueRef.issue_number}: ${error.message}`);
        core.setOutput('should_continue', 'false');
        return;
      }

      // Check if we've already posted a duplicate detection comment.
      // The comment list is returned oldest-first, so reading a single small
      // page would miss a marker comment on a busy issue; walk every page.
      let existingComments;
      try {
        existingComments = await github.paginate(github.rest.issues.listComments, {
          ...issueRef,
          per_page: 100
        });
      } catch (error) {
        core.error(`Failed to fetch comments: ${error.message}`);
        // Continue anyway, worst case we might post a duplicate comment
        existingComments = [];
      }

      // The hidden HTML marker is what the posting step puts at the top of
      // its comment body.
      const previousDuplicateComment = existingComments.find(comment =>
        comment.user?.login === 'github-actions[bot]' &&
        comment.body?.includes('<!-- workflow: detect-duplicate-issues -->')
      );

      if (previousDuplicateComment) {
        console.log('Already posted duplicate detection comment, skipping');
        core.setOutput('should_continue', 'false');
        return;
      }

      core.setOutput('should_continue', 'true');
      core.setOutput('current_number', currentIssue.number);
      core.setOutput('current_title', currentIssue.title);
      core.setOutput('current_body', currentIssue.body);
      core.setOutput('current_url', currentIssue.html_url);
      core.setOutput('integration_labels', JSON.stringify(integrationLabels));

      console.log(`Current issue: #${currentIssue.number}`);
      console.log(`Integration labels: ${integrationLabels.join(', ')}`);
|
||||||
|
|
||||||
|
- name: Fetch similar issues
  id: fetch_similar
  if: steps.extract.outputs.should_continue == 'true'
  uses: actions/github-script@v7.0.1
  env:
    INTEGRATION_LABELS: ${{ steps.extract.outputs.integration_labels }}
    CURRENT_NUMBER: ${{ steps.extract.outputs.current_number }}
  with:
    script: |
      // Searches this repository for issues that share at least one of the
      // current issue's integration labels and keeps the ones with a lower
      // issue number than the current issue.
      //
      // Outputs:
      //   has_similar    - 'true' when at least one candidate was found
      //   similar_issues - JSON array of sanitized candidates (only set
      //                    when has_similar is 'true')

      // C0/C1 control characters; note this range includes tabs and newlines.
      const CONTROL_CHARS = /[\u0000-\u001F\u007F-\u009F]/g;
      const MAX_BODY_LENGTH = 1000;
      const MAX_CANDIDATES = 15;

      const integrationLabels = JSON.parse(process.env.INTEGRATION_LABELS);
      // Always pass the radix so the value is read as decimal.
      const currentNumber = Number.parseInt(process.env.CURRENT_NUMBER, 10);

      if (integrationLabels.length === 0) {
        console.log('No integration labels found, skipping duplicate detection');
        core.setOutput('has_similar', 'false');
        return;
      }

      // Use GitHub search API to find issues with matching integration labels
      console.log(`Searching for issues with integration labels: ${integrationLabels.join(', ')}`);

      // Build search query for issues with any of the current integration labels
      const labelQueries = integrationLabels.map(label => `label:"${label}"`);
      const labelFilter = labelQueries.length === 1
        ? labelQueries[0]
        : `(${labelQueries.join(' OR ')})`;
      const searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue ${labelFilter}`;

      console.log(`Search query: ${searchQuery}`);

      let result;
      try {
        result = await github.rest.search.issuesAndPullRequests({
          q: searchQuery,
          per_page: MAX_CANDIDATES,
          sort: 'updated',
          order: 'desc'
        });
      } catch (error) {
        // core.error() takes (message, annotationProperties). The error text
        // has to be part of the message itself, otherwise it is never logged.
        core.error(`Failed to search for similar issues: ${error.message}`);
        if (error.status === 403 && error.message?.includes('rate limit')) {
          core.error('GitHub API rate limit exceeded');
        }
        core.setOutput('has_similar', 'false');
        return;
      }

      // Filter out pull requests and anything that is not older than the
      // current issue. `number < currentNumber` also excludes the current
      // issue itself.
      const similarIssues = result.data.items
        .filter(item => !item.pull_request && item.number < currentNumber)
        .map(item => ({
          number: item.number,
          title: item.title,
          body: item.body,
          url: item.html_url,
          state: item.state,
          createdAt: item.created_at,
          updatedAt: item.updated_at,
          comments: item.comments,
          labels: item.labels.map(l => l.name)
        }));

      console.log(`Found ${similarIssues.length} issues with matching integration labels`);
      console.log('Raw similar issues:', JSON.stringify(similarIssues.slice(0, 3), null, 2));

      if (similarIssues.length === 0) {
        console.log('No similar issues found, setting has_similar to false');
        core.setOutput('has_similar', 'false');
        return;
      }

      console.log('Similar issues found, setting has_similar to true');
      core.setOutput('has_similar', 'true');

      // Clean the issue data to prevent JSON parsing issues
      const cleanedIssues = similarIssues.slice(0, MAX_CANDIDATES).map(item => {
        // A missing or non-string body becomes an empty string.
        let cleanBody = '';
        if (item.body && typeof item.body === 'string') {
          const stripped = item.body.replace(CONTROL_CHARS, '');
          // Truncate to MAX_BODY_LENGTH characters and add ellipsis if needed
          cleanBody = stripped.length > MAX_BODY_LENGTH
            ? stripped.substring(0, MAX_BODY_LENGTH) + '...'
            : stripped;
        }

        return {
          number: item.number,
          title: item.title.replace(CONTROL_CHARS, ''),
          body: cleanBody,
          url: item.url,
          state: item.state,
          createdAt: item.createdAt,
          updatedAt: item.updatedAt,
          comments: item.comments,
          labels: item.labels
        };
      });

      console.log(`Cleaned issues count: ${cleanedIssues.length}`);
      console.log('First cleaned issue:', JSON.stringify(cleanedIssues[0], null, 2));

      core.setOutput('similar_issues', JSON.stringify(cleanedIssues));
|
||||||
|
|
||||||
|
# Sends the current issue and the candidate list to the model.
# Skipped unless the extract step allowed the run and the search step
# reported at least one candidate.
- name: Detect duplicates using AI
  id: ai_detection
  if: steps.extract.outputs.should_continue == 'true' && steps.fetch_similar.outputs.has_similar == 'true'
  uses: actions/ai-inference@v1.1.0
  with:
    model: openai/gpt-4o-mini
    # The system prompt asks for at most five issue numbers in a JSON array.
    max-tokens: 100
    system-prompt: |
      You are a Home Assistant issue duplicate detector. Your task is to identify potential duplicate issues based on their content.

      Important considerations:
      - Open issues are more relevant than closed ones for duplicate detection
      - Recently updated issues may indicate ongoing work or discussion
      - Issues with more comments are generally more relevant and active
      - Higher comment count often indicates community engagement and importance
      - Older closed issues might be resolved differently than newer approaches
      - Consider the time between issues - very old issues may have different contexts

      Rules:
      1. Compare the current issue with the provided similar issues
      2. Look for issues that report the same problem or request the same functionality
      3. Consider different wording but same underlying issue as duplicates
      4. For CLOSED issues, only mark as duplicate if they describe the EXACT same problem
      5. For OPEN issues, use a lower threshold (70%+ similarity)
      6. Prioritize issues with higher comment counts as they indicate more activity/relevance
      7. Return ONLY a JSON array of issue numbers that are potential duplicates
      8. If no duplicates are found, return an empty array: []
      9. Maximum 5 potential duplicates, prioritize open issues with comments
      10. Consider the age of issues - prefer recent duplicates over very old ones

      Example response format:
      [1234, 5678, 9012]

    # Title, body and candidate JSON are substituted from earlier step outputs.
    prompt: |
      Current issue (just created):
      Title: ${{ steps.extract.outputs.current_title }}
      Body: ${{ steps.extract.outputs.current_body }}

      Similar issues to compare against (each includes state, creation date, last update, and comment count):
      ${{ steps.fetch_similar.outputs.similar_issues }}

      Analyze these issues and identify which ones are potential duplicates of the current issue. Consider their state (open/closed), how recently they were updated, and their comment count (higher = more relevant).
|
||||||
|
|
||||||
|
- name: Post duplicate detection results
  id: post_results
  if: steps.extract.outputs.should_continue == 'true' && steps.fetch_similar.outputs.has_similar == 'true'
  uses: actions/github-script@v7.0.1
  env:
    AI_RESPONSE: ${{ steps.ai_detection.outputs.response }}
    SIMILAR_ISSUES: ${{ steps.fetch_similar.outputs.similar_issues }}
  with:
    script: |
      // Turns the model's reply into a list of issue numbers, keeps only the
      // numbers that were actually offered as candidates, then posts a
      // comment listing them and adds the `potential-duplicate` label.
      // Failures while posting are logged but do not fail the step.

      // Treat a missing response as empty so the string calls below cannot throw.
      const aiResponse = process.env.AI_RESPONSE ?? '';

      console.log('Raw AI response:', JSON.stringify(aiResponse));

      // Last-resort parser: every run of digits in free-form text, as decimal.
      // Stray numbers are harmless because the result is intersected with the
      // candidate list further down.
      const extractNumbers = text =>
        (text.match(/\d+/g) ?? []).map(digits => Number.parseInt(digits, 10));

      let duplicateNumbers = [];
      try {
        // Clean the response of any potential control characters
        const cleanResponse = aiResponse.trim().replace(/[\u0000-\u001F\u007F-\u009F]/g, '');
        console.log('Cleaned AI response:', cleanResponse);

        const parsed = JSON.parse(cleanResponse);

        if (Array.isArray(parsed)) {
          // Accept numbers written as strings, e.g. ["1234"].
          duplicateNumbers = parsed.map(entry =>
            typeof entry === 'string' ? Number.parseInt(entry, 10) : entry
          );
        } else {
          console.log('AI response is not an array, trying to extract numbers');
          duplicateNumbers = extractNumbers(cleanResponse);
        }

        // Filter to only valid numbers
        duplicateNumbers = duplicateNumbers.filter(entry => Number.isInteger(entry));
      } catch (error) {
        console.log('Failed to parse AI response as JSON:', error.message);
        console.log('Raw response:', aiResponse);

        // Fallback: try to extract numbers from the response
        duplicateNumbers = extractNumbers(aiResponse);
        console.log('Extracted numbers as fallback:', duplicateNumbers);
      }

      if (duplicateNumbers.length === 0) {
        console.log('No duplicates detected by AI');
        return;
      }

      console.log(`AI detected ${duplicateNumbers.length} potential duplicates: ${duplicateNumbers.join(', ')}`);

      // Get details of detected duplicates. Only numbers present in the
      // candidate list are kept, so a made-up number cannot be linked.
      const similarIssues = JSON.parse(process.env.SIMILAR_ISSUES);
      const flaggedNumbers = new Set(duplicateNumbers);
      const duplicates = similarIssues.filter(issue => flaggedNumbers.has(issue.number));

      if (duplicates.length === 0) {
        console.log('No matching issues found for detected numbers');
        return;
      }

      // Create comment with duplicate detection results
      const duplicateLinks = duplicates
        .map(issue => `- [#${issue.number}: ${issue.title}](${issue.url})`)
        .join('\n');

      // The first line is the hidden marker the extract step searches for to
      // avoid commenting twice. The sub-bullets under item 2 are indented
      // three spaces so Markdown nests them inside that list item.
      const commentBody = [
        '<!-- workflow: detect-duplicate-issues -->',
        '### 🔍 **Potential duplicate detection**',
        '',
        'I\'ve analyzed similar issues and found the following potential duplicates:',
        '',
        duplicateLinks,
        '',
        '**What to do next:**',
        '1. Please review these issues to see if they match your issue',
        '2. If you find an existing issue that covers your problem:',
        '   - Consider closing this issue',
        '   - Add your findings or 👍 on the existing issue instead',
        '3. If your issue is different or adds new aspects, please clarify how it differs',
        '',
        'This helps keep our issues organized and ensures similar issues are consolidated for better visibility.',
        '',
        '*This message was generated automatically by our duplicate detection system.*'
      ].join('\n');

      const issueRef = {
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.payload.issue.number
      };

      try {
        await github.rest.issues.createComment({ ...issueRef, body: commentBody });

        console.log(`Posted duplicate detection comment with ${duplicates.length} potential duplicates`);

        // Add the potential-duplicate label
        await github.rest.issues.addLabels({ ...issueRef, labels: ['potential-duplicate'] });

        console.log('Added potential-duplicate label to the issue');
      } catch (error) {
        // core.error() takes (message, annotationProperties). The error text
        // has to be part of the message itself, otherwise it is never logged.
        core.error(`Failed to post duplicate detection comment or add label: ${error.message}`);
        if (error.status === 403) {
          core.error('Permission denied or rate limit exceeded');
        }
        // Don't throw - we've done the analysis, just couldn't post the result
      }
|
Loading…
x
Reference in New Issue
Block a user