mirror of
https://github.com/wled/WLED.git
synced 2025-11-21 16:57:32 +00:00
Add fork statistics visualizer tool with graphs and detailed reports
Co-authored-by: DedeHai <6280424+DedeHai@users.noreply.github.com>
This commit is contained in:
466
tools/fork_stats_visualizer.py
Executable file
466
tools/fork_stats_visualizer.py
Executable file
@@ -0,0 +1,466 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fork Statistics Visualizer
|
||||
|
||||
This script loads JSON data generated by fork_stats.py and displays
|
||||
detailed statistics both as formatted lists and visual graphs.
|
||||
|
||||
Usage:
|
||||
python3 tools/fork_stats_visualizer.py results.json
|
||||
python3 tools/fork_stats_visualizer.py results.json --save-plots
|
||||
python3 tools/fork_stats_visualizer.py results.json --output-dir ./plots
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
from datetime import datetime
|
||||
|
||||
# Try to import matplotlib, but allow running without it
|
||||
try:
    # matplotlib is optional: the text report works without it, and every
    # chart function in this file checks HAS_MATPLOTLIB before plotting.
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
    HAS_MATPLOTLIB = True
except ImportError:
    # Record the absence instead of failing, and tell the user how to
    # enable the graphical output.
    HAS_MATPLOTLIB = False
    print("Warning: matplotlib not installed. Graphical visualizations will be disabled.")
    print("Install with: pip install -r tools/fork_stats_visualizer_requirements.txt")
    print()
|
||||
|
||||
def load_json_data(filepath: str) -> Dict:
    """Load and parse the fork-statistics JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document (callers in this file expect a dict).

    Exits the process with status 1, after printing a message to stderr,
    when the file does not exist or cannot be decoded as UTF-8 JSON.
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found.", file=sys.stderr)
        sys.exit(1)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # UnicodeDecodeError covers files that are not valid UTF-8 at all.
        print(f"Error: Invalid JSON in '{filepath}': {e}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
def print_section_header(title: str):
    """Print a banner: a blank line, an 80-column rule, the title, a rule."""
    rule = "=" * 80
    # One print call with a newline separator emits the same three lines.
    print("\n" + rule, f" {title}", rule, sep="\n")
|
||||
|
||||
def print_repository_info(data: Dict):
    """Print the main repository's summary and the analysis timestamp.

    Args:
        data: Loaded JSON document. Reads the optional keys 'main_repo',
            'total_forks' and 'analysis_timestamp'; missing values fall
            back to 'Unknown' / 0.
    """
    print_section_header("REPOSITORY INFORMATION")

    main_repo = data.get('main_repo', {})
    print(f"\nRepository: {main_repo.get('full_name', 'Unknown')}")
    print(f"Total Forks: {main_repo.get('forks_count', 0):,}")
    print(f"Stars: {main_repo.get('stargazers_count', 0):,}")
    print(f"Watchers: {main_repo.get('watchers_count', 0):,}")
    print(f"\nAnalyzed Forks: {data.get('total_forks', 0)}")

    if 'analysis_timestamp' in data:
        timestamp = data['analysis_timestamp']
        try:
            # fromisoformat() on older Pythons rejects a trailing 'Z', so
            # rewrite it as an explicit UTC offset first.
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
            # NOTE(review): the "UTC" suffix is a literal; the value is not
            # converted, so a timestamp carrying another offset would be
            # mislabeled.
            print(f"Analysis Date: {dt.strftime('%Y-%m-%d %H:%M:%S UTC')}")
        except (AttributeError, TypeError, ValueError):
            # Not a string, or not ISO-8601: show the raw value rather
            # than failing. Narrowed from a bare except so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"Analysis Date: {timestamp}")
|
||||
|
||||
def print_age_distribution(stats: Dict):
    """Print a table of fork counts per last-update age bucket.

    Args:
        stats: The 'statistics' mapping. Reads 'age_distribution'
            (bucket key -> count) and 'total_analyzed' (the denominator
            for the percentage column).
    """
    print_section_header("FORK AGE DISTRIBUTION")

    buckets = stats.get('age_distribution', {})
    analyzed = stats.get('total_analyzed', 1)

    # (bucket key in the JSON, label shown in the table), in display order.
    rows = (
        ('1_month', 'Last updated ≤ 1 month'),
        ('3_months', 'Last updated ≤ 3 months'),
        ('6_months', 'Last updated ≤ 6 months'),
        ('1_year', 'Last updated ≤ 1 year'),
        ('2_years', 'Last updated ≤ 2 years'),
        ('5_plus_years', 'Last updated > 5 years'),
    )

    print("\nAge Category Count Percentage")
    print("-" * 60)
    for bucket_key, description in rows:
        n_forks = buckets.get(bucket_key, 0)
        if analyzed > 0:
            share = n_forks / analyzed * 100
        else:
            share = 0
        meter = '█' * int(share / 2)  # one block per two percentage points
        print(f"{description:30} {n_forks:5d} {share:5.1f}% {meter}")
|
||||
|
||||
def print_activity_analysis(stats: Dict):
    """Print a table of fork activity metrics with text bars.

    Counts and percentages are both read directly from ``stats``; the
    percentages are not recomputed here.

    Args:
        stats: The 'statistics' mapping of the loaded JSON document.
    """
    print_section_header("FORK ACTIVITY ANALYSIS")

    # (row label, key of the count, key of the precomputed percentage)
    metrics = [
        ('Forks with unique branches',
         'forks_with_unique_branches', 'percentage_with_unique_branches'),
        ('Forks with recent main branch',
         'forks_with_recent_main', 'percentage_with_recent_main'),
        ('Forks that contributed PRs',
         'forks_that_contributed_prs', 'percentage_contributed_prs'),
        ('Active forks (no PR contributions)',
         'active_non_contributing_forks', 'percentage_active_non_contributing'),
    ]

    print("\nMetric Count Percentage")
    print("-" * 65)
    for label, count_key, pct_key in metrics:
        count = stats.get(count_key, 0)
        pct = stats.get(pct_key, 0)
        bar = '█' * int(pct / 2)  # one block per two percentage points
        print(f"{label:35} {count:5d} {pct:5.1f}% {bar}")
|
||||
|
||||
def print_owner_commits(stats: Dict):
    """Print the summary of commits made by fork owners.

    Args:
        stats: The 'statistics' mapping. Every value read here defaults
            to 0 when its key is absent.
    """
    print_section_header("OWNER COMMIT ANALYSIS")

    def stat(key):
        # Every figure in this section uses the same zero default.
        return stats.get(key, 0)

    analyzed = stat('total_analyzed')
    with_commits = stat('forks_with_owner_commits')
    commit_total = stat('total_owner_commits')
    commit_mean = stat('avg_owner_commits_per_fork')
    share = stat('percentage_with_owner_commits')

    print(f"\nForks with owner commits: {with_commits:5d} / {analyzed:5d} ({share:.1f}%)")
    print(f"Total commits by fork owners: {commit_total:5d}")
    print(f"Average commits per fork: {commit_mean:5.1f}")
|
||||
|
||||
def print_top_forks(data: Dict, n: int = 20):
    """Print three fork listings, each limited to ``n`` rows.

    The listings are: forks ranked by number of unique branches, forks
    ranked by owner commits, and active forks that have not contributed
    PRs (in the order they appear in the data, not ranked).

    Args:
        data: Loaded JSON document; reads the 'analyzed_forks' list.
        n: Maximum number of rows per listing.
    """
    forks = data.get('analyzed_forks', [])
    if not forks:
        print("\nNo detailed fork data available in JSON file.")
        return

    def branch_count(fork):
        return len(fork.get('unique_branches', []))

    def commit_count(fork):
        return fork.get('owner_commits', 0)

    def name_of(fork):
        return fork.get('full_name', 'Unknown')

    # --- Ranking by unique branches ---
    print_section_header(f"TOP {n} FORKS BY UNIQUE BRANCHES")
    by_branches = sorted(forks, key=branch_count, reverse=True)[:n]

    print(f"\n{'Rank':<6} {'Fork':<45} {'Unique Branches':<20} {'Owner Commits'}")
    print("-" * 90)
    for rank, fork in enumerate(by_branches, start=1):
        print(f"{rank:<6} {name_of(fork):<45} {branch_count(fork):<20} {commit_count(fork)}")

    # --- Ranking by owner commits ---
    print_section_header(f"TOP {n} FORKS BY OWNER COMMITS")
    by_commits = sorted(forks, key=commit_count, reverse=True)[:n]

    print(f"\n{'Rank':<6} {'Fork':<45} {'Owner Commits':<20} {'Active'}")
    print("-" * 90)
    for rank, fork in enumerate(by_commits, start=1):
        if fork.get('is_active', False):
            active_flag = "Yes"
        else:
            active_flag = "No"
        print(f"{rank:<6} {name_of(fork):<45} {commit_count(fork):<20} {active_flag}")

    # --- Active forks that never contributed a PR ---
    print_section_header("ACTIVE FORKS WITHOUT PR CONTRIBUTIONS")
    quiet_active = []
    for fork in forks:
        if fork.get('is_active', False) and not fork.get('has_contributed_prs', False):
            quiet_active.append(fork)
    quiet_active = quiet_active[:n]

    if not quiet_active:
        print("\nNo active forks without PR contributions found.")
        return

    print(f"\n{'Fork':<45} {'Recent Commits':<20} {'Owner Commits':<20} {'Days Behind'}")
    print("-" * 110)
    for fork in quiet_active:
        recent = fork.get('recent_commits', 0)
        lag_days = fork.get('behind_main_by_days', 0)
        print(f"{name_of(fork):<45} {recent:<20} {commit_count(fork):<20} {lag_days}")
|
||||
|
||||
def create_age_distribution_chart(stats: Dict, save_path: str = None):
    """Create a pie chart of the fork age distribution.

    Args:
        stats: The 'statistics' mapping; reads 'age_distribution'
            (bucket key -> count).
        save_path: Where to write the image. When falsy the chart is
            shown in an interactive window instead.

    Does nothing beyond printing a notice when matplotlib is unavailable
    or when every bucket is empty.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping age distribution chart (matplotlib not available)")
        return

    age_dist = stats.get('age_distribution', {})

    # (bucket key in the JSON, wedge label, wedge color), in display order.
    buckets = [
        ('1_month', '≤ 1 month', '#2ecc71'),
        ('3_months', '≤ 3 months', '#27ae60'),
        ('6_months', '≤ 6 months', '#f39c12'),
        ('1_year', '≤ 1 year', '#e67e22'),
        ('2_years', '≤ 2 years', '#e74c3c'),
        ('5_plus_years', '> 5 years', '#95a5a6'),
    ]
    sizes = [age_dist.get(key, 0) for key, _, _ in buckets]
    labels = [label for _, label, _ in buckets]
    colors = [color for _, _, color in buckets]

    # A pie cannot be normalized when all wedges are zero, so bail out
    # before creating a figure.
    if not any(sizes):
        print("No age distribution data available for chart.")
        return

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        _, _, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%',
                                 colors=colors, startangle=90)

        ax.set_title('Fork Age Distribution (Last Update)', fontsize=16, fontweight='bold', pad=20)

        # Make percentage text more readable
        for autotext in autotexts:
            autotext.set_color('white')
            autotext.set_fontsize(10)
            autotext.set_fontweight('bold')

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Saved: {save_path}")
        else:
            plt.show()
    finally:
        # Release the figure even if saving or showing fails.
        plt.close(fig)
|
||||
|
||||
def create_activity_bar_chart(stats: Dict, save_path: str = None):
    """Draw the fork activity percentages as a bar chart.

    Args:
        stats: The 'statistics' mapping; the five 'percentage_*' values
            are read from it, each defaulting to 0.
        save_path: Where to write the image. When falsy the chart is
            shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping activity bar chart (matplotlib not available)")
        return

    # (tick label, key of the percentage in stats, bar color)
    series = (
        ('Unique\nBranches', 'percentage_with_unique_branches', '#3498db'),
        ('Recent\nMain', 'percentage_with_recent_main', '#2ecc71'),
        ('Contributed\nPRs', 'percentage_contributed_prs', '#9b59b6'),
        ('Active\nNo PRs', 'percentage_active_non_contributing', '#e67e22'),
        ('Owner\nCommits', 'percentage_with_owner_commits', '#e74c3c'),
    )
    tick_labels = [entry[0] for entry in series]
    heights = [stats.get(entry[1], 0) for entry in series]
    palette = [entry[2] for entry in series]

    fig, ax = plt.subplots(figsize=(12, 7))
    rects = ax.bar(tick_labels, heights, color=palette, alpha=0.8, edgecolor='black', linewidth=1.5)

    ax.set_ylabel('Percentage of Forks (%)', fontsize=12, fontweight='bold')
    ax.set_title('Fork Activity Metrics', fontsize=16, fontweight='bold', pad=20)
    ax.set_ylim(0, 100)
    ax.grid(axis='y', alpha=0.3, linestyle='--')

    # Write each bar's value just above its top edge, centered on the bar.
    for rect in rects:
        top = rect.get_height()
        center_x = rect.get_x() + rect.get_width()/2.
        ax.text(center_x, top + 1, f'{top:.1f}%',
                ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")

    plt.close()
|
||||
|
||||
def create_owner_commits_distribution(data: Dict, save_path: str = None):
    """Draw a histogram of owner commits per fork.

    Only forks whose 'owner_commits' value is greater than zero are
    included.

    Args:
        data: Loaded JSON document; reads the 'analyzed_forks' list.
        save_path: Where to write the image. When falsy the chart is
            shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping owner commits distribution chart (matplotlib not available)")
        return

    forks = data.get('analyzed_forks', [])
    if not forks:
        print("No detailed fork data for commits distribution chart.")
        return

    commits = []
    for fork in forks:
        owner_commits = fork.get('owner_commits', 0)
        if owner_commits > 0:
            commits.append(owner_commits)
    if not commits:
        print("No owner commits data available.")
        return

    fig, ax = plt.subplots(figsize=(12, 7))

    # Create histogram with bins
    _, edges, bars = ax.hist(commits, bins=20, color='#3498db', alpha=0.7, edgecolor='black')

    # Recolor each bar by its position along the x axis: bin centers are
    # rescaled to 0..1 and looked up in the reversed RdYlGn colormap.
    colormap = plt.cm.RdYlGn_r
    centers = 0.5 * (edges[:-1] + edges[1:])
    offsets = centers - min(centers)
    shades = offsets / max(offsets)
    for shade, bar in zip(shades, bars):
        plt.setp(bar, 'facecolor', colormap(shade))

    ax.set_xlabel('Number of Owner Commits', fontsize=12, fontweight='bold')
    ax.set_ylabel('Number of Forks', fontsize=12, fontweight='bold')
    ax.set_title('Distribution of Owner Commits Across Forks', fontsize=16, fontweight='bold', pad=20)
    ax.grid(axis='y', alpha=0.3, linestyle='--')

    # Summary box in the top-right corner of the axes.
    summary = "\n".join([
        f'Total Forks: {len(commits)}',
        f'Mean: {sum(commits)/len(commits):.1f}',
        f'Max: {max(commits)}',
    ])
    ax.text(0.95, 0.95, summary, transform=ax.transAxes,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
            fontsize=10, fontweight='bold')

    plt.tight_layout()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")

    plt.close()
|
||||
|
||||
def create_combined_dashboard(stats: Dict, data: Dict, save_path: str = None):
    """Create a 2x2 dashboard combining the charts and a text summary.

    Panels: age distribution pie, activity metrics bars, owner commits
    histogram, and a monospace summary box.

    Args:
        stats: The 'statistics' mapping of the loaded JSON document.
        data: The full loaded JSON document; reads 'analyzed_forks' and
            'main_repo'.
        save_path: Where to write the image. When falsy the dashboard is
            shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping combined dashboard (matplotlib not available)")
        return

    fig = plt.figure(figsize=(16, 10))
    try:
        # Age distribution pie chart
        ax1 = plt.subplot(2, 2, 1)
        age_dist = stats.get('age_distribution', {})
        labels = ['≤1mo', '≤3mo', '≤6mo', '≤1yr', '≤2yr', '>5yr']
        sizes = [
            age_dist.get('1_month', 0),
            age_dist.get('3_months', 0),
            age_dist.get('6_months', 0),
            age_dist.get('1_year', 0),
            age_dist.get('2_years', 0),
            age_dist.get('5_plus_years', 0),
        ]
        colors = ['#2ecc71', '#27ae60', '#f39c12', '#e67e22', '#e74c3c', '#95a5a6']
        if any(sizes):
            ax1.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors, startangle=90)
        else:
            # A pie cannot be normalized when every wedge is zero; show a
            # placeholder so the rest of the dashboard still renders.
            ax1.text(0.5, 0.5, 'No age data', ha='center', va='center',
                     transform=ax1.transAxes)
            ax1.axis('off')
        ax1.set_title('Fork Age Distribution', fontweight='bold')

        # Activity metrics bar chart
        ax2 = plt.subplot(2, 2, 2)
        metrics = ['Unique\nBranches', 'Recent\nMain', 'PRs', 'Active\nNo PRs', 'Owner\nCommits']
        values = [
            stats.get('percentage_with_unique_branches', 0),
            stats.get('percentage_with_recent_main', 0),
            stats.get('percentage_contributed_prs', 0),
            stats.get('percentage_active_non_contributing', 0),
            stats.get('percentage_with_owner_commits', 0),
        ]
        colors_bar = ['#3498db', '#2ecc71', '#9b59b6', '#e67e22', '#e74c3c']
        ax2.bar(metrics, values, color=colors_bar, alpha=0.8)
        ax2.set_ylabel('Percentage (%)')
        ax2.set_title('Activity Metrics', fontweight='bold')
        ax2.set_ylim(0, 100)
        ax2.grid(axis='y', alpha=0.3)

        # Owner commits histogram (forks with at least one owner commit)
        ax3 = plt.subplot(2, 2, 3)
        analyzed_forks = data.get('analyzed_forks', [])
        commits = [fork.get('owner_commits', 0) for fork in analyzed_forks
                   if fork.get('owner_commits', 0) > 0]
        if commits:
            ax3.hist(commits, bins=15, color='#3498db', alpha=0.7, edgecolor='black')
        else:
            # Explain the empty panel instead of leaving blank axes.
            ax3.text(0.5, 0.5, 'No owner commit data', ha='center', va='center',
                     transform=ax3.transAxes)
        ax3.set_xlabel('Owner Commits')
        ax3.set_ylabel('Frequency')
        ax3.set_title('Owner Commits Distribution', fontweight='bold')
        ax3.grid(axis='y', alpha=0.3)

        # Summary statistics
        ax4 = plt.subplot(2, 2, 4)
        ax4.axis('off')

        main_repo = data.get('main_repo', {})
        # Joined line by line; the leading and trailing empty entries
        # keep the newline at each end of the text block.
        summary_text = "\n".join([
            "",
            "REPOSITORY STATISTICS",
            "=" * 35,
            "",
            f"Repository: {main_repo.get('full_name', 'Unknown')}",
            f"Total Forks: {main_repo.get('forks_count', 0):,}",
            f"Analyzed: {stats.get('total_analyzed', 0)}",
            "",
            "KEY METRICS:",
            f"• Unique Branches: {stats.get('forks_with_unique_branches', 0)} ({stats.get('percentage_with_unique_branches', 0):.1f}%)",
            f"• PR Contributors: {stats.get('forks_that_contributed_prs', 0)} ({stats.get('percentage_contributed_prs', 0):.1f}%)",
            f"• Owner Commits: {stats.get('total_owner_commits', 0):,}",
            f"• Avg Commits/Fork: {stats.get('avg_owner_commits_per_fork', 0):.1f}",
            "",
            "INSIGHTS:",
            f"• Recent Forks: {age_dist.get('1_month', 0) + age_dist.get('3_months', 0)}",
            f"• Very Old (>5yr): {age_dist.get('5_plus_years', 0)}",
            f"• Active No PRs: {stats.get('active_non_contributing_forks', 0)}",
            "",
        ])

        ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
                 verticalalignment='top', fontsize=11, fontfamily='monospace',
                 bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))

        fig.suptitle('Fork Statistics Dashboard', fontsize=18, fontweight='bold', y=0.98)
        # Leave headroom at the top of the layout for the suptitle.
        plt.tight_layout(rect=[0, 0.03, 1, 0.96])

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            print(f"Saved: {save_path}")
        else:
            plt.show()
    finally:
        # Release the figure even if drawing, saving or showing fails.
        plt.close(fig)
|
||||
|
||||
def main():
    """Command-line entry point: print the text report, then build charts.

    Parses the arguments, loads the JSON file, prints every text section
    and, unless --no-graphs is given, either saves the charts to the
    output directory or shows them interactively.
    """
    parser = argparse.ArgumentParser(description='Visualize fork statistics data')
    parser.add_argument('json_file', help='Path to the JSON file with fork statistics')
    parser.add_argument('--save-plots', action='store_true', help='Save plots to files instead of displaying')
    # default=None lets an explicitly supplied directory be told apart
    # from the fallback, so "--output-dir DIR" on its own saves the plots
    # as the module usage text shows.
    parser.add_argument('--output-dir', default=None,
                        help='Directory to save plots; implies --save-plots (default: ./fork_plots)')
    parser.add_argument('--top-n', type=int, default=20, help='Number of top forks to display (default: 20)')
    parser.add_argument('--no-graphs', action='store_true', help='Skip graph generation, only show text statistics')

    args = parser.parse_args()
    save_plots = args.save_plots or args.output_dir is not None

    # Load data
    data = load_json_data(args.json_file)
    if not isinstance(data, dict):
        # Everything below uses dict lookups; fail with a clear message
        # instead of an AttributeError.
        print(f"Error: '{args.json_file}' does not contain a JSON object.", file=sys.stderr)
        sys.exit(1)
    stats = data.get('statistics', {})

    # Print text statistics
    print_repository_info(data)
    print_age_distribution(stats)
    print_activity_analysis(stats)
    print_owner_commits(stats)
    print_top_forks(data, args.top_n)

    # Generate graphs if not disabled
    if not args.no_graphs:
        print_section_header("GENERATING VISUAL CHARTS")

        if not HAS_MATPLOTLIB:
            # Avoid creating the output directory and reporting
            # "All plots saved" when nothing can be drawn.
            print("\nmatplotlib is not installed; skipping chart generation.")
        elif save_plots:
            output_dir = Path(args.output_dir if args.output_dir is not None else './fork_plots')
            output_dir.mkdir(parents=True, exist_ok=True)
            print(f"\nSaving plots to: {output_dir}")

            create_age_distribution_chart(stats, output_dir / "age_distribution.png")
            create_activity_bar_chart(stats, output_dir / "activity_metrics.png")
            create_owner_commits_distribution(data, output_dir / "owner_commits_distribution.png")
            create_combined_dashboard(stats, data, output_dir / "dashboard.png")

            print(f"\nAll plots saved to: {output_dir}")
        else:
            print("\nDisplaying interactive plots...")
            print("Close each plot window to see the next one.")

            create_age_distribution_chart(stats)
            create_activity_bar_chart(stats)
            create_owner_commits_distribution(data)
            create_combined_dashboard(stats, data)

    print_section_header("VISUALIZATION COMPLETE")
    print(f"\nData source: {args.json_file}")
    print(f"Total forks analyzed: {stats.get('total_analyzed', 0)}")
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported.
if '__main__' == __name__:
    main()
|
||||
Reference in New Issue
Block a user