Files
WLED/tools/fork_stats_visualizer.py
2025-09-30 20:04:09 +00:00

467 lines
18 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Fork Statistics Visualizer

This script loads JSON data generated by fork_stats.py and displays
detailed statistics both as formatted lists and visual graphs.

Usage:
    python3 tools/fork_stats_visualizer.py results.json
    python3 tools/fork_stats_visualizer.py results.json --save-plots
    python3 tools/fork_stats_visualizer.py results.json --save-plots --output-dir ./plots
"""
import argparse
import json
import sys
from pathlib import Path
from typing import Dict, List
from datetime import datetime
# matplotlib is an optional dependency: when it is missing the script still
# runs, and HAS_MATPLOTLIB tells the chart functions to skip their work.
try:
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
except ImportError:
    HAS_MATPLOTLIB = False
    print("Warning: matplotlib not installed. Graphical visualizations will be disabled.")
    print("Install with: pip install -r tools/fork_stats_visualizer_requirements.txt")
    print()
else:
    HAS_MATPLOTLIB = True
def load_json_data(filepath: str) -> Dict:
    """Load and decode the JSON statistics file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    The process exits with status 1, after printing a message to stderr,
    when the file does not exist or does not contain valid JSON.
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found.", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in '{filepath}': {e}", file=sys.stderr)
        sys.exit(1)
def print_section_header(title: str):
    """Print *title* framed by an 80-character '=' rule above and below."""
    rule = "=" * 80
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
def print_repository_info(data: Dict):
    """Print the main repository summary and the analysis timestamp.

    Args:
        data: Decoded JSON document. The 'main_repo', 'total_forks' and
            'analysis_timestamp' keys are read; missing keys fall back to
            'Unknown' / 0, and the date line is omitted when there is no
            timestamp.
    """
    print_section_header("REPOSITORY INFORMATION")
    main_repo = data.get('main_repo', {})
    print(f"\nRepository: {main_repo.get('full_name', 'Unknown')}")
    print(f"Total Forks: {main_repo.get('forks_count', 0):,}")
    print(f"Stars: {main_repo.get('stargazers_count', 0):,}")
    print(f"Watchers: {main_repo.get('watchers_count', 0):,}")
    print(f"\nAnalyzed Forks: {data.get('total_forks', 0)}")
    if 'analysis_timestamp' in data:
        timestamp = data['analysis_timestamp']
        try:
            # fromisoformat() on older Python versions rejects a trailing 'Z',
            # so it is rewritten as an explicit UTC offset first.
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
            # NOTE(review): the value is labelled UTC without being converted;
            # this assumes the producer always writes UTC timestamps.
            print(f"Analysis Date: {dt.strftime('%Y-%m-%d %H:%M:%S UTC')}")
        except (AttributeError, TypeError, ValueError):
            # Non-string or unparsable timestamp: show the raw value rather
            # than failing. Only parse-related errors are caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            print(f"Analysis Date: {timestamp}")
def print_age_distribution(stats: Dict):
    """Print a table of fork counts per last-update age bucket.

    Args:
        stats: The 'statistics' section of the JSON document. Reads
            'age_distribution' (bucket key -> count) and 'total_analyzed',
            which is the denominator for the percentages.
    """
    print_section_header("FORK AGE DISTRIBUTION")
    age_dist = stats.get('age_distribution', {})
    total = stats.get('total_analyzed', 1)
    age_labels = {
        '1_month': 'Last updated ≤ 1 month',
        '3_months': 'Last updated ≤ 3 months',
        '6_months': 'Last updated ≤ 6 months',
        '1_year': 'Last updated ≤ 1 year',
        '2_years': 'Last updated ≤ 2 years',
        '5_plus_years': 'Last updated > 5 years'
    }
    print("\nAge Category Count Percentage")
    print("-" * 60)
    for key, label in age_labels.items():
        count = age_dist.get(key, 0)
        # Guard against a zero total so an empty analysis prints 0.0%.
        pct = (count / total * 100) if total > 0 else 0
        # One block character per 2 percentage points (at most 50 for 100%).
        bar = '█' * int(pct / 2)
        print(f"{label:30} {count:5d} {pct:5.1f}% {bar}")
def print_activity_analysis(stats: Dict):
    """Print a table of fork activity metrics with counts and percentages.

    Args:
        stats: The 'statistics' section of the JSON document. Each row reads
            a count key and a precomputed percentage key; missing keys are
            shown as 0.
    """
    print_section_header("FORK ACTIVITY ANALYSIS")
    metrics = [
        ('Forks with unique branches', stats.get('forks_with_unique_branches', 0),
         stats.get('percentage_with_unique_branches', 0)),
        ('Forks with recent main branch', stats.get('forks_with_recent_main', 0),
         stats.get('percentage_with_recent_main', 0)),
        ('Forks that contributed PRs', stats.get('forks_that_contributed_prs', 0),
         stats.get('percentage_contributed_prs', 0)),
        ('Active forks (no PR contributions)', stats.get('active_non_contributing_forks', 0),
         stats.get('percentage_active_non_contributing', 0)),
    ]
    print("\nMetric Count Percentage")
    print("-" * 65)
    for label, count, pct in metrics:
        # One block character per 2 percentage points (at most 50 for 100%).
        bar = '█' * int(pct / 2)
        print(f"{label:35} {count:5d} {pct:5.1f}% {bar}")
def print_owner_commits(stats: Dict):
    """Print the summary of commits made by fork owners.

    Args:
        stats: The 'statistics' section of the JSON document; every value
            read here defaults to 0 when its key is absent.
    """
    print_section_header("OWNER COMMIT ANALYSIS")

    analyzed = stats.get('total_analyzed', 0)
    with_commits = stats.get('forks_with_owner_commits', 0)
    commit_total = stats.get('total_owner_commits', 0)
    mean_commits = stats.get('avg_owner_commits_per_fork', 0)
    share = stats.get('percentage_with_owner_commits', 0)

    print(f"\nForks with owner commits: {with_commits:5d} / {analyzed:5d} ({share:.1f}%)")
    print(f"Total commits by fork owners: {commit_total:5d}")
    print(f"Average commits per fork: {mean_commits:5.1f}")
def print_top_forks(data: Dict, n: int = 20):
    """Print three fork listings built from data['analyzed_forks'].

    The listings are: the top *n* forks by number of unique branches, the
    top *n* by owner commits, and the first *n* forks that are flagged
    active but have not contributed PRs. When there is no per-fork data a
    single notice is printed instead.

    Args:
        data: Decoded JSON document.
        n: Maximum number of rows per listing.
    """
    forks = data.get('analyzed_forks', [])
    if not forks:
        print("\nNo detailed fork data available in JSON file.")
        return

    def branch_count(fork):
        return len(fork.get('unique_branches', []))

    def commit_count(fork):
        return fork.get('owner_commits', 0)

    def display_name(fork):
        return fork.get('full_name', 'Unknown')

    rule = "-" * 90

    # Listing 1: ranked by number of unique branches
    print_section_header(f"TOP {n} FORKS BY UNIQUE BRANCHES")
    ranked = sorted(forks, key=branch_count, reverse=True)[:n]
    print(f"\n{'Rank':<6} {'Fork':<45} {'Unique Branches':<20} {'Owner Commits'}")
    print(rule)
    for rank, fork in enumerate(ranked, start=1):
        print(f"{rank:<6} {display_name(fork):<45} {branch_count(fork):<20} {commit_count(fork)}")

    # Listing 2: ranked by owner commits
    print_section_header(f"TOP {n} FORKS BY OWNER COMMITS")
    ranked = sorted(forks, key=commit_count, reverse=True)[:n]
    print(f"\n{'Rank':<6} {'Fork':<45} {'Owner Commits':<20} {'Active'}")
    print(rule)
    for rank, fork in enumerate(ranked, start=1):
        active_flag = "Yes" if fork.get('is_active', False) else "No"
        print(f"{rank:<6} {display_name(fork):<45} {commit_count(fork):<20} {active_flag}")

    # Listing 3: active forks with no PR contributions, in input order
    print_section_header("ACTIVE FORKS WITHOUT PR CONTRIBUTIONS")
    candidates = [
        fork for fork in forks
        if fork.get('is_active', False) and not fork.get('has_contributed_prs', False)
    ][:n]
    if not candidates:
        print("\nNo active forks without PR contributions found.")
        return

    print(f"\n{'Fork':<45} {'Recent Commits':<20} {'Owner Commits':<20} {'Days Behind'}")
    print("-" * 110)
    for fork in candidates:
        recent = fork.get('recent_commits', 0)
        owner = commit_count(fork)
        days = fork.get('behind_main_by_days', 0)
        print(f"{display_name(fork):<45} {recent:<20} {owner:<20} {days}")
def create_age_distribution_chart(stats: Dict, save_path: str = None):
    """Create a pie chart for fork age distribution.

    Args:
        stats: The 'statistics' section of the JSON document; the
            'age_distribution' mapping supplies one wedge per age bucket.
        save_path: Destination file for the image. When omitted the chart is
            shown in an interactive window instead.

    The chart is skipped with a message when matplotlib is unavailable or
    when every age bucket is empty.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping age distribution chart (matplotlib not available)")
        return
    age_dist = stats.get('age_distribution', {})
    labels = ['≤ 1 month', '≤ 3 months', '≤ 6 months', '≤ 1 year', '≤ 2 years', '> 5 years']
    sizes = [
        age_dist.get('1_month', 0),
        age_dist.get('3_months', 0),
        age_dist.get('6_months', 0),
        age_dist.get('1_year', 0),
        age_dist.get('2_years', 0),
        age_dist.get('5_plus_years', 0)
    ]
    # A pie cannot be normalised when all wedges are zero: depending on the
    # matplotlib version this yields NaN wedges or raises.
    if sum(sizes) <= 0:
        print("Skipping age distribution chart (no age distribution data)")
        return
    colors = ['#2ecc71', '#27ae60', '#f39c12', '#e67e22', '#e74c3c', '#95a5a6']
    fig, ax = plt.subplots(figsize=(10, 8))
    _, _, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%',
                             colors=colors, startangle=90)
    ax.set_title('Fork Age Distribution (Last Update)', fontsize=16, fontweight='bold', pad=20)
    # Make percentage text more readable
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(10)
        autotext.set_fontweight('bold')
    plt.tight_layout()
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")
    else:
        plt.show()
    # Close this figure explicitly rather than whichever one is current.
    plt.close(fig)
def create_activity_bar_chart(stats: Dict, save_path: str = None):
    """Draw the fork activity percentages as a five-bar chart.

    Args:
        stats: The 'statistics' section of the JSON document; each bar reads
            one precomputed percentage key, defaulting to 0.
        save_path: Destination file for the image. When omitted the chart is
            shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping activity bar chart (matplotlib not available)")
        return

    # (tick label, statistics key, bar colour) for each bar, left to right
    series = (
        ('Unique\nBranches', 'percentage_with_unique_branches', '#3498db'),
        ('Recent\nMain', 'percentage_with_recent_main', '#2ecc71'),
        ('Contributed\nPRs', 'percentage_contributed_prs', '#9b59b6'),
        ('Active\nNo PRs', 'percentage_active_non_contributing', '#e67e22'),
        ('Owner\nCommits', 'percentage_with_owner_commits', '#e74c3c'),
    )
    tick_labels = [label for label, _, _ in series]
    percentages = [stats.get(key, 0) for _, key, _ in series]
    palette = [colour for _, _, colour in series]

    fig, ax = plt.subplots(figsize=(12, 7))
    rects = ax.bar(tick_labels, percentages, color=palette, alpha=0.8,
                   edgecolor='black', linewidth=1.5)
    ax.set_ylabel('Percentage of Forks (%)', fontsize=12, fontweight='bold')
    ax.set_title('Fork Activity Metrics', fontsize=16, fontweight='bold', pad=20)
    ax.set_ylim(0, 100)
    ax.grid(axis='y', alpha=0.3, linestyle='--')

    # Annotate each bar with its value, just above the bar top
    for rect in rects:
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width()/2.
        ax.text(centre_x, top + 1, f'{top:.1f}%',
                ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")
    plt.close()
def create_owner_commits_distribution(data: Dict, save_path: str = None):
    """Draw a 20-bin histogram of owner commit counts across forks.

    Only forks with more than zero owner commits are included. The chart is
    skipped with a message when matplotlib is unavailable, when there is no
    per-fork data, or when no fork has owner commits.

    Args:
        data: Decoded JSON document; reads 'analyzed_forks'.
        save_path: Destination file for the image. When omitted the chart is
            shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping owner commits distribution chart (matplotlib not available)")
        return

    forks = data.get('analyzed_forks', [])
    if not forks:
        print("No detailed fork data for commits distribution chart.")
        return

    commits = []
    for fork in forks:
        owner_commits = fork.get('owner_commits', 0)
        if owner_commits > 0:
            commits.append(owner_commits)
    if not commits:
        print("No owner commits data available.")
        return

    fig, ax = plt.subplots(figsize=(12, 7))
    _, edges, bars = ax.hist(commits, bins=20, color='#3498db', alpha=0.7, edgecolor='black')

    # Recolour each bar by the position of its bin centre, scaled to 0..1
    colormap = plt.cm.RdYlGn_r
    centers = 0.5 * (edges[:-1] + edges[1:])
    scaled = centers - min(centers)
    scaled /= max(scaled)
    for fraction, bar in zip(scaled, bars):
        plt.setp(bar, 'facecolor', colormap(fraction))

    ax.set_xlabel('Number of Owner Commits', fontsize=12, fontweight='bold')
    ax.set_ylabel('Number of Forks', fontsize=12, fontweight='bold')
    ax.set_title('Distribution of Owner Commits Across Forks', fontsize=16, fontweight='bold', pad=20)
    ax.grid(axis='y', alpha=0.3, linestyle='--')

    # Summary box in the top-right corner of the axes
    mean_commits = sum(commits)/len(commits)
    stats_text = f'Total Forks: {len(commits)}\nMean: {mean_commits:.1f}\nMax: {max(commits)}'
    box_style = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(0.95, 0.95, stats_text, transform=ax.transAxes,
            verticalalignment='top', horizontalalignment='right',
            bbox=box_style,
            fontsize=10, fontweight='bold')

    plt.tight_layout()
    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")
    plt.close()
def create_combined_dashboard(stats: Dict, data: Dict, save_path: str = None):
    """Create a combined 2x2 dashboard with multiple charts.

    The panels are: age distribution pie, activity metrics bars, owner
    commits histogram, and a text summary.

    Args:
        stats: The 'statistics' section of the JSON document.
        data: The whole decoded JSON document; 'analyzed_forks' and
            'main_repo' are read from it.
        save_path: Destination file for the image. When omitted the dashboard
            is shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping combined dashboard (matplotlib not available)")
        return
    fig = plt.figure(figsize=(16, 10))
    # Age distribution pie chart
    ax1 = plt.subplot(2, 2, 1)
    age_dist = stats.get('age_distribution', {})
    labels = ['≤1mo', '≤3mo', '≤6mo', '≤1yr', '≤2yr', '>5yr']
    sizes = [
        age_dist.get('1_month', 0),
        age_dist.get('3_months', 0),
        age_dist.get('6_months', 0),
        age_dist.get('1_year', 0),
        age_dist.get('2_years', 0),
        age_dist.get('5_plus_years', 0)
    ]
    colors = ['#2ecc71', '#27ae60', '#f39c12', '#e67e22', '#e74c3c', '#95a5a6']
    if sum(sizes) > 0:
        ax1.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors, startangle=90)
    else:
        # A pie cannot be normalised when all wedges are zero (NaN wedges or
        # an error, depending on the matplotlib version), so show a
        # placeholder and still render the other panels.
        ax1.text(0.5, 0.5, 'No age data', ha='center', va='center',
                 transform=ax1.transAxes)
        ax1.axis('off')
    ax1.set_title('Fork Age Distribution', fontweight='bold')
    # Activity metrics bar chart
    ax2 = plt.subplot(2, 2, 2)
    metrics = ['Unique\nBranches', 'Recent\nMain', 'PRs', 'Active\nNo PRs', 'Owner\nCommits']
    values = [
        stats.get('percentage_with_unique_branches', 0),
        stats.get('percentage_with_recent_main', 0),
        stats.get('percentage_contributed_prs', 0),
        stats.get('percentage_active_non_contributing', 0),
        stats.get('percentage_with_owner_commits', 0)
    ]
    colors_bar = ['#3498db', '#2ecc71', '#9b59b6', '#e67e22', '#e74c3c']
    ax2.bar(metrics, values, color=colors_bar, alpha=0.8)
    ax2.set_ylabel('Percentage (%)')
    ax2.set_title('Activity Metrics', fontweight='bold')
    ax2.set_ylim(0, 100)
    ax2.grid(axis='y', alpha=0.3)
    # Owner commits histogram (only forks with at least one owner commit)
    ax3 = plt.subplot(2, 2, 3)
    analyzed_forks = data.get('analyzed_forks', [])
    commits = [fork.get('owner_commits', 0) for fork in analyzed_forks if fork.get('owner_commits', 0) > 0]
    if commits:
        ax3.hist(commits, bins=15, color='#3498db', alpha=0.7, edgecolor='black')
    ax3.set_xlabel('Owner Commits')
    ax3.set_ylabel('Frequency')
    ax3.set_title('Owner Commits Distribution', fontweight='bold')
    ax3.grid(axis='y', alpha=0.3)
    # Summary statistics (text-only panel, so the axes frame is hidden)
    ax4 = plt.subplot(2, 2, 4)
    ax4.axis('off')
    main_repo = data.get('main_repo', {})
    summary_text = f"""
REPOSITORY STATISTICS
{'='*35}
Repository: {main_repo.get('full_name', 'Unknown')}
Total Forks: {main_repo.get('forks_count', 0):,}
Analyzed: {stats.get('total_analyzed', 0)}
KEY METRICS:
• Unique Branches: {stats.get('forks_with_unique_branches', 0)} ({stats.get('percentage_with_unique_branches', 0):.1f}%)
• PR Contributors: {stats.get('forks_that_contributed_prs', 0)} ({stats.get('percentage_contributed_prs', 0):.1f}%)
• Owner Commits: {stats.get('total_owner_commits', 0):,}
• Avg Commits/Fork: {stats.get('avg_owner_commits_per_fork', 0):.1f}
INSIGHTS:
• Recent Forks: {age_dist.get('1_month', 0) + age_dist.get('3_months', 0)}
• Very Old (>5yr): {age_dist.get('5_plus_years', 0)}
• Active No PRs: {stats.get('active_non_contributing_forks', 0)}
"""
    ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
             verticalalignment='top', fontsize=11, fontfamily='monospace',
             bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))
    fig.suptitle('Fork Statistics Dashboard', fontsize=18, fontweight='bold', y=0.98)
    # Leave room at the top of the figure for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.96])
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")
    else:
        plt.show()
    # Close this figure explicitly rather than whichever one is current.
    plt.close(fig)
def main():
    """Parse the command line, print the text report, then produce charts.

    Charts are saved under --output-dir when --save-plots is given, and
    shown interactively otherwise. Chart generation is skipped entirely with
    --no-graphs or when matplotlib is not installed.
    """
    parser = argparse.ArgumentParser(description='Visualize fork statistics data')
    parser.add_argument('json_file', help='Path to the JSON file with fork statistics')
    parser.add_argument('--save-plots', action='store_true', help='Save plots to files instead of displaying')
    parser.add_argument('--output-dir', default='./fork_plots', help='Directory to save plots (default: ./fork_plots)')
    parser.add_argument('--top-n', type=int, default=20, help='Number of top forks to display (default: 20)')
    parser.add_argument('--no-graphs', action='store_true', help='Skip graph generation, only show text statistics')
    args = parser.parse_args()
    # Load data
    data = load_json_data(args.json_file)
    stats = data.get('statistics', {})
    # Print text statistics
    print_repository_info(data)
    print_age_distribution(stats)
    print_activity_analysis(stats)
    print_owner_commits(stats)
    print_top_forks(data, args.top_n)
    # Generate graphs if not disabled
    if not args.no_graphs:
        print_section_header("GENERATING VISUAL CHARTS")
        if not HAS_MATPLOTLIB:
            # Every chart function would skip itself anyway; say so once
            # instead of creating an empty output directory and reporting
            # plots as saved.
            print("\nmatplotlib is not installed; skipping chart generation.")
        elif args.save_plots:
            output_dir = Path(args.output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)
            print(f"\nSaving plots to: {output_dir}")
            create_age_distribution_chart(stats, output_dir / "age_distribution.png")
            create_activity_bar_chart(stats, output_dir / "activity_metrics.png")
            create_owner_commits_distribution(data, output_dir / "owner_commits_distribution.png")
            create_combined_dashboard(stats, data, output_dir / "dashboard.png")
            print(f"\nAll plots saved to: {output_dir}")
        else:
            print("\nDisplaying interactive plots...")
            print("Close each plot window to see the next one.")
            create_age_distribution_chart(stats)
            create_activity_bar_chart(stats)
            create_owner_commits_distribution(data)
            create_combined_dashboard(stats, data)
    print_section_header("VISUALIZATION COMPLETE")
    print(f"\nData source: {args.json_file}")
    print(f"Total forks analyzed: {stats.get('total_analyzed', 0)}")


if __name__ == '__main__':
    main()