mirror of
https://github.com/wled/WLED.git
synced 2025-11-17 15:01:01 +00:00
467 lines
18 KiB
Python
Executable File
467 lines
18 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Fork Statistics Visualizer
|
|
|
|
This script loads JSON data generated by fork_stats.py and displays
|
|
detailed statistics both as formatted lists and visual graphs.
|
|
|
|
Usage:
|
|
python3 tools/fork_stats_visualizer.py results.json
|
|
python3 tools/fork_stats_visualizer.py results.json --save-plots
|
|
python3 tools/fork_stats_visualizer.py results.json --output-dir ./plots
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, List
|
|
from datetime import datetime
|
|
|
|
# matplotlib is optional: the text report works without it and only the chart
# functions need it, so a failed import is downgraded to a warning and
# recorded in HAS_MATPLOTLIB for those functions to check.
try:
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
    HAS_MATPLOTLIB = True
except ImportError:
    HAS_MATPLOTLIB = False
    # Warn on stderr so the notice never ends up inside a piped or
    # redirected statistics report.
    print("Warning: matplotlib not installed. Graphical visualizations will be disabled.", file=sys.stderr)
    print("Install with: pip install -r tools/fork_stats_visualizer_requirements.txt", file=sys.stderr)
    print(file=sys.stderr)
|
|
|
|
def load_json_data(filepath: str) -> Dict:
    """Load and parse the fork statistics JSON file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    If the file does not exist or does not contain valid JSON, an error
    message is written to stderr and the process exits with status 1.
    """
    try:
        # Read as UTF-8 explicitly instead of relying on the locale's default
        # encoding, which differs between platforms.
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found.", file=sys.stderr)
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in '{filepath}': {e}", file=sys.stderr)
        sys.exit(1)
|
|
|
|
def print_section_header(title: str):
    """Print *title* framed by two 80-character '=' rules.

    A blank line is emitted before the upper rule so that consecutive
    sections are visually separated.
    """
    rule = "=" * 80
    print(f"\n{rule}\n {title}\n{rule}")
|
|
|
|
def print_repository_info(data: Dict):
    """Print the header section describing the analysed repository.

    Args:
        data: The loaded JSON document. Reads the optional keys
            'main_repo' (with 'full_name', 'forks_count',
            'stargazers_count', 'watchers_count'), 'total_forks' and
            'analysis_timestamp'; missing values fall back to
            'Unknown' / 0.
    """
    print_section_header("REPOSITORY INFORMATION")

    main_repo = data.get('main_repo', {})
    print(f"\nRepository: {main_repo.get('full_name', 'Unknown')}")
    print(f"Total Forks: {main_repo.get('forks_count', 0):,}")
    print(f"Stars: {main_repo.get('stargazers_count', 0):,}")
    print(f"Watchers: {main_repo.get('watchers_count', 0):,}")
    print(f"\nAnalyzed Forks: {data.get('total_forks', 0)}")

    if 'analysis_timestamp' in data:
        timestamp = data['analysis_timestamp']
        try:
            # fromisoformat() on older Python versions rejects a trailing
            # 'Z', so it is rewritten as an explicit UTC offset first.
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
            shown = dt.strftime('%Y-%m-%d %H:%M:%S UTC')
        except (AttributeError, TypeError, ValueError):
            # Not a string, or not ISO-8601: show the raw value rather than
            # failing. Only parse errors are caught so that Ctrl-C and
            # genuine bugs still propagate.
            shown = timestamp
        print(f"Analysis Date: {shown}")
|
|
|
|
def print_age_distribution(stats: Dict):
    """Print a table of how many forks fall into each last-update age bucket.

    Args:
        stats: The 'statistics' mapping. Reads 'age_distribution'
            (bucket key -> count) and 'total_analyzed', which is the
            denominator for the percentage column.
    """
    print_section_header("FORK AGE DISTRIBUTION")

    buckets = stats.get('age_distribution', {})
    analyzed = stats.get('total_analyzed', 1)

    # (bucket key, row label) pairs in display order.
    rows = (
        ('1_month', 'Last updated ≤ 1 month'),
        ('3_months', 'Last updated ≤ 3 months'),
        ('6_months', 'Last updated ≤ 6 months'),
        ('1_year', 'Last updated ≤ 1 year'),
        ('2_years', 'Last updated ≤ 2 years'),
        ('5_plus_years', 'Last updated > 5 years'),
    )

    print("\nAge Category Count Percentage")
    print("-" * 60)
    for bucket_key, description in rows:
        n_forks = buckets.get(bucket_key, 0)
        if analyzed > 0:
            share = n_forks / analyzed * 100
        else:
            share = 0
        blocks = '█' * int(share / 2)  # one block character per 2 %
        print(f"{description:30} {n_forks:5d} {share:5.1f}% {blocks}")
|
|
|
|
def print_activity_analysis(stats: Dict):
    """Print a table of fork activity metrics with a text bar per row.

    Args:
        stats: The 'statistics' mapping. For every metric both the
            count key and the matching 'percentage_*' key are read;
            missing keys are shown as 0.
    """
    print_section_header("FORK ACTIVITY ANALYSIS")

    # (row label, count key, percentage key) in display order. The
    # percentages are taken from the JSON as-is, so no total is needed here.
    rows = (
        ('Forks with unique branches',
         'forks_with_unique_branches', 'percentage_with_unique_branches'),
        ('Forks with recent main branch',
         'forks_with_recent_main', 'percentage_with_recent_main'),
        ('Forks that contributed PRs',
         'forks_that_contributed_prs', 'percentage_contributed_prs'),
        ('Active forks (no PR contributions)',
         'active_non_contributing_forks', 'percentage_active_non_contributing'),
    )

    print("\nMetric Count Percentage")
    print("-" * 65)
    for label, count_key, pct_key in rows:
        count = stats.get(count_key, 0)
        pct = stats.get(pct_key, 0)
        bar = '█' * int(pct / 2)  # one block character per 2 %
        print(f"{label:35} {count:5d} {pct:5.1f}% {bar}")
|
|
|
|
def print_owner_commits(stats: Dict):
    """Print the summary of commits made by fork owners.

    Args:
        stats: The 'statistics' mapping. Reads 'total_analyzed',
            'forks_with_owner_commits', 'total_owner_commits',
            'avg_owner_commits_per_fork' and
            'percentage_with_owner_commits'; each defaults to 0.
    """
    print_section_header("OWNER COMMIT ANALYSIS")

    def stat(key):
        # Every figure in this section falls back to 0 when absent.
        return stats.get(key, 0)

    analyzed = stat('total_analyzed')
    committing = stat('forks_with_owner_commits')
    commit_sum = stat('total_owner_commits')
    commit_mean = stat('avg_owner_commits_per_fork')
    committing_pct = stat('percentage_with_owner_commits')

    print(f"\nForks with owner commits: {committing:5d} / {analyzed:5d} ({committing_pct:.1f}%)")
    print(f"Total commits by fork owners: {commit_sum:5d}")
    print(f"Average commits per fork: {commit_mean:5.1f}")
|
|
|
|
def print_top_forks(data: Dict, n: int = 20):
    """Print three fork listings, each limited to *n* rows.

    The listings are: forks ranked by number of unique branches, forks
    ranked by owner commits, and active forks that have not contributed
    pull requests (in input order, not ranked).

    Args:
        data: The loaded JSON document; per-fork records are read from
            'analyzed_forks'. If that list is missing or empty, a notice
            is printed and nothing else is shown.
        n: Maximum number of rows per listing.
    """
    forks = data.get('analyzed_forks', [])
    if not forks:
        print("\nNo detailed fork data available in JSON file.")
        return

    def branch_count(entry):
        return len(entry.get('unique_branches', []))

    def commit_count(entry):
        return entry.get('owner_commits', 0)

    # Listing 1: most unique branches first.
    print_section_header(f"TOP {n} FORKS BY UNIQUE BRANCHES")
    by_branches = sorted(forks, key=branch_count, reverse=True)[:n]

    print(f"\n{'Rank':<6} {'Fork':<45} {'Unique Branches':<20} {'Owner Commits'}")
    print("-" * 90)
    for rank, entry in enumerate(by_branches, 1):
        branches = branch_count(entry)
        commits = commit_count(entry)
        print(f"{rank:<6} {entry.get('full_name', 'Unknown'):<45} {branches:<20} {commits}")

    # Listing 2: most owner commits first.
    print_section_header(f"TOP {n} FORKS BY OWNER COMMITS")
    by_commits = sorted(forks, key=commit_count, reverse=True)[:n]

    print(f"\n{'Rank':<6} {'Fork':<45} {'Owner Commits':<20} {'Active'}")
    print("-" * 90)
    for rank, entry in enumerate(by_commits, 1):
        commits = commit_count(entry)
        if entry.get('is_active', False):
            active_flag = "Yes"
        else:
            active_flag = "No"
        print(f"{rank:<6} {entry.get('full_name', 'Unknown'):<45} {commits:<20} {active_flag}")

    # Listing 3: active forks with no PR contributions, first n in input order.
    print_section_header("ACTIVE FORKS WITHOUT PR CONTRIBUTIONS")
    quiet_active = [
        entry for entry in forks
        if entry.get('is_active', False) and not entry.get('has_contributed_prs', False)
    ][:n]

    if not quiet_active:
        print("\nNo active forks without PR contributions found.")
        return

    print(f"\n{'Fork':<45} {'Recent Commits':<20} {'Owner Commits':<20} {'Days Behind'}")
    print("-" * 110)
    for entry in quiet_active:
        recent = entry.get('recent_commits', 0)
        owner = commit_count(entry)
        days = entry.get('behind_main_by_days', 0)
        print(f"{entry.get('full_name', 'Unknown'):<45} {recent:<20} {owner:<20} {days}")
|
|
|
|
def create_age_distribution_chart(stats: Dict, save_path: str = None):
    """Create a pie chart of the fork age distribution.

    Args:
        stats: The 'statistics' mapping; bucket counts are read from
            'age_distribution'.
        save_path: File to write the chart to. When omitted, the chart
            is shown in an interactive window instead.

    Nothing is drawn (a notice is printed instead) when matplotlib is
    unavailable or when every bucket count is zero.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping age distribution chart (matplotlib not available)")
        return

    age_dist = stats.get('age_distribution', {})

    labels = ['≤ 1 month', '≤ 3 months', '≤ 6 months', '≤ 1 year', '≤ 2 years', '> 5 years']
    bucket_keys = ['1_month', '3_months', '6_months', '1_year', '2_years', '5_plus_years']
    sizes = [age_dist.get(key, 0) for key in bucket_keys]

    # A pie needs a non-zero total to compute wedge fractions; bail out
    # early the same way the owner-commits chart does for empty data.
    if not any(sizes):
        print("No age distribution data available.")
        return

    colors = ['#2ecc71', '#27ae60', '#f39c12', '#e67e22', '#e74c3c', '#95a5a6']

    fig, ax = plt.subplots(figsize=(10, 8))
    _, _, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%',
                             colors=colors, startangle=90)

    ax.set_title('Fork Age Distribution (Last Update)', fontsize=16, fontweight='bold', pad=20)

    # Make percentage text more readable
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(10)
        autotext.set_fontweight('bold')

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")
    else:
        plt.show()

    # Close this figure explicitly so memory is released between charts.
    plt.close(fig)
|
|
|
|
def create_activity_bar_chart(stats: Dict, save_path: str = None):
    """Create a bar chart of the fork activity percentages.

    Args:
        stats: The 'statistics' mapping; the five 'percentage_*' keys
            listed below are read, each defaulting to 0.
        save_path: File to write the chart to. When omitted, the chart
            is shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping activity bar chart (matplotlib not available)")
        return

    # (tick label, statistics key, bar colour) for each bar, left to right.
    series = [
        ('Unique\nBranches', 'percentage_with_unique_branches', '#3498db'),
        ('Recent\nMain', 'percentage_with_recent_main', '#2ecc71'),
        ('Contributed\nPRs', 'percentage_contributed_prs', '#9b59b6'),
        ('Active\nNo PRs', 'percentage_active_non_contributing', '#e67e22'),
        ('Owner\nCommits', 'percentage_with_owner_commits', '#e74c3c'),
    ]
    tick_labels = [label for label, _, _ in series]
    heights = [stats.get(key, 0) for _, key, _ in series]
    palette = [shade for _, _, shade in series]

    _, axes = plt.subplots(figsize=(12, 7))
    rects = axes.bar(tick_labels, heights, color=palette, alpha=0.8,
                     edgecolor='black', linewidth=1.5)

    axes.set_ylabel('Percentage of Forks (%)', fontsize=12, fontweight='bold')
    axes.set_title('Fork Activity Metrics', fontsize=16, fontweight='bold', pad=20)
    axes.set_ylim(0, 100)
    axes.grid(axis='y', alpha=0.3, linestyle='--')

    # Write each bar's value just above its top edge, centred horizontally.
    for rect in rects:
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width()/2.
        axes.text(centre_x, top + 1, f'{top:.1f}%',
                  ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")

    plt.close()
|
|
|
|
def create_owner_commits_distribution(data: Dict, save_path: str = None):
    """Create a histogram of owner commit counts across forks.

    Only forks with at least one owner commit are included.

    Args:
        data: The loaded JSON document; per-fork records are read from
            'analyzed_forks' and their 'owner_commits' values are plotted.
        save_path: File to write the chart to. When omitted, the chart
            is shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping owner commits distribution chart (matplotlib not available)")
        return

    forks = data.get('analyzed_forks', [])
    if not forks:
        print("No detailed fork data for commits distribution chart.")
        return

    per_fork = (fork.get('owner_commits', 0) for fork in forks)
    commits = [count for count in per_fork if count > 0]
    if not commits:
        print("No owner commits data available.")
        return

    _, axes = plt.subplots(figsize=(12, 7))

    # Draw the histogram; the uniform colour is overridden per bar below.
    _, edges, bars = axes.hist(commits, bins=20, color='#3498db', alpha=0.7, edgecolor='black')

    # Shade each bar by the position of its bin centre, scaled to 0..1
    # from the lowest to the highest bin.
    colormap = plt.cm.RdYlGn_r
    centres = 0.5 * (edges[:-1] + edges[1:])
    scaled = centres - min(centres)
    scaled /= max(scaled)
    for shade, bar in zip(scaled, bars):
        plt.setp(bar, 'facecolor', colormap(shade))

    axes.set_xlabel('Number of Owner Commits', fontsize=12, fontweight='bold')
    axes.set_ylabel('Number of Forks', fontsize=12, fontweight='bold')
    axes.set_title('Distribution of Owner Commits Across Forks', fontsize=16, fontweight='bold', pad=20)
    axes.grid(axis='y', alpha=0.3, linestyle='--')

    # Summary box in the top-right corner of the axes.
    stats_text = f'Total Forks: {len(commits)}\nMean: {sum(commits)/len(commits):.1f}\nMax: {max(commits)}'
    box_style = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    axes.text(0.95, 0.95, stats_text, transform=axes.transAxes,
              verticalalignment='top', horizontalalignment='right',
              bbox=box_style,
              fontsize=10, fontweight='bold')

    plt.tight_layout()

    if not save_path:
        plt.show()
    else:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")

    plt.close()
|
|
|
|
def create_combined_dashboard(stats: Dict, data: Dict, save_path: str = None):
    """Create a 2x2 dashboard combining the charts and a text summary.

    Panels: age distribution pie (top left), activity percentages bar
    chart (top right), owner commits histogram (bottom left) and a text
    summary (bottom right). A panel with no data shows a short
    placeholder message instead of an empty or degenerate plot.

    Args:
        stats: The 'statistics' mapping from the JSON document.
        data: The full JSON document; 'main_repo' and 'analyzed_forks'
            are read from it.
        save_path: File to write the dashboard to. When omitted, it is
            shown in an interactive window instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping combined dashboard (matplotlib not available)")
        return

    fig = plt.figure(figsize=(16, 10))

    # Age distribution pie chart
    ax1 = plt.subplot(2, 2, 1)
    age_dist = stats.get('age_distribution', {})
    labels = ['≤1mo', '≤3mo', '≤6mo', '≤1yr', '≤2yr', '>5yr']
    bucket_keys = ['1_month', '3_months', '6_months', '1_year', '2_years', '5_plus_years']
    sizes = [age_dist.get(key, 0) for key in bucket_keys]
    colors = ['#2ecc71', '#27ae60', '#f39c12', '#e67e22', '#e74c3c', '#95a5a6']
    if any(sizes):
        ax1.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors, startangle=90)
    else:
        # A pie needs a non-zero total to compute wedge fractions.
        ax1.text(0.5, 0.5, 'No age data available', ha='center', va='center',
                 transform=ax1.transAxes)
        ax1.axis('off')
    ax1.set_title('Fork Age Distribution', fontweight='bold')

    # Activity metrics bar chart
    ax2 = plt.subplot(2, 2, 2)
    metrics = ['Unique\nBranches', 'Recent\nMain', 'PRs', 'Active\nNo PRs', 'Owner\nCommits']
    values = [
        stats.get('percentage_with_unique_branches', 0),
        stats.get('percentage_with_recent_main', 0),
        stats.get('percentage_contributed_prs', 0),
        stats.get('percentage_active_non_contributing', 0),
        stats.get('percentage_with_owner_commits', 0),
    ]
    colors_bar = ['#3498db', '#2ecc71', '#9b59b6', '#e67e22', '#e74c3c']
    ax2.bar(metrics, values, color=colors_bar, alpha=0.8)
    ax2.set_ylabel('Percentage (%)')
    ax2.set_title('Activity Metrics', fontweight='bold')
    ax2.set_ylim(0, 100)
    ax2.grid(axis='y', alpha=0.3)

    # Owner commits histogram (forks with at least one owner commit)
    ax3 = plt.subplot(2, 2, 3)
    analyzed_forks = data.get('analyzed_forks', [])
    per_fork = (fork.get('owner_commits', 0) for fork in analyzed_forks)
    commits = [count for count in per_fork if count > 0]
    if commits:
        ax3.hist(commits, bins=15, color='#3498db', alpha=0.7, edgecolor='black')
        ax3.set_xlabel('Owner Commits')
        ax3.set_ylabel('Frequency')
        ax3.grid(axis='y', alpha=0.3)
    else:
        ax3.text(0.5, 0.5, 'No owner commits data available', ha='center', va='center',
                 transform=ax3.transAxes)
        ax3.axis('off')
    ax3.set_title('Owner Commits Distribution', fontweight='bold')

    # Summary statistics
    ax4 = plt.subplot(2, 2, 4)
    ax4.axis('off')

    main_repo = data.get('main_repo', {})
    recent_forks = age_dist.get('1_month', 0) + age_dist.get('3_months', 0)
    summary_lines = [
        "REPOSITORY STATISTICS",
        '=' * 35,
        "",
        f"Repository: {main_repo.get('full_name', 'Unknown')}",
        f"Total Forks: {main_repo.get('forks_count', 0):,}",
        f"Analyzed: {stats.get('total_analyzed', 0)}",
        "",
        "KEY METRICS:",
        f"• Unique Branches: {stats.get('forks_with_unique_branches', 0)} ({stats.get('percentage_with_unique_branches', 0):.1f}%)",
        f"• PR Contributors: {stats.get('forks_that_contributed_prs', 0)} ({stats.get('percentage_contributed_prs', 0):.1f}%)",
        f"• Owner Commits: {stats.get('total_owner_commits', 0):,}",
        f"• Avg Commits/Fork: {stats.get('avg_owner_commits_per_fork', 0):.1f}",
        "",
        "INSIGHTS:",
        f"• Recent Forks: {recent_forks}",
        f"• Very Old (>5yr): {age_dist.get('5_plus_years', 0)}",
        f"• Active No PRs: {stats.get('active_non_contributing_forks', 0)}",
    ]
    # Leading and trailing newline keep the padding inside the text box.
    summary_text = "\n" + "\n".join(summary_lines) + "\n"

    ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
             verticalalignment='top', fontsize=11, fontfamily='monospace',
             bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))

    fig.suptitle('Fork Statistics Dashboard', fontsize=18, fontweight='bold', y=0.98)
    # Leave room at the top of the figure for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.96])

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")
    else:
        plt.show()

    plt.close(fig)
|
|
|
|
def main():
    """Command-line entry point: print the text report, then build charts.

    Parses the command line, loads the JSON file, prints every text
    section and, unless --no-graphs is given, either saves the charts
    to --output-dir (with --save-plots) or shows them interactively.
    Chart generation is skipped with a single notice when matplotlib is
    not installed.
    """
    parser = argparse.ArgumentParser(description='Visualize fork statistics data')
    parser.add_argument('json_file', help='Path to the JSON file with fork statistics')
    parser.add_argument('--save-plots', action='store_true', help='Save plots to files instead of displaying')
    parser.add_argument('--output-dir', default='./fork_plots', help='Directory to save plots (default: ./fork_plots)')
    parser.add_argument('--top-n', type=int, default=20, help='Number of top forks to display (default: 20)')
    parser.add_argument('--no-graphs', action='store_true', help='Skip graph generation, only show text statistics')

    args = parser.parse_args()

    # Load data
    data = load_json_data(args.json_file)
    stats = data.get('statistics', {})

    # Print text statistics
    print_repository_info(data)
    print_age_distribution(stats)
    print_activity_analysis(stats)
    print_owner_commits(stats)
    print_top_forks(data, args.top_n)

    # Generate graphs if not disabled
    if not args.no_graphs:
        print_section_header("GENERATING VISUAL CHARTS")

        if not HAS_MATPLOTLIB:
            # Without matplotlib no chart can be produced, so don't create
            # the output directory or claim that plots were saved.
            print("\nSkipping chart generation (matplotlib not available)")
        elif args.save_plots:
            output_dir = Path(args.output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)
            print(f"\nSaving plots to: {output_dir}")

            create_age_distribution_chart(stats, output_dir / "age_distribution.png")
            create_activity_bar_chart(stats, output_dir / "activity_metrics.png")
            create_owner_commits_distribution(data, output_dir / "owner_commits_distribution.png")
            create_combined_dashboard(stats, data, output_dir / "dashboard.png")

            print(f"\nAll plots saved to: {output_dir}")
        else:
            print("\nDisplaying interactive plots...")
            print("Close each plot window to see the next one.")

            create_age_distribution_chart(stats)
            create_activity_bar_chart(stats)
            create_owner_commits_distribution(data)
            create_combined_dashboard(stats, data)

    print_section_header("VISUALIZATION COMPLETE")
    print(f"\nData source: {args.json_file}")
    print(f"Total forks analyzed: {stats.get('total_analyzed', 0)}")
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|