#!/usr/bin/env python3
"""
Fork Statistics Visualizer

This script loads JSON data generated by fork_stats.py and displays
detailed statistics both as formatted lists and visual graphs.

Usage:
    python3 tools/fork_stats_visualizer.py results.json
    python3 tools/fork_stats_visualizer.py results.json --save-plots
    python3 tools/fork_stats_visualizer.py results.json --output-dir ./plots
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Dict, List, Optional, Union
from datetime import datetime, timezone

# Try to import matplotlib, but allow running without it
try:
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
    HAS_MATPLOTLIB = True
except ImportError:
    HAS_MATPLOTLIB = False
    print("Warning: matplotlib not installed. Graphical visualizations will be disabled.")
    print("Install with: pip install -r tools/fork_stats_visualizer_requirements.txt")
    print()


# Accepted types for a plot destination: main() passes pathlib.Path objects,
# while external callers may pass plain strings.
_PathLike = Union[str, Path]

# Keys of the 'age_distribution' mapping, in display order, plus the colour
# assigned to each bucket in the pie charts.
_AGE_KEYS = ('1_month', '3_months', '6_months', '1_year', '2_years', '5_plus_years')
_AGE_COLORS = ['#2ecc71', '#27ae60', '#f39c12', '#e67e22', '#e74c3c', '#95a5a6']

# Keys of the percentage metrics shown in the activity bar charts, in display
# order, plus the colour assigned to each bar.
_ACTIVITY_PERCENTAGE_KEYS = (
    'percentage_with_unique_branches',
    'percentage_with_recent_main',
    'percentage_contributed_prs',
    'percentage_active_non_contributing',
    'percentage_with_owner_commits',
)
_ACTIVITY_COLORS = ['#3498db', '#2ecc71', '#9b59b6', '#e67e22', '#e74c3c']


def _age_sizes(stats: Dict) -> List:
    """Return the age-bucket counts from *stats* in display order (missing -> 0)."""
    age_dist = stats.get('age_distribution', {})
    return [age_dist.get(key, 0) for key in _AGE_KEYS]


def _activity_percentages(stats: Dict) -> List:
    """Return the activity percentages from *stats* in display order (missing -> 0)."""
    return [stats.get(key, 0) for key in _ACTIVITY_PERCENTAGE_KEYS]


def _owner_commit_counts(data: Dict) -> List:
    """Return the positive 'owner_commits' values of all analyzed forks."""
    return [fork.get('owner_commits', 0)
            for fork in data.get('analyzed_forks', [])
            if fork.get('owner_commits', 0) > 0]


def _finish_figure(save_path: Optional[_PathLike]) -> None:
    """Save the current figure to *save_path*, or show it if no path is given, then close it."""
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved: {save_path}")
    else:
        plt.show()
    plt.close()


def load_json_data(filepath: str) -> Dict:
    """Load the JSON data file.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded top-level JSON object.

    Exits the process with status 1 (after printing an error) when the file
    does not exist, is not valid JSON, or its root is not a JSON object.
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found.")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in '{filepath}': {e}")
        sys.exit(1)

    # Every consumer calls .get() on the result, so anything other than an
    # object root would only fail later with an AttributeError.
    if not isinstance(data, dict):
        print(f"Error: Invalid JSON in '{filepath}': expected a JSON object at the top level.")
        sys.exit(1)
    return data


def print_section_header(title: str):
    """Print *title* framed by two 80-character '=' rules."""
    print("\n" + "=" * 80)
    print(f" {title}")
    print("=" * 80)


def print_repository_info(data: Dict):
    """Print repository information.

    Reads 'main_repo', 'total_forks' and the optional 'analysis_timestamp'
    from *data*; missing values fall back to 'Unknown' / 0.
    """
    print_section_header("REPOSITORY INFORMATION")

    main_repo = data.get('main_repo', {})
    print(f"\nRepository: {main_repo.get('full_name', 'Unknown')}")
    print(f"Total Forks: {main_repo.get('forks_count', 0):,}")
    print(f"Stars: {main_repo.get('stargazers_count', 0):,}")
    print(f"Watchers: {main_repo.get('watchers_count', 0):,}")
    print(f"\nAnalyzed Forks: {data.get('total_forks', 0)}")

    if 'analysis_timestamp' in data:
        timestamp = data['analysis_timestamp']
        try:
            # fromisoformat() on older Pythons rejects a trailing 'Z'.
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
            # The output is labelled "UTC", so an offset-aware value must
            # actually be converted; naive values are printed unchanged.
            if dt.tzinfo is not None:
                dt = dt.astimezone(timezone.utc)
        except (AttributeError, TypeError, ValueError, OverflowError):
            # Not a parseable ISO string: fall back to the raw value.
            print(f"Analysis Date: {timestamp}")
        else:
            print(f"Analysis Date: {dt.strftime('%Y-%m-%d %H:%M:%S UTC')}")


def print_age_distribution(stats: Dict):
    """Print fork age distribution statistics.

    Each bucket of stats['age_distribution'] is shown with its count, its
    share of stats['total_analyzed'] and a text bar (one block per 2%).
    """
    print_section_header("FORK AGE DISTRIBUTION")

    age_dist = stats.get('age_distribution', {})
    total = stats.get('total_analyzed', 1)

    age_labels = {
        '1_month': 'Last updated ≤ 1 month',
        '3_months': 'Last updated ≤ 3 months',
        '6_months': 'Last updated ≤ 6 months',
        '1_year': 'Last updated ≤ 1 year',
        '2_years': 'Last updated ≤ 2 years',
        '5_plus_years': 'Last updated > 5 years'
    }

    print("\nAge Category Count Percentage")
    print("-" * 60)

    for key, label in age_labels.items():
        count = age_dist.get(key, 0)
        # Guard against a zero total to avoid ZeroDivisionError.
        pct = (count / total * 100) if total > 0 else 0
        bar = '█' * int(pct / 2)  # Visual bar
        print(f"{label:30} {count:5d} {pct:5.1f}% {bar}")


def print_activity_analysis(stats: Dict):
    """Print fork activity analysis.

    Counts and percentages are taken as-is from *stats*; nothing is
    recomputed here.
    """
    print_section_header("FORK ACTIVITY ANALYSIS")

    metrics = [
        ('Forks with unique branches',
         stats.get('forks_with_unique_branches', 0),
         stats.get('percentage_with_unique_branches', 0)),
        ('Forks with recent main branch',
         stats.get('forks_with_recent_main', 0),
         stats.get('percentage_with_recent_main', 0)),
        ('Forks that contributed PRs',
         stats.get('forks_that_contributed_prs', 0),
         stats.get('percentage_contributed_prs', 0)),
        ('Active forks (no PR contributions)',
         stats.get('active_non_contributing_forks', 0),
         stats.get('percentage_active_non_contributing', 0)),
    ]

    print("\nMetric Count Percentage")
    print("-" * 65)

    for label, count, pct in metrics:
        bar = '█' * int(pct / 2)  # Visual bar
        print(f"{label:35} {count:5d} {pct:5.1f}% {bar}")


def print_owner_commits(stats: Dict):
    """Print owner commit statistics taken from *stats*."""
    print_section_header("OWNER COMMIT ANALYSIS")

    total_forks = stats.get('total_analyzed', 0)
    forks_with_commits = stats.get('forks_with_owner_commits', 0)
    total_commits = stats.get('total_owner_commits', 0)
    avg_commits = stats.get('avg_owner_commits_per_fork', 0)
    pct_with_commits = stats.get('percentage_with_owner_commits', 0)

    print(f"\nForks with owner commits: {forks_with_commits:5d} / {total_forks:5d} ({pct_with_commits:.1f}%)")
    print(f"Total commits by fork owners: {total_commits:5d}")
    print(f"Average commits per fork: {avg_commits:5.1f}")


def print_top_forks(data: Dict, n: int = 20):
    """Print top forks by various metrics.

    Prints three tables built from data['analyzed_forks']: the top *n* forks
    by number of unique branches, the top *n* by owner commits, and up to *n*
    active forks that have not contributed PRs.

    Args:
        data: Decoded JSON document.
        n: Maximum number of rows per table.
    """
    analyzed_forks = data.get('analyzed_forks', [])

    if not analyzed_forks:
        print("\nNo detailed fork data available in JSON file.")
        return

    # Top forks by unique branches
    print_section_header(f"TOP {n} FORKS BY UNIQUE BRANCHES")
    forks_by_branches = sorted(analyzed_forks,
                               key=lambda x: len(x.get('unique_branches', [])),
                               reverse=True)[:n]

    print(f"\n{'Rank':<6} {'Fork':<45} {'Unique Branches':<20} {'Owner Commits'}")
    print("-" * 90)

    for i, fork in enumerate(forks_by_branches, 1):
        unique_count = len(fork.get('unique_branches', []))
        owner_commits = fork.get('owner_commits', 0)
        print(f"{i:<6} {fork.get('full_name', 'Unknown'):<45} {unique_count:<20} {owner_commits}")

    # Top forks by owner commits
    print_section_header(f"TOP {n} FORKS BY OWNER COMMITS")
    forks_by_commits = sorted(analyzed_forks,
                              key=lambda x: x.get('owner_commits', 0),
                              reverse=True)[:n]

    print(f"\n{'Rank':<6} {'Fork':<45} {'Owner Commits':<20} {'Active'}")
    print("-" * 90)

    for i, fork in enumerate(forks_by_commits, 1):
        commits = fork.get('owner_commits', 0)
        is_active = "Yes" if fork.get('is_active', False) else "No"
        print(f"{i:<6} {fork.get('full_name', 'Unknown'):<45} {commits:<20} {is_active}")

    # Active forks that haven't contributed
    print_section_header("ACTIVE FORKS WITHOUT PR CONTRIBUTIONS")
    active_no_pr = [f for f in analyzed_forks
                    if f.get('is_active', False) and not f.get('has_contributed_prs', False)][:n]

    if active_no_pr:
        print(f"\n{'Fork':<45} {'Recent Commits':<20} {'Owner Commits':<20} {'Days Behind'}")
        print("-" * 110)

        for fork in active_no_pr:
            recent = fork.get('recent_commits', 0)
            owner = fork.get('owner_commits', 0)
            days = fork.get('behind_main_by_days', 0)
            print(f"{fork.get('full_name', 'Unknown'):<45} {recent:<20} {owner:<20} {days}")
    else:
        print("\nNo active forks without PR contributions found.")


def create_age_distribution_chart(stats: Dict, save_path: Optional[_PathLike] = None):
    """Create a pie chart for fork age distribution.

    Args:
        stats: The 'statistics' mapping of the JSON document.
        save_path: File to write the chart to; when omitted the chart is
            shown interactively instead.

    Does nothing (beyond printing a notice) when matplotlib is unavailable
    or when every age bucket is empty.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping age distribution chart (matplotlib not available)")
        return

    labels = ['≤ 1 month', '≤ 3 months', '≤ 6 months', '≤ 1 year', '≤ 2 years', '> 5 years']
    sizes = _age_sizes(stats)

    # A pie chart normalises by the sum of its values; with an all-zero
    # distribution that is a division by zero, so skip the chart instead.
    if sum(sizes) <= 0:
        print("No age distribution data available.")
        return

    _, ax = plt.subplots(figsize=(10, 8))
    _, _, autotexts = ax.pie(sizes, labels=labels, autopct='%1.1f%%',
                             colors=_AGE_COLORS, startangle=90)

    ax.set_title('Fork Age Distribution (Last Update)', fontsize=16, fontweight='bold', pad=20)

    # Make percentage text more readable
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontsize(10)
        autotext.set_fontweight('bold')

    plt.tight_layout()
    _finish_figure(save_path)


def create_activity_bar_chart(stats: Dict, save_path: Optional[_PathLike] = None):
    """Create a bar chart for fork activity metrics.

    Args:
        stats: The 'statistics' mapping of the JSON document.
        save_path: File to write the chart to; when omitted the chart is
            shown interactively instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping activity bar chart (matplotlib not available)")
        return

    metrics = [
        'Unique\nBranches',
        'Recent\nMain',
        'Contributed\nPRs',
        'Active\nNo PRs',
        'Owner\nCommits'
    ]
    values = _activity_percentages(stats)

    _, ax = plt.subplots(figsize=(12, 7))
    bars = ax.bar(metrics, values, color=_ACTIVITY_COLORS, alpha=0.8,
                  edgecolor='black', linewidth=1.5)

    ax.set_ylabel('Percentage of Forks (%)', fontsize=12, fontweight='bold')
    ax.set_title('Fork Activity Metrics', fontsize=16, fontweight='bold', pad=20)
    ax.set_ylim(0, 100)
    ax.grid(axis='y', alpha=0.3, linestyle='--')

    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height + 1,
                f'{height:.1f}%', ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    _finish_figure(save_path)


def create_owner_commits_distribution(data: Dict, save_path: Optional[_PathLike] = None):
    """Create a histogram of owner commits distribution.

    Only forks with a positive 'owner_commits' value are included.

    Args:
        data: Decoded JSON document (reads 'analyzed_forks').
        save_path: File to write the chart to; when omitted the chart is
            shown interactively instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping owner commits distribution chart (matplotlib not available)")
        return

    if not data.get('analyzed_forks', []):
        print("No detailed fork data for commits distribution chart.")
        return

    commits = _owner_commit_counts(data)
    if not commits:
        print("No owner commits data available.")
        return

    _, ax = plt.subplots(figsize=(12, 7))

    # Create histogram with bins
    _, bins, patches = ax.hist(commits, bins=20, color='#3498db', alpha=0.7, edgecolor='black')

    # Color code the bins: map each bin centre onto [0, 1] for the colormap.
    cm = plt.cm.RdYlGn_r
    bin_centers = 0.5 * (bins[:-1] + bins[1:])
    col = bin_centers - bin_centers.min()
    span = col.max()
    if span > 0:  # avoid dividing by zero should all centres coincide
        col = col / span

    for c, p in zip(col, patches):
        plt.setp(p, 'facecolor', cm(c))

    ax.set_xlabel('Number of Owner Commits', fontsize=12, fontweight='bold')
    ax.set_ylabel('Number of Forks', fontsize=12, fontweight='bold')
    ax.set_title('Distribution of Owner Commits Across Forks', fontsize=16, fontweight='bold', pad=20)
    ax.grid(axis='y', alpha=0.3, linestyle='--')

    # Add statistics text
    stats_text = f'Total Forks: {len(commits)}\nMean: {sum(commits)/len(commits):.1f}\nMax: {max(commits)}'
    ax.text(0.95, 0.95, stats_text, transform=ax.transAxes,
            verticalalignment='top', horizontalalignment='right',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
            fontsize=10, fontweight='bold')

    plt.tight_layout()
    _finish_figure(save_path)


def create_combined_dashboard(stats: Dict, data: Dict, save_path: Optional[_PathLike] = None):
    """Create a combined dashboard with multiple charts.

    The 2x2 figure holds the age pie chart, the activity bar chart, the
    owner-commits histogram and a text panel with summary statistics.

    Args:
        stats: The 'statistics' mapping of the JSON document.
        data: The whole decoded JSON document.
        save_path: File to write the dashboard to; when omitted it is shown
            interactively instead.
    """
    if not HAS_MATPLOTLIB:
        print("Skipping combined dashboard (matplotlib not available)")
        return

    fig = plt.figure(figsize=(16, 10))

    # Age distribution pie chart
    ax1 = plt.subplot(2, 2, 1)
    age_dist = stats.get('age_distribution', {})
    labels = ['≤1mo', '≤3mo', '≤6mo', '≤1yr', '≤2yr', '>5yr']
    sizes = _age_sizes(stats)
    if sum(sizes) > 0:
        ax1.pie(sizes, labels=labels, autopct='%1.1f%%', colors=_AGE_COLORS, startangle=90)
    else:
        # An all-zero pie cannot be normalised; show a placeholder instead.
        ax1.axis('off')
        ax1.text(0.5, 0.5, 'No age data', transform=ax1.transAxes,
                 ha='center', va='center')
    ax1.set_title('Fork Age Distribution', fontweight='bold')

    # Activity metrics bar chart
    ax2 = plt.subplot(2, 2, 2)
    metrics = ['Unique\nBranches', 'Recent\nMain', 'PRs', 'Active\nNo PRs', 'Owner\nCommits']
    ax2.bar(metrics, _activity_percentages(stats), color=_ACTIVITY_COLORS, alpha=0.8)
    ax2.set_ylabel('Percentage (%)')
    ax2.set_title('Activity Metrics', fontweight='bold')
    ax2.set_ylim(0, 100)
    ax2.grid(axis='y', alpha=0.3)

    # Owner commits histogram
    ax3 = plt.subplot(2, 2, 3)
    commits = _owner_commit_counts(data)
    if commits:
        ax3.hist(commits, bins=15, color='#3498db', alpha=0.7, edgecolor='black')
    ax3.set_xlabel('Owner Commits')
    ax3.set_ylabel('Frequency')
    ax3.set_title('Owner Commits Distribution', fontweight='bold')
    ax3.grid(axis='y', alpha=0.3)

    # Summary statistics
    ax4 = plt.subplot(2, 2, 4)
    ax4.axis('off')

    main_repo = data.get('main_repo', {})
    summary_text = f"""
    REPOSITORY STATISTICS
    {'='*35}

    Repository: {main_repo.get('full_name', 'Unknown')}
    Total Forks: {main_repo.get('forks_count', 0):,}
    Analyzed: {stats.get('total_analyzed', 0)}

    KEY METRICS:
    • Unique Branches: {stats.get('forks_with_unique_branches', 0)} ({stats.get('percentage_with_unique_branches', 0):.1f}%)
    • PR Contributors: {stats.get('forks_that_contributed_prs', 0)} ({stats.get('percentage_contributed_prs', 0):.1f}%)
    • Owner Commits: {stats.get('total_owner_commits', 0):,}
    • Avg Commits/Fork: {stats.get('avg_owner_commits_per_fork', 0):.1f}

    INSIGHTS:
    • Recent Forks: {age_dist.get('1_month', 0) + age_dist.get('3_months', 0)}
    • Very Old (>5yr): {age_dist.get('5_plus_years', 0)}
    • Active No PRs: {stats.get('active_non_contributing_forks', 0)}
    """

    ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
             verticalalignment='top', fontsize=11, fontfamily='monospace',
             bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))

    fig.suptitle('Fork Statistics Dashboard', fontsize=18, fontweight='bold', y=0.98)
    # Leave room at the top for the suptitle.
    plt.tight_layout(rect=[0, 0.03, 1, 0.96])
    _finish_figure(save_path)


def main(argv: Optional[List[str]] = None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to sys.argv[1:] when omitted.
    """
    parser = argparse.ArgumentParser(description='Visualize fork statistics data')
    parser.add_argument('json_file', help='Path to the JSON file with fork statistics')
    parser.add_argument('--save-plots', action='store_true',
                        help='Save plots to files instead of displaying')
    parser.add_argument('--output-dir', default='./fork_plots',
                        help='Directory to save plots (default: ./fork_plots)')
    parser.add_argument('--top-n', type=int, default=20,
                        help='Number of top forks to display (default: 20)')
    parser.add_argument('--no-graphs', action='store_true',
                        help='Skip graph generation, only show text statistics')

    args = parser.parse_args(argv)

    # Load data
    data = load_json_data(args.json_file)
    stats = data.get('statistics', {})

    # Print text statistics
    print_repository_info(data)
    print_age_distribution(stats)
    print_activity_analysis(stats)
    print_owner_commits(stats)
    print_top_forks(data, args.top_n)

    # Generate graphs if not disabled
    if not args.no_graphs:
        print_section_header("GENERATING VISUAL CHARTS")

        if args.save_plots:
            output_dir = Path(args.output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)
            print(f"\nSaving plots to: {output_dir}")

            create_age_distribution_chart(stats, output_dir / "age_distribution.png")
            create_activity_bar_chart(stats, output_dir / "activity_metrics.png")
            create_owner_commits_distribution(data, output_dir / "owner_commits_distribution.png")
            create_combined_dashboard(stats, data, output_dir / "dashboard.png")

            print(f"\nAll plots saved to: {output_dir}")
        else:
            print("\nDisplaying interactive plots...")
            print("Close each plot window to see the next one.")

            create_age_distribution_chart(stats)
            create_activity_bar_chart(stats)
            create_owner_commits_distribution(data)
            create_combined_dashboard(stats, data)

    print_section_header("VISUALIZATION COMPLETE")
    print(f"\nData source: {args.json_file}")
    print(f"Total forks analyzed: {stats.get('total_analyzed', 0)}")


if __name__ == '__main__':
    main()