#!/usr/bin/env python3
"""
Search GitHub repositories for cloud providers from cloud_providers_index.json
Save results to a new JSON file
"""

import json
import os
import subprocess
import sys
import time
from datetime import datetime, timezone
from typing import List, Dict, Any

# Hard-coded absolute paths under a Termux home directory.
# INDEX_FILE is the provider index JSON that main() hands to read_index().
INDEX_FILE = "/data/data/com.termux/files/home/grok/cloud_host/cloud_providers_index.json"
# OUTPUT_FILE is the JSON report that main() writes the search results to.
OUTPUT_FILE = "/data/data/com.termux/files/home/grok/cloud_host/cloud_providers_github_repos.json"

def read_index(file_path: str) -> List[Dict[str, Any]]:
    """
    Read the cloud providers index JSON file.

    Args:
        file_path: Path of the JSON index file. The document is expected to be
            a JSON object whose 'all_providers' key holds a list of provider
            entries.

    Returns:
        The list stored under 'all_providers'. An empty list is returned when
        the key is missing. An empty list is also returned, after printing a
        diagnostic, when the file cannot be read or parsed, when the top-level
        JSON value is not an object, or when 'all_providers' is not a list.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading index file: {e}")
        return []

    if not isinstance(data, dict):
        print(
            "Error reading index file: expected a JSON object, "
            f"got {type(data).__name__}"
        )
        return []

    providers = data.get('all_providers', [])
    if not isinstance(providers, list):
        # Callers take len() of and iterate over the result, so never hand
        # back None or a scalar (e.g. when the key is present but null).
        print("Error reading index file: 'all_providers' is not a list")
        return []

    return providers

def search_github_repos(search_term: str, limit: int = 30) -> List[Dict[str, Any]]:
    """
    Search GitHub for repositories matching the provider name.

    Runs ``gh search repos`` as a subprocess (30 second timeout) and parses
    the JSON it prints on stdout.

    Args:
        search_term: Free-text query passed to ``gh`` as a single argument.
        limit: Maximum number of repositories to request.

    Returns:
        The list of repository objects decoded from the ``gh`` output. An
        empty list is returned when the term is blank or not a string, when
        ``gh`` cannot be started, times out or exits non-zero, or when its
        output is not a JSON list. Failures of the ``gh`` call are reported
        on stderr so they are not mistaken for "no repositories found".
    """
    if not isinstance(search_term, str) or not search_term.strip():
        # Nothing to search for; avoid spawning a process that can only fail.
        return []

    cmd = [
        "gh", "search", "repos",
        f"--limit={limit}",
        "--json=fullName,name,description,stargazersCount,forksCount,language,createdAt,updatedAt,url,owner",
        # Everything after "--" is treated as the query, so a term that
        # begins with "-" is not parsed as a command-line flag.
        "--",
        search_term,
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except subprocess.TimeoutExpired:
        print(f"Warning: gh search timed out for {search_term!r}", file=sys.stderr)
        return []
    except (OSError, ValueError) as e:
        # OSError: gh is missing or not executable.
        # ValueError: invalid argument (e.g. embedded NUL) or undecodable output.
        print(f"Warning: could not run gh for {search_term!r}: {e}", file=sys.stderr)
        return []

    if result.returncode != 0:
        detail = (result.stderr or "").strip()
        print(
            f"Warning: gh search failed for {search_term!r} "
            f"(exit code {result.returncode}): {detail}",
            file=sys.stderr,
        )
        return []

    # Parse JSON output
    try:
        repos = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(
            f"Warning: gh returned invalid JSON for {search_term!r}: {e}",
            file=sys.stderr,
        )
        return []

    return repos if isinstance(repos, list) else []

def extract_provider_name(full_name: str) -> str:
    """
    Extract a simple name from full provider name.

    The name is lower-cased, parentheses are removed, and the first
    whitespace-separated word of what remains is returned.
    E.g., "Amazon Web Services (AWS)" -> "amazon", "(AWS)" -> "aws"

    Args:
        full_name: Display name of the provider.

    Returns:
        The first word of the normalised name, or an empty string when the
        input contains no word at all (empty, whitespace or parentheses only).
    """
    # Remove parentheses, then split on whitespace
    words = full_name.lower().replace("(", "").replace(")", "").split()
    # An empty word list would make words[0] raise IndexError.
    return words[0] if words else ""

def _utc_timestamp() -> str:
    """
    Return the current UTC time as an ISO-8601 string with a trailing "Z".
    """
    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so isoformat() does not append "+00:00" before the "Z".
    return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"


def _search_providers(providers: List[Dict[str, Any]], limit: int,
                      pause_seconds: float = 2.0):
    """
    Run one GitHub search per provider, printing a progress line for each.

    Args:
        providers: Provider entries (dicts) read from the index.
        limit: Maximum number of repositories requested per provider.
        pause_seconds: Delay between consecutive searches (rate limiting).

    Returns:
        A ``(by_provider, total_repos)`` tuple: the per-provider result map
        keyed by provider id, and the total number of repositories found.
    """
    by_provider: Dict[str, Dict[str, Any]] = {}
    total_repos = 0
    count = len(providers)

    for position, provider in enumerate(providers, 1):
        provider_id = provider.get('id')
        # A null or non-string name would break the width format spec below.
        provider_name = str(provider.get('name') or '')

        print(f"[{position:2d}/{count}] {provider_name:<50}", end=" ", flush=True)

        # Use provider name for search
        search_term = provider_name
        repos = search_github_repos(search_term, limit=limit) if search_term.strip() else []

        # Entries without an id (or with a repeated id) must not overwrite
        # each other in the result map.
        key = str(provider_id) if provider_id is not None else f"unknown_{position}"
        if key in by_provider:
            key = f"{key}_{position}"

        by_provider[key] = {
            "provider_id": provider_id,
            "provider_name": provider_name,
            "provider_url": provider.get('url', ''),
            "search_term": search_term,
            "repositories_found": len(repos),
            "repositories": repos
        }

        total_repos += len(repos)
        print(f"✓ Found {len(repos)} repos")

        # Rate limiting (no need to wait after the last provider)
        if position < count:
            time.sleep(pause_seconds)

    return by_provider, total_repos


def _print_summary(results: Dict[str, Any], provider_count: int, total_repos: int) -> None:
    """
    Print the final report: output file size, totals and the top 10 providers.
    """
    print("")
    print("=" * 80)
    print("SEARCH COMPLETED SUCCESSFULLY")
    print("=" * 80)
    print("")
    print(f"Output file: {OUTPUT_FILE}")

    file_size = os.path.getsize(OUTPUT_FILE)
    print(f"File size: {file_size / 1024 / 1024:.2f} MB")
    print("")
    print("Summary:")
    print(f"  Total Providers: {provider_count}")
    print(f"  Total Repositories Found: {total_repos}")
    print(f"  Average Repos per Provider: {total_repos / provider_count:.1f}")
    print(f"  Providers with Results: {results['search_metadata']['providers_with_repos']}")
    print("")

    # Show top providers by repo count
    print("Top 10 Providers by Repository Count:")
    sorted_providers = sorted(
        results["providers_repositories"].values(),
        key=lambda x: x["repositories_found"],
        reverse=True
    )
    for rank, provider_data in enumerate(sorted_providers[:10], 1):
        print(f"  {rank:2d}. {provider_data['provider_name']:<45} {provider_data['repositories_found']:3d} repos")


def main() -> int:
    """
    Main function

    Reads the provider index, searches GitHub for each provider, writes the
    combined results to OUTPUT_FILE and prints a summary.

    Returns:
        0 on success; 1 when the index yields no providers (nothing is
        written in that case) or when the output file cannot be written.
    """
    # Single source for the per-provider limit used in the search and metadata.
    limit_per_provider = 30

    print("GitHub Repository Search from Cloud Providers Index")
    print(f"Index file: {INDEX_FILE}")
    print(f"Output file: {OUTPUT_FILE}")
    print("")

    # Read index; entries that are not JSON objects cannot be searched.
    print("Reading providers index...", end=" ", flush=True)
    providers = [p for p in read_index(INDEX_FILE) if isinstance(p, dict)]
    print(f"✓ Found {len(providers)} providers")
    print("")

    if not providers:
        # Stop before overwriting a previous report with an empty one and
        # before the per-provider average would divide by zero.
        print("✗ Error: no providers to search; output file not written")
        return 1

    # Prepare results structure
    results = {
        "search_metadata": {
            "search_date": _utc_timestamp(),
            "index_source": os.path.basename(INDEX_FILE),
            "total_providers_searched": len(providers),
            "tool": "gh CLI",
            "limit_per_provider": limit_per_provider
        },
        "providers_repositories": {}
    }

    by_provider, total_repos = _search_providers(providers, limit_per_provider)
    results["providers_repositories"] = by_provider

    # Update metadata
    results["search_metadata"]["total_repositories_found"] = total_repos
    results["search_metadata"]["providers_with_repos"] = sum(
        1 for entry in by_provider.values() if entry["repositories_found"] > 0
    )

    # Save to JSON file
    print("")
    print("Saving results...", end=" ", flush=True)
    try:
        with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print("✓ Done")
    except (OSError, TypeError, ValueError) as e:
        print(f"✗ Error: {e}")
        return 1

    # Display results
    _print_summary(results, len(providers), total_repos)

    print("")
    print("✓ Task completed successfully!")

    return 0

if __name__ == "__main__":
    # sys.exit() is the supported way to set the process exit status; the bare
    # exit() helper is added by the site module for interactive use and is
    # not available when Python runs with -S.
    sys.exit(main())
