#!/usr/bin/env python3
"""
Search GitHub for cloud provider repositories using the gh CLI
and save the results to a JSON file.
"""

import json
import os
import subprocess
import sys
import time
from datetime import datetime, timezone

# Absolute path of the JSON report written by main().
# NOTE(review): hard-coded, and it looks like a Termux (Android) home
# directory — confirm the intended environment before running elsewhere.
OUTPUT_FILE = "/data/data/com.termux/files/home/grok/cloud_host/cloud_providers_repos.json"

# Search terms; main() issues one `gh search repos` query per entry, in this
# order, and uses each term as a key in the output JSON. Despite the name, the
# list mixes cloud providers with related infrastructure tools
# (e.g. kubernetes, docker, nginx).
PROVIDERS = [
    "aws",
    "azure",
    "google-cloud",
    "alibaba-cloud",
    "oracle-cloud",
    "digitalocean",
    "kubernetes",
    "docker",
    "terraform",
    "ansible",
    "openstack",
    "linode",
    "vultr",
    "hetzner",
    "scaleway",
    "cloudflare",
    "nginx",
    "apache",
    "jenkins",
    "prometheus",
]

def search_provider(provider_name: str, limit: int = 50, timeout: float = 30) -> list:
    """
    Search GitHub for repositories matching the provider name.

    Runs ``gh search repos`` as a subprocess and parses its JSON output.
    Progress and failures are printed to stdout. Ordinary errors are caught
    and reported rather than raised, so one failed query does not abort a
    batch of searches.

    Args:
        provider_name: Search term passed to ``gh search repos``.
        limit: Maximum number of repositories to request (``--limit``).
        timeout: Seconds to wait for ``gh`` to finish before giving up.

    Returns:
        The list of repository objects parsed from ``gh``'s JSON output, or
        an empty list if the command failed, timed out, or produced output
        that is not a JSON array.
    """
    print(f"Searching: {provider_name}...", end=" ", flush=True)

    cmd = [
        "gh", "search", "repos",
        provider_name,
        f"--limit={limit}",
        "--json=fullName,name,description,stargazersCount,forksCount,language,createdAt,updatedAt,url,owner",
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        print("Timeout")
        return []
    except Exception as e:
        # Deliberate catch-all (e.g. `gh` not installed, undecodable output):
        # report it and let the caller carry on with the next search term.
        print(f"Error: {e}")
        return []

    if result.returncode != 0:
        print(f"Error (exit code: {result.returncode})")
        # Surface gh's own diagnostic; the exit code alone does not say why
        # the query failed.
        detail = (result.stderr or "").strip()
        if detail:
            print(f"    {detail}")
        return []

    try:
        repos = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        print(f"JSON parse error: {e}")
        return []

    # Callers rely on getting a list (they call len() and store it as-is).
    if not isinstance(repos, list):
        print(f"Error: unexpected JSON payload ({type(repos).__name__})")
        return []

    print(f"Found {len(repos)} repos")
    return repos

def main():
    """
    Search GitHub for every term in PROVIDERS and save the results as JSON.

    For each search term this calls search_provider(), records the returned
    repositories under that term, and sleeps two seconds between queries.
    The collected results plus summary metadata are written to OUTPUT_FILE,
    followed by a short report and a truncated preview on stdout.

    Exits with status 1 if the results cannot be written.
    """
    print("GitHub Cloud Provider Repository Search using gh CLI")
    print(f"Output file: {OUTPUT_FILE}")
    print(f"Total providers: {len(PROVIDERS)}")
    print("")

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop tzinfo so the serialized form keeps its "...Z" suffix
    # instead of gaining "+00:00".
    search_date = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    by_provider = {}
    metadata = {
        "search_date": search_date,
        "total_providers_searched": len(PROVIDERS),
        "tool": "gh CLI",
    }
    results = {
        "search_metadata": metadata,
        "repositories_by_provider": by_provider,
    }

    total_repos = 0
    provider_count = len(PROVIDERS)

    for position, provider in enumerate(PROVIDERS, start=1):
        print(f"[{position:2d}/{provider_count}]", end=" ")

        # A failed search yields an empty list, so one entry shape covers
        # both the success and the failure case.
        repos = search_provider(provider) or []
        by_provider[provider] = {
            "search_term": provider,
            "repository_count": len(repos),
            "repositories": repos,
        }
        total_repos += len(repos)

        # Rate limiting: pause between queries, but not after the last one.
        if position < provider_count:
            time.sleep(2)

    # Update metadata
    metadata["total_repositories_found"] = total_repos
    metadata["providers_with_results"] = sum(
        1 for entry in by_provider.values() if entry["repository_count"] > 0
    )

    # Save to JSON file. Only the filesystem work is guarded, so unrelated
    # failures are not misreported as save errors.
    try:
        # Create the target directory if needed so a long search run is not
        # lost to a missing folder.
        output_dir = os.path.dirname(OUTPUT_FILE)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        file_size = os.path.getsize(OUTPUT_FILE)
    except (OSError, ValueError) as e:
        # ValueError covers text that cannot be encoded as UTF-8.
        print(f"✗ Error saving results: {e}")
        sys.exit(1)

    print("")
    print("✓ Search completed!")
    print(f"✓ Total repositories found: {total_repos}")
    print(f"✓ Results saved to: {OUTPUT_FILE}")
    print(f"✓ File size: {file_size / 1024:.2f} KB")

    # Show preview: metadata plus the first provider entry, cut to 500 chars.
    print("")
    print("JSON structure preview:")
    preview = {
        "search_metadata": metadata,
        "sample_provider": next(iter(by_provider.items()), None),
    }
    print(json.dumps(preview, indent=2, ensure_ascii=False)[:500])

# Run the search only when this file is executed as a script, so importing
# it (e.g. to reuse search_provider) does not start any queries.
if __name__ == "__main__":
    main()
