#!/usr/bin/env python3
"""
API拦截器 - 使用Selenium捕获淘宝API请求
策略：启用Chrome DevTools Protocol (CDP) 监听网络请求
"""

import json
import time
import os
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

OUTPUT_DIR = "api_captured"
os.makedirs(OUTPUT_DIR, exist_ok=True)

def log(message, level="INFO"):
    timestamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{timestamp}] [{level}] {message}")

def setup_driver_with_network_capture():
    """配置支持网络捕获的Chrome驱动"""
    log("配置Chrome驱动（启用网络监听）...")

    # 启用Performance日志
    caps = DesiredCapabilities.CHROME.copy()
    caps['goog:loggingPrefs'] = {'performance': 'ALL'}

    chrome_options = Options()
    chrome_options.add_argument('--headless=new')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    chrome_options.add_argument('--window-size=1920,1080')

    # 随机UA
    chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')

    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    # 启用Performance日志
    chrome_options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    chrome_options.binary_location = '/data/data/com.termux/files/usr/bin/chromium-browser'

    chromedriver_path = '/data/data/com.termux/files/usr/bin/chromedriver'
    service = Service(executable_path=chromedriver_path)

    driver = webdriver.Chrome(service=service, options=chrome_options)

    # 隐藏webdriver
    driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
        'source': '''
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });
        '''
    })

    # 启用网络监听
    driver.execute_cdp_cmd('Network.enable', {})

    log("✓ Chrome驱动已启动（网络监听已启用）", "SUCCESS")
    return driver

def get_network_logs(driver):
    """获取网络请求日志"""
    logs = driver.get_log('performance')
    return logs

def filter_api_requests(logs):
    """过滤出API请求"""
    api_requests = []

    for entry in logs:
        try:
            log_entry = json.loads(entry['message'])
            message = log_entry.get('message', {})
            method = message.get('method', '')

            # 只关注网络请求
            if method == 'Network.responseReceived':
                params = message.get('params', {})
                response = params.get('response', {})
                url = response.get('url', '')

                # 过滤API相关的URL
                if any(keyword in url for keyword in [
                    '/search', '/api/', '.json', '/h5api/',
                    '/mtop.', 'taobao.com', 's.taobao.com'
                ]):
                    request_info = {
                        'url': url,
                        'method': response.get('method', 'GET'),
                        'status': response.get('status', 0),
                        'mimeType': response.get('mimeType', ''),
                        'headers': response.get('headers', {}),
                        'requestId': params.get('requestId', '')
                    }
                    api_requests.append(request_info)

        except Exception as e:
            continue

    return api_requests

def capture_api_from_search(keyword, wait_time=20):
    """捕获搜索时的API请求"""
    log(f"\n{'='*70}")
    log(f"捕获API: {keyword}")
    log(f"{'='*70}")

    driver = setup_driver_with_network_capture()

    try:
        # 构建URL
        from urllib.parse import quote
        url = f"https://s.taobao.com/search?q={quote(keyword)}"

        log(f"访问URL: {url}")
        driver.get(url)

        log(f"等待 {wait_time} 秒让页面完全加载...")
        time.sleep(wait_time)

        # 滚动触发更多请求
        log("滚动页面...")
        for i in range(3):
            driver.execute_script(f"window.scrollBy(0, {500 + i*200});")
            time.sleep(2)

        # 获取网络日志
        log("获取网络请求日志...")
        logs = get_network_logs(driver)

        log(f"  总日志条目: {len(logs)}")

        # 过滤API请求
        api_requests = filter_api_requests(logs)

        log(f"✓ 捕获到 {len(api_requests)} 个API请求", "SUCCESS")

        # 保存结果
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # 保存API请求
        api_file = os.path.join(OUTPUT_DIR, f"api_requests_{keyword}_{timestamp}.json")
        with open(api_file, 'w', encoding='utf-8') as f:
            json.dump(api_requests, f, ensure_ascii=False, indent=2)
        log(f"✓ API请求已保存: {api_file}")

        # 保存完整日志（调试用）
        full_log_file = os.path.join(OUTPUT_DIR, f"full_logs_{keyword}_{timestamp}.json")
        with open(full_log_file, 'w', encoding='utf-8') as f:
            # 只保存message部分
            simplified_logs = []
            for entry in logs:
                try:
                    simplified_logs.append(json.loads(entry['message']))
                except:
                    pass
            json.dump(simplified_logs, f, ensure_ascii=False, indent=2)
        log(f"✓ 完整日志已保存: {full_log_file}")

        # 显示关键API
        log("\n关键API请求:")
        for idx, req in enumerate(api_requests[:10], 1):
            log(f"\n  [{idx}] {req['method']} - {req['status']}")
            log(f"      URL: {req['url'][:100]}...")
            log(f"      类型: {req['mimeType']}")

        return api_requests

    except Exception as e:
        log(f"✗ 错误: {e}", "ERROR")
        import traceback
        log(traceback.format_exc(), "ERROR")
        return []

    finally:
        driver.quit()

def analyze_captured_apis():
    """分析捕获的API"""
    log("\n" + "="*70)
    log("分析捕获的API")
    log("="*70)

    # 读取所有捕获的API文件
    api_files = [f for f in os.listdir(OUTPUT_DIR) if f.startswith('api_requests_')]

    if not api_files:
        log("未找到捕获的API文件", "WARNING")
        return

    all_apis = {}

    for file in api_files:
        filepath = os.path.join(OUTPUT_DIR, file)
        with open(filepath, 'r', encoding='utf-8') as f:
            apis = json.load(f)

            for api in apis:
                url = api['url']
                # 提取基础URL（去掉参数）
                base_url = url.split('?')[0]

                if base_url not in all_apis:
                    all_apis[base_url] = {
                        'count': 0,
                        'example': api,
                        'urls': []
                    }
                all_apis[base_url]['count'] += 1
                all_apis[base_url]['urls'].append(url)

    # 按出现次数排序
    sorted_apis = sorted(all_apis.items(), key=lambda x: x[1]['count'], reverse=True)

    log(f"\n发现 {len(sorted_apis)} 个不同的API端点\n")

    # 显示前10个
    log("最常见的API端点:")
    for idx, (base_url, info) in enumerate(sorted_apis[:10], 1):
        log(f"\n[{idx}] 出现 {info['count']} 次")
        log(f"    URL: {base_url}")
        log(f"    方法: {info['example']['method']}")
        log(f"    类型: {info['example']['mimeType']}")

        # 显示一个完整URL示例
        if info['urls']:
            log(f"    示例: {info['urls'][0][:150]}...")

    # 保存分析结果
    analysis_file = os.path.join(OUTPUT_DIR, "api_analysis.json")
    analysis_data = {
        'total_endpoints': len(sorted_apis),
        'endpoints': [
            {
                'base_url': base_url,
                'count': info['count'],
                'method': info['example']['method'],
                'mimeType': info['example']['mimeType'],
                'example_url': info['urls'][0] if info['urls'] else ''
            }
            for base_url, info in sorted_apis[:20]
        ]
    }

    with open(analysis_file, 'w', encoding='utf-8') as f:
        json.dump(analysis_data, f, ensure_ascii=False, indent=2)

    log(f"\n✓ 分析结果已保存: {analysis_file}")

def main():
    """主函数"""
    import sys

    # 测试关键词
    test_keywords = ['牙刷', '牙膏', '洗发水']

    if len(sys.argv) > 1:
        # 从命令行参数获取关键词
        test_keywords = sys.argv[1:]

    log("="*70)
    log("淘宝API拦截器")
    log("="*70)
    log(f"将捕获以下关键词的API: {', '.join(test_keywords)}\n")

    for keyword in test_keywords:
        capture_api_from_search(keyword)
        log("\n等待5秒后继续...\n")
        time.sleep(5)

    # 分析捕获的API
    analyze_captured_apis()

    log("\n" + "="*70)
    log("完成！")
    log(f"所有文件保存在: {OUTPUT_DIR}/")
    log("="*70)

if __name__ == "__main__":
    main()
