#!/usr/bin/env python3
"""
淘宝API直接请求器
基于捕获的API进行直接请求（绕过页面渲染）
"""

import requests
import json
import time
import random
import os
import hashlib
from datetime import datetime
from urllib.parse import quote, parse_qs, urlparse
import pandas as pd

# Directory where replayed API responses and the generated scraper
# template are written; created eagerly at import time.
OUTPUT_DIR = "api_direct_results"
os.makedirs(OUTPUT_DIR, exist_ok=True)

def log(message, level="INFO"):
    """Print *message* to stdout, prefixed with a HH:MM:SS timestamp and a severity tag."""
    now = datetime.now().strftime('%H:%M:%S')
    print(f"[{now}] [{level}] {message}")

def generate_sign(t, token, appkey):
    """
    Simplified signature generation (the real signature is more complex).

    This is only illustrative and must be adapted from actual packet
    captures: Taobao's MTOP sign algorithm is non-trivial and requires
    reverse engineering, so an MD5 placeholder is returned instead.
    """
    payload = "&".join((token, t, appkey))
    return hashlib.md5(payload.encode()).hexdigest()

def request_mtop_api(api_name, data, version="1.0"):
    """
    Call a Taobao MTOP API endpoint directly.

    The request structure mirrors captured browser traffic.

    Args:
        api_name: MTOP API name, used both in the URL path and the `api` param.
        data: dict of API-specific arguments, serialized into the `data` field.
        version: API version segment of the URL (default "1.0").

    Returns:
        Parsed JSON body on success, otherwise None (non-200 status,
        non-JSON body, or any request error).
    """
    log(f"请求MTOP API: {api_name}")

    base_url = f"https://h5api.m.taobao.com/h5/{api_name}/{version}/"

    # Timestamp in milliseconds, as expected by MTOP's `t` parameter.
    t = str(int(time.time() * 1000))

    # Base query parameters, copied from captured requests.
    params = {
        'jsv': '2.7.4',
        'appKey': '12574478',  # observed in the captured requests
        't': t,
        'sign': 'placeholder_sign',  # real MTOP sign algorithm needed here
        'api': api_name,
        'v': version,
        'type': 'json',  # or jsonp
        'dataType': 'json',
        'timeout': '10000',
        # Compact separators to match the browser's JSON serialization.
        'data': json.dumps(data, separators=(',', ':'))
    }

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'https://s.taobao.com/',
        'Accept': 'application/json',
    }

    try:
        response = requests.get(base_url, params=params, headers=headers, timeout=10)

        log(f"  状态码: {response.status_code}")

        if response.status_code != 200:
            log(f"  ✗ 请求失败", "ERROR")
            return None

        try:
            # Distinct name: the original rebound the `data` parameter here.
            payload = response.json()
        except ValueError:
            # Narrowed from a bare `except:` — catch only JSON decode failures.
            log(f"  响应内容: {response.text[:200]}", "WARNING")
            return None

        log(f"  ✓ 获取JSON数据", "SUCCESS")
        return payload

    except Exception as e:
        # Top-level guard: this helper deliberately never raises to callers.
        log(f"  ✗ 错误: {e}", "ERROR")
        return None

def test_captured_api_urls():
    """
    Replay the real API URLs captured by api_interceptor.py.

    Loads captured request metadata from a hard-coded capture file,
    re-issues up to five JSON APIs with the captured headers, and saves
    every successful JSON response under OUTPUT_DIR.

    Returns:
        list: [{'api_url', 'status', 'data'}, ...]; empty when the capture
        file is missing or no request succeeded.  (Previously the
        missing-file path returned None, inconsistent with the other
        returns — now every path returns a list.)
    """
    log("="*70)
    log("测试捕获的API URL")
    log("="*70)

    # Capture file produced by a previous api_interceptor.py run.
    api_file = "api_captured/api_requests_牙刷_20251010_135245.json"

    if not os.path.exists(api_file):
        log("未找到捕获的API文件，请先运行: python api_interceptor.py", "ERROR")
        return []  # bug fix: was a bare `return` (None)

    with open(api_file, 'r', encoding='utf-8') as f:
        apis = json.load(f)

    # Keep only the APIs whose captured MIME type indicates JSON.
    json_apis = [api for api in apis if 'json' in api.get('mimeType', '').lower()]

    log(f"找到 {len(json_apis)} 个JSON API\n")

    results = []

    for idx, api in enumerate(json_apis[:5], 1):  # test the first 5 only
        log(f"\n[{idx}] 测试API:")
        url = api['url']
        log(f"    URL: {url[:100]}...")

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Referer': 'https://s.taobao.com/',
            'Accept': 'application/json, text/plain, */*',
        }

        # Merge in headers from the captured request, skipping fields that
        # requests computes itself.
        for key, value in api.get('headers', {}).items():
            if key.lower() not in ('content-length', 'host'):
                headers[key] = value

        try:
            response = requests.get(url, headers=headers, timeout=10)

            log(f"    状态: {response.status_code}")

            if response.status_code == 200:
                content_type = response.headers.get('Content-Type', '')

                if 'json' in content_type:
                    try:
                        data = response.json()
                        log(f"    ✓ 成功获取JSON", "SUCCESS")

                        # Record the response for later offline analysis.
                        results.append({
                            'api_url': url,
                            'status': response.status_code,
                            'data': data
                        })

                        # Show the top-level structure to help locate products.
                        if isinstance(data, dict):
                            log(f"    数据键: {list(data.keys())[:5]}")

                            if 'data' in data:
                                log(f"    包含'data'字段")
                                if isinstance(data['data'], dict):
                                    log(f"    data键: {list(data['data'].keys())[:5]}")

                    except Exception as e:
                        log(f"    JSON解析失败: {e}", "WARNING")
                else:
                    log(f"    响应类型: {content_type}")
                    log(f"    内容: {response.text[:150]}")

            # Polite randomized delay between requests.
            time.sleep(random.uniform(1, 2))

        except Exception as e:
            log(f"    ✗ 请求失败: {e}", "ERROR")

    # Persist all collected responses in one timestamped file.
    if results:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        result_file = os.path.join(OUTPUT_DIR, f"api_responses_{timestamp}.json")

        with open(result_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, ensure_ascii=False, indent=2)

        log(f"\n✓ API响应已保存: {result_file}")

    return results

def extract_products_from_api_response(response_data):
    """
    Extract the product array from an API response dict.

    Walks a fixed set of known key paths and returns the first non-empty
    list found; returns [] when no known layout matches.
    """
    # Candidate key paths seen across different response layouts.
    candidate_paths = [
        ['data', 'items'],
        ['data', 'itemsArray'],
        ['data', 'mainItems'],
        ['data', 'auctions'],
        ['items'],
        ['itemsArray']
    ]

    for keys in candidate_paths:
        node = response_data
        for key in keys:
            if not (isinstance(node, dict) and key in node):
                node = None
                break
            node = node[key]

        # Only a non-empty list counts as a hit; otherwise keep searching.
        if isinstance(node, list) and node:
            log(f"  找到商品数组: {keys}")
            return node

    return []

def create_simple_api_scraper():
    """
    Write a simplified API-scraper script template into OUTPUT_DIR.

    The template is a standalone skeleton; it is intentionally not runnable
    until the user pastes a real captured API URL into it.
    """
    # NOTE: the template body below is emitted verbatim into a .py file —
    # its text (including Chinese comments and placeholders) is data, not
    # code of this module.
    template = '''#!/usr/bin/env python3
"""
简化的淘宝API爬虫
使用真实的API端点（需要填入捕获的URL）
"""

import requests
import json
import time

def scrape_taobao_api(keyword):
    """
    使用API爬取商品
    """

    # 方法1: 直接使用捕获的完整URL（替换关键词部分）
    # 从api_captured/api_requests_*.json中复制URL
    url = "替换为捕获的真实API URL"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Referer': 'https://s.taobao.com/',
        'Accept': 'application/json',
        # 添加其他必要的headers
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)

        if response.status_code == 200:
            data = response.json()

            # 根据实际响应结构提取商品
            items = data.get('data', {}).get('items', [])

            products = []
            for item in items:
                product = {
                    'title': item.get('title', ''),
                    'price': item.get('price', ''),
                    'sales': item.get('sales', ''),
                    # 添加其他字段
                }
                products.append(product)

            return products

    except Exception as e:
        print(f"错误: {e}")
        return []

# 使用
products = scrape_taobao_api("牙刷")
print(f"获取到 {len(products)} 个商品")
'''

    # Write the skeleton next to the API results so everything lives in one place.
    template_file = os.path.join(OUTPUT_DIR, "simple_api_scraper_template.py")
    with open(template_file, 'w', encoding='utf-8') as f:
        f.write(template)

    log(f"✓ 模板已保存: {template_file}")

def main():
    """Entry point: replay captured APIs, mine the responses for products, emit a scraper template."""
    log("="*70)
    log("淘宝API直接请求器")
    log("="*70)

    # Step 1: replay the previously captured API URLs.
    results = test_captured_api_urls()

    if results:
        log(f"\n成功测试 {len(results)} 个API")

        # Step 2: look for product arrays inside each saved response.
        log("\n尝试从响应中提取商品数据...")
        for idx, result in enumerate(results, 1):
            log(f"\n[{idx}] 分析响应:")
            products = extract_products_from_api_response(result['data'])

            if not products:
                log(f"  未找到标准商品结构")
                continue

            log(f"  ✓ 找到 {len(products)} 个商品项", "SUCCESS")
            log(f"  样本: {str(products[0])[:150]}...")

    # Step 3: always emit the scraper skeleton for manual follow-up.
    create_simple_api_scraper()

    log("\n" + "="*70)
    log("提示:")
    log("1. 查看 api_direct_results/ 目录中的响应数据")
    log("2. 分析响应结构，找到包含商品的字段")
    log("3. 修改 simple_api_scraper_template.py 使用真实URL")
    log("="*70)

# Script entry point: run the full replay/extract/template pipeline.
if __name__ == "__main__":
    main()
