#!/usr/bin/env python3
"""
淘宝终极爬虫 - 性能测试脚本
在无Cookie情况下测试程序的健壮性和性能指标
"""

import os
import sys
import time
import json
import logging
from datetime import datetime
import subprocess

# Configure logging
log_file = f"performance_test_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.FileHandler(log_file, encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


def test_scraper_initialization():
    """测试1: 爬虫初始化性能"""
    logger.info("="*70)
    logger.info("测试1: 爬虫初始化性能")
    logger.info("="*70)

    start = time.time()

    try:
        # Launch the scraper once to measure import and startup cost.
        # Run it with the same interpreter as this test script.
        cmd = [
            sys.executable, 'taobao_ultimate_scraper.py',
            '-k', '测试关键词',  # throwaway search keyword
            '--delay', '1'
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=60
        )

        init_time = time.time() - start

        logger.info(f"✓ 初始化耗时: {init_time:.2f}秒")

        # 分析输出
        output = result.stdout + result.stderr

        if "undetected-chromedriver" in output:
            logger.info("✓ undetected-chromedriver加载成功")

        if "Driver初始化成功" in output:
            logger.info("✓ Driver初始化成功")

        if "Cookie文件不存在" in output or "需要手动登录" in output:
            logger.info("⚠ Cookie不存在(预期行为)")

        return {
            'test': 'initialization performance',
            'time': init_time,
            'status': 'success' if init_time < 10 else 'slow'  # target: under 10 seconds
        }

    except subprocess.TimeoutExpired:
        logger.error("✗ Initialization timed out")
        return {'test': 'initialization performance', 'time': 60, 'status': 'timeout'}
    except Exception as e:
        logger.error(f"✗ Initialization failed: {e}")
        return {'test': 'initialization performance', 'time': 0, 'status': 'failed'}


def test_cli_interface():
    """测试2: CLI接口完整性"""
    logger.info("\n" + "="*70)
    logger.info("测试2: CLI接口完整性")
    logger.info("="*70)

    tests = [
        {
            'name': 'help message',
            'cmd': [sys.executable, 'taobao_ultimate_scraper.py', '-h'],
            'expected': ['usage:', 'keywords', 'pages']
        },
        {
            'name': 'undetected-chromedriver import check',
            'cmd': [sys.executable, '-c', 'import undetected_chromedriver; print("OK")'],
            'expected': ['OK']
        },
        {
            'name': 'Selenium import check',
            'cmd': [sys.executable, '-c', 'from selenium import webdriver; print("OK")'],
            'expected': ['OK']
        }
    ]

    results = []

    for test in tests:
        try:
            result = subprocess.run(
                test['cmd'],
                capture_output=True,
                text=True,
                timeout=10
            )

            output = result.stdout + result.stderr
            passed = all(exp in output for exp in test['expected'])

            status = "✓" if passed else "✗"
            logger.info(f"{status} {test['name']}: {'通过' if passed else '失败'}")

            results.append({
                'test': test['name'],
                'status': 'passed' if passed else 'failed'
            })

        except Exception as e:
            logger.error(f"✗ {test['name']}: {e}")
            results.append({'test': test['name'], 'status': 'error'})

    return results


def test_file_structure():
    """测试3: 文件结构完整性"""
    logger.info("\n" + "="*70)
    logger.info("测试3: 文件结构完整性")
    logger.info("="*70)

    required_files = [
        'taobao_ultimate_scraper.py',
        'daily_goods_100.txt',
        '淘宝终极爬虫使用指南.md',
        '淘宝终极爬虫实现报告.md',
        'demo_scraper.sh'
    ]

    results = []

    for file in required_files:
        exists = os.path.exists(file)
        size = os.path.getsize(file) if exists else 0
        status = "✓" if exists else "✗"

        if exists:
            logger.info(f"{status} {file}: {size} bytes")
        else:
            logger.warning(f"{status} {file}: missing")

        results.append({
            'file': file,
            'exists': exists,
            'size': size
        })

    return results


def test_keywords_file():
    """测试4: 关键词文件分析"""
    logger.info("\n" + "="*70)
    logger.info("测试4: 关键词文件分析")
    logger.info("="*70)

    try:
        with open('daily_goods_100.txt', 'r', encoding='utf-8') as f:
            keywords = [line.strip() for line in f if line.strip()]

        logger.info(f"✓ 关键词总数: {len(keywords)}")
        logger.info(f"✓ 前10个关键词: {', '.join(keywords[:10])}")

        # Category breakdown; the match terms stay in Chinese so they match the keyword file
        categories = {
            'toiletries': ['牙刷', '牙膏', '洗发水', '沐浴露', '香皂', '洗面奶', '护肤'],
            'kitchen': ['碗', '盘子', '筷子', '勺子', '菜刀', '砧板'],
            'cleaning': ['拖把', '扫帚', '抹布', '洗洁精', '洗衣液'],
            'daily use': ['毛巾', '浴巾', '纸巾', '垃圾袋', '保鲜']
        }

        category_counts = {}
        for cat, terms in categories.items():
            count = sum(1 for kw in keywords if any(term in kw for term in terms))
            category_counts[cat] = count
            logger.info(f"  {cat}用品: {count}个")

        return {
            'total': len(keywords),
            'categories': category_counts,
            'keywords': keywords
        }

    except Exception as e:
        logger.error(f"✗ 读取关键词文件失败: {e}")
        return None


def estimate_performance(keyword_count):
    """测试5: 性能预估"""
    logger.info("\n" + "="*70)
    logger.info("测试5: 性能预估分析")
    logger.info("="*70)

    # 基于GitHub项目经验的预估
    avg_time_per_keyword = 20  # 秒
    success_rate = 0.70  # 70%
    items_per_page = 35  # 平均每页商品数

    total_time_min = (keyword_count * avg_time_per_keyword) / 60
    expected_success = int(keyword_count * success_rate)
    expected_items = expected_success * items_per_page
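
    # Worked example for the shipped keyword file (103 keywords, as referenced
    # elsewhere in this script), using the default assumptions above:
    #   total_time_min   = 103 * 20 / 60 ≈ 34.3 minutes
    #   expected_success = int(103 * 0.70) = 72 keywords
    #   expected_items   = 72 * 35 = 2520 items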

    logger.info(f"基于103个关键词的预估:")
    logger.info(f"  单个关键词平均耗时: {avg_time_per_keyword}秒")
    logger.info(f"  预计总耗时: {total_time_min:.1f}分钟 ({total_time_min/60:.1f}小时)")
    logger.info(f"  预计成功率: {success_rate*100:.0f}%")
    logger.info(f"  预计成功数: {expected_success}个关键词")
    logger.info(f"  预计失败数: {keyword_count - expected_success}个关键词")
    logger.info(f"  预计总商品数: {expected_items}个")

    # 资源消耗预估
    logger.info(f"\n资源消耗预估:")
    logger.info(f"  内存占用: ~300-500MB (Chrome)")
    logger.info(f"  磁盘空间: ~10-20MB (数据)")
    logger.info(f"  网络流量: ~100-200MB")

    # 最佳/最坏情况
    logger.info(f"\n性能区间:")
    logger.info(f"  最佳情况 (80%成功):")
    logger.info(f"    - 成功: {int(keyword_count * 0.8)}个")
    logger.info(f"    - 商品: ~{int(keyword_count * 0.8 * items_per_page)}个")
    logger.info(f"    - 耗时: ~{keyword_count * 15 / 60:.0f}分钟")

    logger.info(f"  一般情况 (70%成功):")
    logger.info(f"    - 成功: {expected_success}个")
    logger.info(f"    - 商品: ~{expected_items}个")
    logger.info(f"    - 耗时: ~{total_time_min:.0f}分钟")

    logger.info(f"  最坏情况 (50%成功):")
    logger.info(f"    - 成功: {int(keyword_count * 0.5)}个")
    logger.info(f"    - 商品: ~{int(keyword_count * 0.5 * items_per_page)}个")
    logger.info(f"    - 耗时: ~{keyword_count * 25 / 60:.0f}分钟")

    return {
        'keyword_count': keyword_count,
        'avg_time_per_keyword': avg_time_per_keyword,
        'total_time_minutes': total_time_min,
        'success_rate': success_rate,
        'expected_success': expected_success,
        'expected_items': expected_items
    }


def test_error_handling():
    """测试6: 错误处理能力"""
    logger.info("\n" + "="*70)
    logger.info("测试6: 错误处理能力")
    logger.info("="*70)

    test_cases = [
        {
            'name': 'missing keyword argument',
            'cmd': [sys.executable, 'taobao_ultimate_scraper.py'],
            'should_fail': True
        },
        {
            'name': 'nonexistent keyword file',
            'cmd': [sys.executable, 'taobao_ultimate_scraper.py', '-f', 'nonexistent.txt'],
            'should_fail': True
        },
        {
            'name': 'invalid page count',
            'cmd': [sys.executable, 'taobao_ultimate_scraper.py', '-k', '测试', '-p', '-1'],
            'should_fail': True
        }
    ]

    results = []

    for test in test_cases:
        try:
            result = subprocess.run(
                test['cmd'],
                capture_output=True,
                text=True,
                timeout=10
            )

            failed = result.returncode != 0

            if test['should_fail']:
                passed = failed
                status = "✓" if passed else "✗"
                logger.info(f"{status} {test['name']}: {'正确处理' if passed else '未正确处理'}")
            else:
                passed = not failed
                status = "✓" if passed else "✗"
                logger.info(f"{status} {test['name']}: {'成功' if passed else '失败'}")

            results.append({
                'test': test['name'],
                'passed': passed
            })

        except subprocess.TimeoutExpired:
            logger.warning(f"⚠ {test['name']}: 超时")
            results.append({'test': test['name'], 'passed': False})
        except Exception as e:
            logger.error(f"✗ {test['name']}: {e}")
            results.append({'test': test['name'], 'passed': False})

    return results


def generate_test_report(all_results):
    """生成测试报告"""
    logger.info("\n" + "="*70)
    logger.info("性能测试报告")
    logger.info("="*70)

    report = {
        'test_time': datetime.now().isoformat(),
        'results': all_results,
        'summary': {}
    }

    # Tally the number of individual test items and record it in the summary
    total_tests = sum(len(r) if isinstance(r, list) else 1 for r in all_results.values())
    report['summary']['total_tests'] = total_tests

    logger.info(f"\nTotal test items: {total_tests}")

    # Save the report; ensure_ascii=False keeps the Chinese keywords readable in the JSON
    report_file = f"performance_test_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(report_file, 'w', encoding='utf-8') as f:
        json.dump(report, f, ensure_ascii=False, indent=2)

    logger.info(f"✓ Test report saved: {report_file}")

    return report


def main():
    """主测试流程"""
    logger.info("="*70)
    logger.info("淘宝终极爬虫 - 性能测试套件")
    logger.info("="*70)
    logger.info(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    logger.info("")

    start_time = time.time()

    all_results = {}

    # Run all tests
    all_results['initialization'] = test_scraper_initialization()
    all_results['cli_interface'] = test_cli_interface()
    all_results['file_structure'] = test_file_structure()

    keywords_info = test_keywords_file()
    all_results['keywords'] = keywords_info

    if keywords_info:
        all_results['performance_estimate'] = estimate_performance(keywords_info['total'])

    all_results['error_handling'] = test_error_handling()

    # Generate the report
    report = generate_test_report(all_results)

    total_time = time.time() - start_time

    logger.info("\n" + "="*70)
    logger.info("测试完成!")
    logger.info("="*70)
    logger.info(f"测试总耗时: {total_time:.2f}秒")
    logger.info(f"日志文件: {log_file}")

    # Show the key estimates
    if 'performance_estimate' in all_results:
        pe = all_results['performance_estimate']
        logger.info("\n" + "="*70)
        logger.info("103个日用品爬取预估")
        logger.info("="*70)
        logger.info(f"预计总耗时: {pe['total_time_minutes']:.1f}分钟")
        logger.info(f"预计成功率: {pe['success_rate']*100:.0f}%")
        logger.info(f"预计成功数: {pe['expected_success']}个关键词")
        logger.info(f"预计商品数: {pe['expected_items']}个")
        logger.info("="*70)

    logger.info("\n⚠ 注意: 实际运行需要先完成登录获取Cookie")
    logger.info("命令: python taobao_ultimate_scraper.py -k 测试 --no-headless --no-cookie")


if __name__ == "__main__":
    main()
