#!/usr/bin/env python3
import json
from collections import Counter, defaultdict

def generate_detailed_report(path='/root/chinese_books.json'):
    """Print a detailed statistics report for a JSON list of book records.

    The file at *path* must contain a JSON list of objects. The keys read
    from each record are ``filesize_reported``, ``language``, ``year``,
    ``series``, ``author``, ``publisher``, ``extension``, ``isbns`` and
    ``description``; absent or empty values are skipped by the relevant
    section. The report is written to standard output and nothing is
    returned.

    Args:
        path: Location of the JSON file to analyse. Defaults to the path
            that was previously hard-coded, so existing callers are
            unaffected.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        chinese_books = json.load(f)

    total_books = len(chinese_books)

    def percent(count):
        # An empty dataset must not abort the report with ZeroDivisionError.
        return count / total_books * 100 if total_books else 0.0

    print("=" * 60)
    print("📖 中文书籍数据详细分析报告")
    print("=" * 60)

    # Basic statistics
    print("\n📊 基本统计:")
    print(f"中文书籍总数: {total_books:,} 本")

    # File size statistics (records without a reported size are ignored)
    sizes = [
        size
        for size in (book.get('filesize_reported') for book in chinese_books)
        if size
    ]
    if sizes:
        size_sum = sum(sizes)
        total_size = size_sum / (1024**3)  # GB
        avg_size = size_sum / len(sizes) / (1024**2)  # MB
        print(f"总文件大小: {total_size:.2f} GB")
        print(f"平均文件大小: {avg_size:.2f} MB")

    # Language tag distribution
    languages = Counter(book.get('language', '未知') for book in chinese_books)
    print("\n🌐 语言标记分布:")
    for lang, count in languages.most_common():
        print(f"  {lang}: {count} 本 ({percent(count):.1f}%)")

    # Year trend. Years are normalised to int so that numeric JSON values
    # do not crash the check and so that sorting is numeric rather than
    # lexicographic. isdecimal() (unlike isdigit()) only accepts text that
    # int() can actually parse.
    year_counts = Counter()
    for book in chinese_books:
        raw_year = book.get('year')
        if not raw_year:
            continue
        year_text = str(raw_year)
        if year_text.isdecimal():
            year_counts[int(year_text)] += 1
    print("\n📈 年份趋势 (2000年后):")
    recent_years = sorted((y for y in year_counts if y >= 2000), reverse=True)
    for year in recent_years[:15]:
        count = year_counts[year]
        # One full block per 100 books; a half block marks anything smaller.
        bar = "█" * (count // 100) if count >= 100 else "▌"
        print(f"  {year}: {count:4d} 本 {bar}")

    # Most common series
    series = Counter(
        book.get('series', '') for book in chinese_books if book.get('series')
    )
    if series:
        print("\n📚 热门系列 (前10):")
        for s, count in series.most_common(10):
            print(f"  {s}: {count} 本")

    # Prolific authors. Placeholder names are removed BEFORE taking the
    # top 15 so that they do not consume slots in the ranking.
    placeholder_authors = {'Unknown', '未知', 'null', 'Desconocido'}
    authors = Counter(
        book.get('author', '') for book in chinese_books if book.get('author')
    )
    productive_authors = Counter({
        author: count
        for author, count in authors.items()
        if count >= 10 and author not in placeholder_authors
    })
    print("\n✍️ 高产作者 (10本以上, 前15):")
    for author, count in productive_authors.most_common(15):
        print(f"  {author}: {count} 本")

    # Publishers, with the same placeholder handling as authors.
    placeholder_publishers = {'Unknown', '未知', 'null'}
    publishers = Counter(
        pub
        for pub in (book.get('publisher') for book in chinese_books)
        if pub and pub not in placeholder_publishers
    )
    print("\n🏢 主要出版社 (前15):")
    for pub, count in publishers.most_common(15):
        print(f"  {pub}: {count} 本")

    # File format distribution. A null extension is treated like a missing
    # one so that .upper() below never sees None.
    formats = Counter(book.get('extension') or '' for book in chinese_books)
    print("\n📄 文件格式分布:")
    for fmt, count in formats.most_common():
        print(f"  {fmt.upper()}: {count:5d} 本 ({percent(count):5.1f}%)")

    # Books that carry ISBN information
    isbn_count = sum(1 for book in chinese_books if book.get('isbns'))
    print("\n🔢 ISBN信息:")
    print(f"有ISBN的书籍: {isbn_count} 本 ({percent(isbn_count):.1f}%)")

    # Books that carry a description
    desc_count = sum(1 for book in chinese_books if book.get('description'))
    print(f"有描述的书籍: {desc_count} 本 ({percent(desc_count):.1f}%)")

    print("\n" + "=" * 60)
    print("分析完成！")

# Generate the report only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    generate_detailed_report()
