#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
SEO 效果监控脚本
- 检查服务器上的页面可访问性和基本 SEO 指标
- 检查 robots.txt / sitemap.xml 是否正常返回
- 检查各页面的 HTTP 状态码、响应时间
- 检查页面中的关键 SEO 标签是否存在
"""

import os
import sys
import io
import json
import time
import urllib.request
import urllib.error

# Re-wrap stdout as UTF-8 (this script prints Chinese text; presumably needed
# for consoles whose default encoding cannot represent it).
# line_buffering=True makes every newline flush the stream; without it the
# replacement wrapper holds output in its buffer, so the per-page progress
# lines would only show up once the buffer fills or the script exits.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8',
                              line_buffering=True)

# Base URL of the monitored site; every entry of PAGES is appended to it.
SITE_URL = 'https://blog.quant-view.xyz'
# Timeout, in seconds, passed to urlopen() for each request.
CHECK_TIMEOUT = 15

# Paths to check, relative to SITE_URL.
PAGES = [
    '/',
    '/gfil-boss-panel-v70-review.html',
    '/gold-xauusd-trading-2026.html',
    '/tradingview-vs-gfil-boss.html',
    '/why-retail-traders-lose-money.html',
    '/trading-activity-tracked.html',
    '/forex-scalping-2026.html',
    '/institutional-traders-see-market-moves.html',
    '/ai-driven-market-intelligence.html',
    '/wti-crude-oil-2026.html',
    '/gfil-boss-panel-faq.html',
    '/sitemap.xml',
    '/robots.txt',
]

# (check name, marker substring) pairs; check_page() looks for each marker in
# the fetched page text and records the outcome under the check name.
SEO_CHECKS = [
    ('title', '<title>'),
    ('description', 'meta name="description"'),
    ('canonical', 'rel="canonical"'),
    ('og:title', 'property="og:title"'),
    ('twitter:card', 'name="twitter:card"'),
    ('structured_data', 'application/ld+json'),
]


def check_page(url):
    """Fetch one URL and record its HTTP status, timing, size and SEO markers.

    Args:
        url: Absolute URL to request.

    Returns:
        A dict with the keys:
            url: the URL that was requested.
            status: HTTP status code, or None when no response was obtained.
            time_ms: milliseconds until the response headers arrived (the
                body download is not included), or None on failure.
            size_bytes: size of the raw response body in bytes, or None.
            seo_checks: mapping of check name -> bool for HTML responses;
                left empty for non-HTML resources and for failed requests.
            error: error message, or None on success.

    Request and decoding failures are reported through ``error`` rather than
    raised.
    """
    result = {
        'url': url,
        'status': None,
        'time_ms': None,
        'size_bytes': None,
        'seo_checks': {},
        'error': None,
    }

    try:
        req = urllib.request.Request(url, headers={
            'User-Agent': 'Mozilla/5.0 (compatible; GFIL-SEO-Bot/1.0)',
        })
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments made while the request is in flight.
        start = time.perf_counter()
        with urllib.request.urlopen(req, timeout=CHECK_TIMEOUT) as resp:
            elapsed = int((time.perf_counter() - start) * 1000)
            status = resp.status
            content_type = resp.headers.get('Content-Type') or ''
            body = resp.read()

        result['status'] = status
        result['time_ms'] = elapsed
        # Measure the raw payload: the decoded text has fewer characters than
        # bytes whenever it contains multi-byte UTF-8 sequences.
        result['size_bytes'] = len(body)

        # Only HTML documents can carry the SEO markers. Skipping other
        # resources (sitemap.xml, robots.txt) keeps them from being reported
        # as "missing" every tag. A response without a Content-Type header is
        # still checked, to stay on the safe side.
        if not content_type or 'html' in content_type.lower():
            # HTML tag and attribute names are case-insensitive, so compare
            # lower-cased text against lower-cased markers.
            html = body.decode('utf-8', errors='replace').lower()
            for name, pattern in SEO_CHECKS:
                result['seo_checks'][name] = pattern.lower() in html

    except urllib.error.HTTPError as e:
        # The server answered, but with an error status (4xx/5xx).
        result['status'] = e.code
        result['error'] = str(e)
    except urllib.error.URLError as e:
        # No HTTP response at all (DNS failure, refused connection, ...).
        result['error'] = str(e.reason)
    except Exception as e:
        # Anything else (e.g. a timeout while reading the body) is recorded
        # so that one bad page cannot abort the whole monitoring run.
        result['error'] = str(e)

    return result


def _format_report_lines(results):
    """Render the check results as the list of text lines of the report."""
    failed = [r for r in results if r['status'] != 200 or r['error']]
    ok_count = sum(1 for r in results if r['status'] == 200)
    rule = '=' * 60

    lines = [
        rule,
        f'SEO 监控报告 - {time.strftime("%Y-%m-%d %H:%M")}',
        rule,
        f'站点: {SITE_URL}',
        f'页面总数: {len(results)}',
        f'正常: {ok_count}',
        f'异常: {len(failed)}',
        '',
    ]

    # Summary of the pages that failed, listed before the full details.
    if failed:
        lines.append('--- 异常页面 ---')
        for r in failed:
            lines.append(f'  [{r["status"] or "ERR"}] {r["url"]}')
            if r['error']:
                lines.append(f'         {r["error"]}')
        lines.append('')

    lines.append('--- 各页面详情 ---')
    for r in results:
        status_str = str(r['status']) if r['status'] else 'ERR'
        # Compare against None so a measured 0 ms is still printed as a time.
        time_str = f'{r["time_ms"]}ms' if r['time_ms'] is not None else 'N/A'
        lines.append(f'  [{status_str}] {r["url"]} ({time_str})')
        if r['seo_checks']:
            missing = [k for k, v in r['seo_checks'].items() if not v]
            if missing:
                lines.append(f'         缺少 SEO 标签: {", ".join(missing)}')
        if r['error']:
            lines.append(f'         错误: {r["error"]}')

    lines.append('')
    lines.append(rule)
    return lines


def generate_report(results):
    """Print the monitoring report and save it to ``../output/seo_report.txt``.

    The output path is resolved relative to the directory of this script.

    Args:
        results: list of result dicts as returned by ``check_page``.

    Returns:
        The ``results`` list, unchanged.

    Raises:
        OSError: if the output directory or the report file cannot be written.
    """
    report_text = '\n'.join(_format_report_lines(results))

    # Print first: the report stays visible even if saving it fails below.
    print(report_text)

    report_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               '..', 'output', 'seo_report.txt')
    # The output directory may not exist yet (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_text)

    print(f'\n报告已保存: {report_path}')
    return results


# Script entry point: check every configured page in order, print progress
# for each one, then build and save the report.
if __name__ == '__main__':
    print(f'开始 SEO 检查: {SITE_URL}\n')

    results = []
    total = len(PAGES)
    for i, path in enumerate(PAGES, 1):
        url = f'{SITE_URL}{path}'
        print(f'[{i}/{total}] 检查: {path}')
        result = check_page(url)
        results.append(result)
        status_icon = 'OK' if result['status'] == 200 else 'FAIL'
        # Compare against None so a measured 0 ms is still printed as a time.
        if result['time_ms'] is not None:
            time_str = f'{result["time_ms"]}ms'
        else:
            time_str = 'N/A'
        print(f'  -> {status_icon} [{result["status"]}] {time_str}')
        if result['error']:
            print(f'  -> ERROR: {result["error"]}')
        # Pause between requests to avoid being rate-limited; there is
        # nothing left to throttle after the last page.
        if i < total:
            time.sleep(0.5)

    generate_report(results)