#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
SEO monitoring script.

- Checks page reachability and basic SEO indicators on the server.
- Checks that robots.txt / sitemap.xml are returned normally.
- Records the HTTP status code and response time of every page.
- Checks whether the key SEO tags are present in each HTML page.
"""

import os
import sys
import io
import json
import time
import urllib.request
import urllib.error


def _force_utf8_stdout():
    """Make ``sys.stdout`` emit UTF-8 so the Chinese report text prints anywhere.

    ``reconfigure`` changes the encoding of the existing stream in place.
    Wrapping ``sys.stdout.buffer`` in a new ``TextIOWrapper`` is kept only as
    a fallback for streams that lack ``reconfigure``; streams with neither
    attribute are left untouched.
    """
    stream = sys.stdout
    reconfigure = getattr(stream, 'reconfigure', None)
    if callable(reconfigure):
        try:
            reconfigure(encoding='utf-8')
        except (ValueError, OSError):
            # Best effort only: a stream that refuses to be reconfigured
            # must not prevent the checks from running.
            pass
    elif hasattr(stream, 'buffer'):
        sys.stdout = io.TextIOWrapper(stream.buffer, encoding='utf-8')


_force_utf8_stdout()

SITE_URL = 'https://blog.quant-view.xyz'
CHECK_TIMEOUT = 15  # passed as the ``timeout`` argument of urlopen

# Paths (relative to SITE_URL) that are checked.
PAGES = [
    '/',
    '/gfil-boss-panel-v70-review.html',
    '/gold-xauusd-trading-2026.html',
    '/tradingview-vs-gfil-boss.html',
    '/why-retail-traders-lose-money.html',
    '/trading-activity-tracked.html',
    '/forex-scalping-2026.html',
    '/institutional-traders-see-market-moves.html',
    '/ai-driven-market-intelligence.html',
    '/wti-crude-oil-2026.html',
    '/gfil-boss-panel-faq.html',
    '/sitemap.xml',
    '/robots.txt',
]

# (check name, substring that must occur in the page source).
# Every marker must be non-empty: an empty string is contained in any text,
# which would make that check pass unconditionally.
SEO_CHECKS = [
    ('title', '<title'),
    ('description', 'meta name="description"'),
    ('canonical', 'rel="canonical"'),
    ('og:title', 'property="og:title"'),
    ('twitter:card', 'name="twitter:card"'),
    ('structured_data', 'application/ld+json'),
]

# Media types for which looking for SEO tags is meaningful.
HTML_CONTENT_TYPES = ('text/html', 'application/xhtml+xml')


def _is_html_response(content_type):
    """Return True when SEO tag checks should run for this Content-Type value.

    A missing header is treated as HTML so such responses are still checked.
    """
    if not content_type:
        return True
    media_type = content_type.split(';', 1)[0].strip().lower()
    return media_type in HTML_CONTENT_TYPES


def _format_ms(time_ms):
    """Format a millisecond reading; only ``None`` (not 0) means 'no data'."""
    return 'N/A' if time_ms is None else f'{time_ms}ms'


def check_page(url):
    """Fetch ``url`` once and collect its status and SEO indicators.

    Args:
        url: Absolute URL to request.

    Returns:
        A dict with the keys:
        ``url``         the requested URL;
        ``status``      HTTP status code, or ``None`` if no response arrived;
        ``time_ms``     milliseconds until ``urlopen`` returned (the body
                        download is not included), or ``None``;
        ``size_bytes``  size of the raw response body in bytes, or ``None``;
        ``seo_checks``  ``{check name: bool}`` for HTML responses, ``{}``
                        for non-HTML responses and for failed requests;
        ``error``       error text, or ``None`` on success.

    The function never raises for a failed check; failures are reported
    through ``status`` / ``error``.
    """
    result = {
        'url': url,
        'status': None,
        'time_ms': None,
        'size_bytes': None,
        'seo_checks': {},
        'error': None,
    }
    try:
        req = urllib.request.Request(url, headers={
            'User-Agent': 'Mozilla/5.0 (compatible; GFIL-SEO-Bot/1.0)',
        })
        # perf_counter is monotonic, so system clock adjustments cannot
        # distort (or make negative) the measured duration.
        start = time.perf_counter()
        with urllib.request.urlopen(req, timeout=CHECK_TIMEOUT) as resp:
            elapsed = int((time.perf_counter() - start) * 1000)
            body = resp.read()
            status = resp.status
            content_type = resp.headers.get('Content-Type')

        result['status'] = status
        result['time_ms'] = elapsed
        # Measure the raw payload: the decoded text has fewer characters
        # than bytes as soon as the page contains non-ASCII content.
        result['size_bytes'] = len(body)

        # SEO tag checks only make sense for HTML documents; running them
        # on robots.txt / sitemap.xml would report every tag as missing.
        if _is_html_response(content_type):
            # Tag and attribute names are case-insensitive in HTML, so the
            # substring search is done on lowercased text.
            page_text = body.decode('utf-8', errors='replace').lower()
            result['seo_checks'] = {
                name: marker.lower() in page_text
                for name, marker in SEO_CHECKS
            }
    except urllib.error.HTTPError as e:
        # Must precede URLError: HTTPError is a subclass of it.
        result['status'] = e.code
        result['error'] = str(e)
    except urllib.error.URLError as e:
        result['error'] = str(e.reason)
    except Exception as e:
        # Deliberate catch-all: one broken URL (e.g. a malformed address)
        # must not abort the whole monitoring run.
        result['error'] = str(e)
    return result


def _default_report_path():
    """Return ``<script dir>/../output/seo_report.txt`` in normalised form.

    The path is normalised because ``os.makedirs`` is documented to get
    confused by ``..`` components in the path it has to create.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.normpath(
        os.path.join(script_dir, '..', 'output', 'seo_report.txt'))


def _build_report_lines(results):
    """Render the list of ``check_page`` results into report text lines."""
    total = len(results)
    ok = sum(1 for r in results if r['status'] == 200)
    errors = [r for r in results if r['status'] != 200 or r['error']]

    report = []
    report.append('=' * 60)
    report.append(f'SEO 监控报告 - {time.strftime("%Y-%m-%d %H:%M")}')
    report.append('=' * 60)
    report.append(f'站点: {SITE_URL}')
    report.append(f'页面总数: {total}')
    report.append(f'正常: {ok}')
    report.append(f'异常: {len(errors)}')
    report.append('')

    if errors:
        report.append('--- 异常页面 ---')
        for r in errors:
            report.append(f' [{r["status"] or "ERR"}] {r["url"]}')
            if r['error']:
                report.append(f' {r["error"]}')
        report.append('')

    report.append('--- 各页面详情 ---')
    for r in results:
        status_str = str(r['status']) if r['status'] else 'ERR'
        time_str = _format_ms(r['time_ms'])
        report.append(f' [{status_str}] {r["url"]} ({time_str})')
        if r['seo_checks']:
            missing = [k for k, v in r['seo_checks'].items() if not v]
            if missing:
                report.append(f' 缺少 SEO 标签: {", ".join(missing)}')
        if r['error']:
            report.append(f' 错误: {r["error"]}')

    report.append('')
    report.append('=' * 60)
    return report


def generate_report(results, report_path=None):
    """Write the monitoring report to disk and print it.

    Args:
        results: List of dicts as returned by ``check_page``.
        report_path: Destination file. Defaults to
            ``../output/seo_report.txt`` relative to this script.

    Returns:
        The ``results`` list, unchanged.

    Raises:
        OSError: If the report directory or file cannot be created.
    """
    if report_path is None:
        report_path = _default_report_path()

    text = '\n'.join(_build_report_lines(results))

    # Create the target directory first; on a fresh checkout it may not
    # exist yet and open() would fail.
    report_dir = os.path.dirname(report_path)
    if report_dir:
        os.makedirs(report_dir, exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(text)

    print(text)
    print(f'\n报告已保存: {report_path}')
    return results


def main():
    """Check every entry of ``PAGES`` in order, then produce the report."""
    print(f'开始 SEO 检查: {SITE_URL}\n')
    results = []
    page_count = len(PAGES)
    for i, path in enumerate(PAGES, 1):
        url = f'{SITE_URL}{path}'
        print(f'[{i}/{page_count}] 检查: {path}')
        result = check_page(url)
        results.append(result)

        status_icon = 'OK' if result['status'] == 200 else 'FAIL'
        time_str = _format_ms(result['time_ms'])
        print(f' -> {status_icon} [{result["status"]}] {time_str}')
        if result['error']:
            print(f' -> ERROR: {result["error"]}')

        # Short pause between requests to avoid being rate limited;
        # nothing follows the last request, so no pause is needed there.
        if i < page_count:
            time.sleep(0.5)

    generate_report(results)


if __name__ == '__main__':
    main()