#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
SEO 效果监控脚本
- 检查服务器上的页面可访问性和基本 SEO 指标
- 检查 robots.txt / sitemap.xml 是否正常返回
- 检查各页面的 HTTP 状态码、响应时间
- 检查页面中的关键 SEO 标签是否存在
"""
import os
import sys
import io
import json
import time
import urllib.request
import urllib.error
# Force UTF-8 on stdout so the non-ASCII report text can be printed on
# consoles whose default encoding cannot represent it.
# reconfigure() (Python 3.7+) changes the encoding of the existing stream in
# place, which keeps its line-buffering mode so progress lines appear as they
# are printed. Re-wrapping the raw buffer is kept only as a fallback, and is
# skipped entirely when stdout has been replaced by an object without a
# ``buffer`` attribute instead of crashing at import time.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')
elif hasattr(sys.stdout, 'buffer'):
    sys.stdout = io.TextIOWrapper(
        sys.stdout.buffer,
        encoding='utf-8',
        line_buffering=getattr(sys.stdout, 'line_buffering', False),
    )
# Base URL of the monitored site (no trailing slash).
SITE_URL = "https://blog.quant-view.xyz"

# Timeout, in seconds, for each page request.
CHECK_TIMEOUT = 15

# Site-relative paths of the pages to check.
PAGES = [
    "/",
    "/gfil-boss-panel-v70-review.html",
    "/gold-xauusd-trading-2026.html",
    "/tradingview-vs-gfil-boss.html",
    "/why-retail-traders-lose-money.html",
    "/trading-activity-tracked.html",
    "/forex-scalping-2026.html",
    "/institutional-traders-see-market-moves.html",
    "/ai-driven-market-intelligence.html",
    "/wti-crude-oil-2026.html",
    "/gfil-boss-panel-faq.html",
    "/sitemap.xml",
    "/robots.txt",
]
# (check name, marker) pairs. A check passes when its marker occurs as a
# plain, case-sensitive substring of the page body.
# NOTE(review): the 'title' marker was an unterminated string containing only
# a line break; it is restored here as the opening <title> tag - confirm this
# matches the intended pattern.
SEO_CHECKS = [
    ('title', '<title>'),
    ('description', 'meta name="description"'),
    ('canonical', 'rel="canonical"'),
    ('og:title', 'property="og:title"'),
    ('twitter:card', 'name="twitter:card"'),
    ('structured_data', 'application/ld+json'),
]
def check_page(url):
    """Fetch one URL and record its status, timing, size and SEO markers.

    Args:
        url: Absolute URL to request.

    Returns:
        A dict with these keys:

        - ``url``: the URL that was requested.
        - ``status``: the response status code, or ``None`` when no response
          was obtained.
        - ``time_ms``: whole milliseconds until ``urlopen`` returned (the
          body download is not included), or ``None`` on failure.
        - ``size_bytes``: length of the raw response body in bytes, or
          ``None`` on failure.
        - ``seo_checks``: ``{check name: bool}`` for every entry of
          ``SEO_CHECKS``; left empty on failure and for non-HTML responses.
        - ``error``: error description, or ``None`` on success.

    Exceptions raised while fetching are caught and reported through the
    ``error`` key instead of propagating.
    """
    result = {
        'url': url,
        'status': None,
        'time_ms': None,
        'size_bytes': None,
        'seo_checks': {},
        'error': None,
    }
    try:
        req = urllib.request.Request(url, headers={
            'User-Agent': 'Mozilla/5.0 (compatible; GFIL-SEO-Bot/1.0)',
        })
        # perf_counter() is monotonic, so the measurement cannot be skewed
        # by system clock adjustments.
        start = time.perf_counter()
        with urllib.request.urlopen(req, timeout=CHECK_TIMEOUT) as resp:
            elapsed = int((time.perf_counter() - start) * 1000)
            body = resp.read()
            result['status'] = resp.status
            result['time_ms'] = elapsed
            # Measure the raw payload: the decoded text has fewer characters
            # than bytes as soon as it contains multi-byte UTF-8 sequences.
            result['size_bytes'] = len(body)
            # HTML markers are meaningless for resources such as sitemap.xml
            # or robots.txt; checking them would only report false
            # "missing tag" findings.
            content_type = resp.headers.get_content_type()
            if content_type in ('text/html', 'application/xhtml+xml'):
                html = body.decode('utf-8', errors='replace')
                for name, pattern in SEO_CHECKS:
                    result['seo_checks'][name] = pattern in html
    except urllib.error.HTTPError as e:
        # Must precede URLError: HTTPError is a subclass and still carries
        # a usable status code.
        result['status'] = e.code
        result['error'] = str(e)
    except urllib.error.URLError as e:
        result['error'] = str(e.reason)
    except Exception as e:
        # Best effort: one unexpected failure must not abort the whole run.
        result['error'] = str(e)
    return result
def generate_report(results):
    """Build the monitoring report, print it and save it to disk.

    The report is written to ``../output/seo_report.txt`` relative to the
    directory containing this file; the ``output`` directory is created if
    it does not exist yet.

    Args:
        results: List of result dicts with the keys ``url``, ``status``,
            ``time_ms``, ``seo_checks`` and ``error``.

    Returns:
        The ``results`` list that was passed in, unchanged.
    """
    total = len(results)
    ok = sum(1 for r in results if r['status'] == 200)
    # A page counts as failing when it did not return 200 or recorded an error.
    errors = [r for r in results if r['status'] != 200 or r['error']]

    report = [
        '=' * 60,
        f'SEO 监控报告 - {time.strftime("%Y-%m-%d %H:%M")}',
        '=' * 60,
        f'站点: {SITE_URL}',
        f'页面总数: {total}',
        f'正常: {ok}',
        f'异常: {len(errors)}',
        '',
    ]

    if errors:
        report.append('--- 异常页面 ---')
        for r in errors:
            report.append(f' [{r["status"] or "ERR"}] {r["url"]}')
            if r['error']:
                report.append(f' {r["error"]}')
        report.append('')

    report.append('--- 各页面详情 ---')
    for r in results:
        status_str = str(r['status']) if r['status'] else 'ERR'
        # Compare against None: a measured 0 ms is a valid timing.
        time_str = f'{r["time_ms"]}ms' if r['time_ms'] is not None else 'N/A'
        report.append(f' [{status_str}] {r["url"]} ({time_str})')
        if r['seo_checks']:
            missing = [k for k, v in r['seo_checks'].items() if not v]
            if missing:
                report.append(f' 缺少 SEO 标签: {", ".join(missing)}')
        if r['error']:
            report.append(f' 错误: {r["error"]}')
    report.append('')
    report.append('=' * 60)

    text = '\n'.join(report)
    # Print before saving so the report is still visible if the write fails.
    print(text)

    report_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               '..', 'output', 'seo_report.txt')
    # open() does not create missing directories; without this the first run
    # on a fresh checkout fails with FileNotFoundError.
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(text)
    print(f'\n报告已保存: {report_path}')
    return results
# Script entry point: check every configured page in order, print progress
# for each one, then build, print and save the report.
if __name__ == '__main__':
    print(f'开始 SEO 检查: {SITE_URL}\n')
    results = []
    page_count = len(PAGES)
    for i, path in enumerate(PAGES, 1):
        url = f'{SITE_URL}{path}'
        print(f'[{i}/{page_count}] 检查: {path}')
        result = check_page(url)
        results.append(result)
        status_icon = 'OK' if result['status'] == 200 else 'FAIL'
        # Compare against None: a measured 0 ms is a valid timing.
        if result['time_ms'] is not None:
            time_str = f'{result["time_ms"]}ms'
        else:
            time_str = 'N/A'
        print(f' -> {status_icon} [{result["status"]}] {time_str}')
        if result['error']:
            print(f' -> ERROR: {result["error"]}')
        # Short pause between requests to avoid being rate-limited; there is
        # nothing left to throttle after the last page.
        if i < page_count:
            time.sleep(0.5)
    generate_report(results)