157 lines
4.7 KiB
Python
157 lines
4.7 KiB
Python
|
|
#!/usr/bin/env python
|
||
|
|
# -*- coding: utf-8 -*-
|
||
|
|
"""
|
||
|
|
SEO 效果监控脚本
|
||
|
|
- 检查服务器上的页面可访问性和基本 SEO 指标
|
||
|
|
- 检查 robots.txt / sitemap.xml 是否正常返回
|
||
|
|
- 检查各页面的 HTTP 状态码、响应时间
|
||
|
|
- 检查页面中的关键 SEO 标签是否存在
|
||
|
|
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import sys
|
||
|
|
import io
|
||
|
|
import json
|
||
|
|
import time
|
||
|
|
import urllib.request
|
||
|
|
import urllib.error
|
||
|
|
|
||
|
|
# Force UTF-8 on stdout so the Chinese report text can be printed on consoles
# whose default encoding is not UTF-8.  reconfigure() switches the encoding of
# the existing stream in place and keeps its buffering mode, so progress lines
# still appear as they are printed; wrapping sys.stdout.buffer in a fresh
# TextIOWrapper (the fallback) disables line buffering.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')
else:
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
||
|
|
|
||
|
|
# Base URL of the site under test; the paths in PAGES are appended to it.
SITE_URL = 'https://blog.quant-view.xyz'

# Timeout, in seconds, applied to each page request.
CHECK_TIMEOUT = 15

# Article slugs; each article is served as "/<slug>.html".
_ARTICLE_SLUGS = (
    'gfil-boss-panel-v70-review',
    'gold-xauusd-trading-2026',
    'tradingview-vs-gfil-boss',
    'why-retail-traders-lose-money',
    'trading-activity-tracked',
    'forex-scalping-2026',
    'institutional-traders-see-market-moves',
    'ai-driven-market-intelligence',
    'wti-crude-oil-2026',
    'gfil-boss-panel-faq',
)

# Pages to check: the home page, every article, then the crawler-facing files.
PAGES = (
    ['/']
    + [f'/{slug}.html' for slug in _ARTICLE_SLUGS]
    + ['/sitemap.xml', '/robots.txt']
)

# Each entry is (check name, substring that must occur in the page source).
SEO_CHECKS = [
    ('title', '<title>'),
    ('description', 'meta name="description"'),
    ('canonical', 'rel="canonical"'),
    ('og:title', 'property="og:title"'),
    ('twitter:card', 'name="twitter:card"'),
    ('structured_data', 'application/ld+json'),
]
|
||
|
|
|
||
|
|
|
||
|
|
def check_page(url):
    """Fetch one URL and record its HTTP status, timing, size and SEO tags.

    Args:
        url: Absolute URL to request.

    Returns:
        A dict with these keys:

        - ``url``: the URL that was requested.
        - ``status``: the response status code, or ``None`` when no response
          was obtained.
        - ``time_ms``: milliseconds elapsed until ``urlopen`` returned
          (measured before the body is read), or ``None`` on failure.
        - ``size_bytes``: length of the raw response body in bytes, or
          ``None`` on failure.
        - ``seo_checks``: ``{check name: bool}`` for every entry of
          ``SEO_CHECKS``; left empty for non-HTML responses and on failure.
        - ``error``: error message, or ``None`` on success.

        Request failures are recorded in ``error`` instead of being raised.
    """
    result = {
        'url': url,
        'status': None,
        'time_ms': None,
        'size_bytes': None,
        'seo_checks': {},
        'error': None,
    }

    try:
        req = urllib.request.Request(url, headers={
            'User-Agent': 'Mozilla/5.0 (compatible; GFIL-SEO-Bot/1.0)',
        })
        # perf_counter() is monotonic, so the measurement cannot be skewed
        # by wall-clock adjustments while the request is in flight.
        start = time.perf_counter()
        with urllib.request.urlopen(req, timeout=CHECK_TIMEOUT) as resp:
            elapsed = int((time.perf_counter() - start) * 1000)
            body = resp.read()
            status = resp.status
            content_type = resp.headers.get('Content-Type') or ''

        result['status'] = status
        result['time_ms'] = elapsed
        # Measure the raw payload: the decoded text has fewer characters than
        # bytes as soon as the page contains non-ASCII content.
        result['size_bytes'] = len(body)

        # SEO tag checks only make sense for HTML documents; running them on
        # sitemap.xml / robots.txt would report every tag as missing.  A
        # response without a Content-Type header is still checked.
        if not content_type or 'html' in content_type.lower():
            html = body.decode('utf-8', errors='replace')
            for name, pattern in SEO_CHECKS:
                result['seo_checks'][name] = pattern in html

    except urllib.error.HTTPError as e:
        # The server answered with an error status: keep the code as well.
        result['status'] = e.code
        result['error'] = str(e)
    except urllib.error.URLError as e:
        result['error'] = str(e.reason)
    except Exception as e:
        # Deliberate catch-all: one broken page must not abort the whole run.
        result['error'] = str(e)

    return result
|
||
|
|
|
||
|
|
|
||
|
|
def _build_report_lines(results):
    """Render the check results as a list of report lines (no I/O)."""
    total = len(results)
    ok = sum(1 for r in results if r['status'] == 200)
    errors = [r for r in results if r['status'] != 200 or r['error']]

    report = [
        '=' * 60,
        f'SEO 监控报告 - {time.strftime("%Y-%m-%d %H:%M")}',
        '=' * 60,
        f'站点: {SITE_URL}',
        f'页面总数: {total}',
        f'正常: {ok}',
        f'异常: {len(errors)}',
        '',
    ]

    if errors:
        report.append('--- 异常页面 ---')
        for r in errors:
            report.append(f' [{r["status"] or "ERR"}] {r["url"]}')
            if r['error']:
                report.append(f' {r["error"]}')
        report.append('')

    report.append('--- 各页面详情 ---')
    for r in results:
        status_str = str(r['status']) if r['status'] else 'ERR'
        # Compare against None so that a measured 0 ms is still reported.
        time_str = f'{r["time_ms"]}ms' if r['time_ms'] is not None else 'N/A'
        report.append(f' [{status_str}] {r["url"]} ({time_str})')
        if r['seo_checks']:
            missing = [k for k, v in r['seo_checks'].items() if not v]
            if missing:
                report.append(f' 缺少 SEO 标签: {", ".join(missing)}')
        if r['error']:
            report.append(f' 错误: {r["error"]}')

    report.append('')
    report.append('=' * 60)
    return report


def generate_report(results):
    """Build the monitoring report, save it to disk and print it.

    The report is written to ``../output/seo_report.txt`` relative to the
    directory containing this script; the output directory is created when
    it does not exist yet.

    Args:
        results: List of result dicts with the keys ``url``, ``status``,
            ``time_ms``, ``seo_checks`` and ``error``.

    Returns:
        The ``results`` list that was passed in, unchanged.

    Raises:
        OSError: If the report file cannot be created or written.
    """
    text = '\n'.join(_build_report_lines(results))

    report_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               '..', 'output', 'seo_report.txt')
    # A fresh checkout may not contain the output directory yet.
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(text)

    print(text)
    print(f'\n报告已保存: {report_path}')
    return results
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Check every path in PAGES, print progress, then generate the report."""
    print(f'开始 SEO 检查: {SITE_URL}\n')

    results = []
    total = len(PAGES)
    for i, path in enumerate(PAGES, 1):
        url = f'{SITE_URL}{path}'
        print(f'[{i}/{total}] 检查: {path}')
        result = check_page(url)
        results.append(result)

        status_icon = 'OK' if result['status'] == 200 else 'FAIL'
        # Compare against None so that a measured 0 ms is still shown.
        if result['time_ms'] is not None:
            time_str = f'{result["time_ms"]}ms'
        else:
            time_str = 'N/A'
        print(f' -> {status_icon} [{result["status"]}] {time_str}')
        if result['error']:
            print(f' -> ERROR: {result["error"]}')

        # Small delay between requests to avoid being rate-limited; there is
        # nothing left to throttle after the last page.
        if i < total:
            time.sleep(0.5)

    generate_report(results)


if __name__ == '__main__':
    main()
|