# gfil-blog/deploy_scripts/seo_monitor.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
SEO 效果监控脚本
- 检查服务器上的页面可访问性和基本 SEO 指标
- 检查 robots.txt / sitemap.xml 是否正常返回
- 检查各页面的 HTTP 状态码、响应时间
- 检查页面中的关键 SEO 标签是否存在
"""

import os
import sys
import io
import json
import time
import urllib.request
import urllib.error

# Rebind stdout to a UTF-8 text wrapper over the same underlying buffer, so the
# non-ASCII (Chinese) text this script prints is always encoded as UTF-8.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

# Base URL of the site under test; each PAGES entry is appended to it.
SITE_URL = 'https://blog.quant-view.xyz'
# Timeout handed to urlopen() for every request (urlopen expects seconds).
CHECK_TIMEOUT = 15

# Paths to check, relative to SITE_URL.
PAGES = [
    '/',
    '/gfil-boss-panel-v70-review.html',
    '/gold-xauusd-trading-2026.html',
    '/tradingview-vs-gfil-boss.html',
    '/why-retail-traders-lose-money.html',
    '/trading-activity-tracked.html',
    '/forex-scalping-2026.html',
    '/institutional-traders-see-market-moves.html',
    '/ai-driven-market-intelligence.html',
    '/wti-crude-oil-2026.html',
    '/gfil-boss-panel-faq.html',
    '/sitemap.xml',
    '/robots.txt',
]

# (label, marker) pairs: check_page() records, under each label, whether the
# marker occurs as a literal substring of the response body.
SEO_CHECKS = [
    ('title', '<title>'),
    ('description', 'meta name="description"'),
    ('canonical', 'rel="canonical"'),
    ('og:title', 'property="og:title"'),
    ('twitter:card', 'name="twitter:card"'),
    ('structured_data', 'application/ld+json'),
]


def check_page(url):
    """Fetch one URL and record its HTTP status, timing, size and SEO markers.

    Args:
        url: Absolute URL to request.

    Returns:
        A dict with the keys:
          'url'        -- the URL that was requested.
          'status'     -- HTTP status code, or None if no response arrived.
          'time_ms'    -- milliseconds until urlopen() returned; the body
                          download is not included. None on failure.
          'size_bytes' -- length of the raw response body in bytes, or None
                          on failure.
          'seo_checks' -- maps each SEO_CHECKS label to whether its marker
                          occurs in the body. Left empty on failure and for
                          responses whose Content-Type is not HTML.
          'error'      -- description of the failure, or None on success.

    Exceptions raised while building or sending the request are caught and
    reported through 'error' rather than propagated.
    """
    result = {
        'url': url,
        'status': None,
        'time_ms': None,
        'size_bytes': None,
        'seo_checks': {},
        'error': None,
    }

    try:
        req = urllib.request.Request(url, headers={
            'User-Agent': 'Mozilla/5.0 (compatible; GFIL-SEO-Bot/1.0)',
        })
        # perf_counter() is monotonic, so a system clock adjustment during
        # the request cannot yield a negative or inflated duration.
        start = time.perf_counter()
        with urllib.request.urlopen(req, timeout=CHECK_TIMEOUT) as resp:
            elapsed = int((time.perf_counter() - start) * 1000)
            status = resp.status
            content_type = resp.headers.get('Content-Type')
            body = resp.read()

        result['status'] = status
        result['time_ms'] = elapsed
        # Count the raw payload: the decoded string's length is a character
        # count and under-reports any page containing multi-byte UTF-8 text.
        result['size_bytes'] = len(body)

        # Only HTML documents can carry the tags in SEO_CHECKS. Checking
        # sitemap.xml or robots.txt would report every tag as missing.
        # A response without a Content-Type header is still checked.
        if content_type is None or 'html' in content_type.lower():
            html = body.decode('utf-8', errors='replace')
            for name, pattern in SEO_CHECKS:
                result['seo_checks'][name] = pattern in html

    except urllib.error.HTTPError as e:
        # The server answered with an error status: keep the code as well.
        result['status'] = e.code
        result['error'] = str(e)
    except urllib.error.URLError as e:
        # No HTTP response at all (DNS failure, refused connection, ...).
        result['error'] = str(e.reason)
    except Exception as e:
        # Best-effort monitor: one bad page must not abort the whole run.
        result['error'] = str(e)

    return result


def generate_report(results):
    """Build the monitoring report, save it to disk and print it.

    The report is written to ``output/seo_report.txt`` inside the parent of
    this script's directory; the ``output`` directory is created when it
    does not exist yet.

    Args:
        results: List of result dicts as returned by check_page().

    Returns:
        The ``results`` list that was passed in, unchanged.

    Raises:
        OSError: If the output directory or the report file cannot be
            created or written.
    """
    total = len(results)
    ok = sum(1 for r in results if r['status'] == 200)
    errors = [r for r in results if r['status'] != 200 or r['error']]

    report = [
        '=' * 60,
        f'SEO 监控报告 - {time.strftime("%Y-%m-%d %H:%M")}',
        '=' * 60,
        f'站点: {SITE_URL}',
        f'页面总数: {total}',
        f'正常: {ok}',
        f'异常: {len(errors)}',
        '',
    ]

    if errors:
        report.append('--- 异常页面 ---')
        for r in errors:
            report.append(f'  [{r["status"] or "ERR"}] {r["url"]}')
            if r['error']:
                report.append(f'         {r["error"]}')
        report.append('')

    report.append('--- 各页面详情 ---')
    for r in results:
        status_str = str(r['status']) if r['status'] else 'ERR'
        # Compare against None: a measured 0 ms is a real value, not "N/A".
        time_str = f'{r["time_ms"]}ms' if r['time_ms'] is not None else 'N/A'
        report.append(f'  [{status_str}] {r["url"]} ({time_str})')
        if r['seo_checks']:
            missing = [k for k, v in r['seo_checks'].items() if not v]
            if missing:
                report.append(f'         缺少 SEO 标签: {", ".join(missing)}')
        if r['error']:
            report.append(f'         错误: {r["error"]}')

    report.append('')
    report.append('=' * 60)
    report_text = '\n'.join(report)

    # Resolve the '..' before creating directories: makedirs() is documented
    # as unreliable when the path to create contains a parent-dir element.
    output_dir = os.path.normpath(os.path.join(
        os.path.dirname(os.path.abspath(__file__)), '..', 'output'))
    # A fresh checkout may have no output directory; without this the
    # open() below fails with FileNotFoundError after all checks have run.
    os.makedirs(output_dir, exist_ok=True)
    report_path = os.path.join(output_dir, 'seo_report.txt')
    with open(report_path, 'w', encoding='utf-8') as f:
        f.write(report_text)

    print(report_text)
    print(f'\n报告已保存: {report_path}')
    return results


if __name__ == '__main__':
    # Script entry point: probe every configured page in order, echo a
    # one-line verdict per page, then build and save the full report.
    print(f'开始 SEO 检查: {SITE_URL}\n')

    page_count = len(PAGES)
    results = []
    for index, page_path in enumerate(PAGES, start=1):
        print(f'[{index}/{page_count}] 检查: {page_path}')
        outcome = check_page(f'{SITE_URL}{page_path}')
        results.append(outcome)

        if outcome['status'] == 200:
            verdict = 'OK'
        else:
            verdict = 'FAIL'
        if outcome['time_ms']:
            elapsed_label = f'{outcome["time_ms"]}ms'
        else:
            elapsed_label = 'N/A'
        print(f'  -> {verdict} [{outcome["status"]}] {elapsed_label}')

        if outcome['error']:
            print(f'  -> ERROR: {outcome["error"]}')

        # Short pause between requests so the server does not rate-limit us.
        time.sleep(0.5)

    generate_report(results)
Update README 2026-06-28 17:19:47 +00:00
			`#!/usr/bin/env python`
			`# -*- coding: utf-8 -*-`
			`"""`
			`SEO 效果监控脚本`
			`- 检查服务器上的页面可访问性和基本 SEO 指标`
			`- 检查 robots.txt / sitemap.xml 是否正常返回`
			`- 检查各页面的 HTTP 状态码、响应时间`
			`- 检查页面中的关键 SEO 标签是否存在`
			`"""`

			`import os`
			`import sys`
			`import io`
			`import json`
			`import time`
			`import urllib.request`
			`import urllib.error`

			`sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')`

			`SITE_URL = 'https://blog.quant-view.xyz'`
			`CHECK_TIMEOUT = 15`

			`# 需要检查的页面`
			`PAGES = [`
			`'/',`
			`'/gfil-boss-panel-v70-review.html',`
			`'/gold-xauusd-trading-2026.html',`
			`'/tradingview-vs-gfil-boss.html',`
			`'/why-retail-traders-lose-money.html',`
			`'/trading-activity-tracked.html',`
			`'/forex-scalping-2026.html',`
			`'/institutional-traders-see-market-moves.html',`
			`'/ai-driven-market-intelligence.html',`
			`'/wti-crude-oil-2026.html',`
			`'/gfil-boss-panel-faq.html',`
			`'/sitemap.xml',`
			`'/robots.txt',`
			`]`

			`SEO_CHECKS = [`
			`('title', '<title>'),`
			`('description', 'meta name="description"'),`
			`('canonical', 'rel="canonical"'),`
			`('og:title', 'property="og:title"'),`
			`('twitter:card', 'name="twitter:card"'),`
			`('structured_data', 'application/ld+json'),`
			`]`


			`def check_page(url):`
			`"""检查单个页面的状态和 SEO"""`
			`result = {`
			`'url': url,`
			`'status': None,`
			`'time_ms': None,`
			`'size_bytes': None,`
			`'seo_checks': {},`
			`'error': None,`
			`}`

			`try:`
			`req = urllib.request.Request(url, headers={`
			`'User-Agent': 'Mozilla/5.0 (compatible; GFIL-SEO-Bot/1.0)',`
			`})`
			`start = time.time()`
			`with urllib.request.urlopen(req, timeout=CHECK_TIMEOUT) as resp:`
			`elapsed = int((time.time() - start) * 1000)`
			`html = resp.read().decode('utf-8', errors='replace')`

			`result['status'] = resp.status`
			`result['time_ms'] = elapsed`
			`result['size_bytes'] = len(html)`

			`# SEO 标签检查`
			`for name, pattern in SEO_CHECKS:`
			`result['seo_checks'][name] = pattern in html`

			`except urllib.error.HTTPError as e:`
			`result['status'] = e.code`
			`result['error'] = str(e)`
			`except urllib.error.URLError as e:`
			`result['error'] = str(e.reason)`
			`except Exception as e:`
			`result['error'] = str(e)`

			`return result`


			`def generate_report(results):`
			`"""生成监控报告"""`
			`total = len(results)`
			`ok = sum(1 for r in results if r['status'] == 200)`
			`errors = [r for r in results if r['status'] != 200 or r['error']]`

			`report = []`
			`report.append('=' * 60)`
			`report.append(f'SEO 监控报告 - {time.strftime("%Y-%m-%d %H:%M")}')`
			`report.append('=' * 60)`
			`report.append(f'站点: {SITE_URL}')`
			`report.append(f'页面总数: {total}')`
			`report.append(f'正常: {ok}')`
			`report.append(f'异常: {len(errors)}')`
			`report.append('')`

			`if errors:`
			`report.append('--- 异常页面 ---')`
			`for r in errors:`
			`report.append(f' [{r["status"] or "ERR"}] {r["url"]}')`
			`if r['error']:`
			`report.append(f' {r["error"]}')`
			`report.append('')`

			`report.append('--- 各页面详情 ---')`
			`for r in results:`
			`status_str = str(r['status']) if r['status'] else 'ERR'`
			`time_str = f'{r["time_ms"]}ms' if r['time_ms'] else 'N/A'`
			`report.append(f' [{status_str}] {r["url"]} ({time_str})')`
			`if r['seo_checks']:`
			`missing = [k for k, v in r['seo_checks'].items() if not v]`
			`if missing:`
			`report.append(f' 缺少 SEO 标签: {", ".join(missing)}')`
			`if r['error']:`
			`report.append(f' 错误: {r["error"]}')`

			`report.append('')`
			`report.append('=' * 60)`

			`report_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),`
			`'..', 'output', 'seo_report.txt')`
			`with open(report_path, 'w', encoding='utf-8') as f:`
			`f.write('\n'.join(report))`

			`print('\n'.join(report))`
			`print(f'\n报告已保存: {report_path}')`
			`return results`


			`if __name__ == '__main__':`
			`print(f'开始 SEO 检查: {SITE_URL}\n')`

			`results = []`
			`for i, path in enumerate(PAGES, 1):`
			`url = f'{SITE_URL}{path}'`
			`print(f'[{i}/{len(PAGES)}] 检查: {path}')`
			`result = check_page(url)`
			`results.append(result)`
			`status_icon = 'OK' if result['status'] == 200 else 'FAIL'`
			`time_str = f'{result["time_ms"]}ms' if result['time_ms'] else 'N/A'`
			`print(f' -> {status_icon} [{result["status"]}] {time_str}')`
			`if result['error']:`
			`print(f' -> ERROR: {result["error"]}')`
			`# 加一点延迟避免被限流`
			`time.sleep(0.5)`

			`generate_report(results)`