Update README
This commit is contained in:
156
deploy_scripts/gsc_index_monitor.py
Normal file
156
deploy_scripts/gsc_index_monitor.py
Normal file
@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
GSC 索引收割监控 — Google Search Console API
|
||||
每天检查: 已编入索引 vs 已发现未索引比例 + 各语言AIO展现量
|
||||
用法: python gsc_index_monitor.py
|
||||
"""
|
||||
import sys, io, os, json, datetime
|
||||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
||||
# RackNerd is US-based — no proxy needed
|
||||
|
||||
from google.oauth2 import service_account
|
||||
from google.auth.transport.requests import AuthorizedSession
|
||||
|
||||
# Public origin of the site; the sitemap URL is built from this.
SITE = 'https://blog.quant-view.xyz'

# Search Console property identifier interpolated into the API request paths.
SITE_URL = 'sc-domain:blog.quant-view.xyz'

# Service-account key file, looked up one directory above this script.
KEY_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    os.pardir,
    'gothic-venture-498218-u0-15afe4efe6f3.json',
)
|
||||
|
||||
def get_session(scopes):
    """Return an authorized HTTP session for the given OAuth *scopes*.

    Credentials are loaded from the service-account key at ``KEY_FILE``.
    """
    credentials = service_account.Credentials.from_service_account_file(
        KEY_FILE,
        scopes=scopes,
    )
    # No proxy is configured: the RackNerd US server has direct internet access.
    return AuthorizedSession(credentials)
|
||||
|
||||
def _count_sitemap_urls(sitemap_url, fallback=259):
    """Return how many ``https://`` ``<loc>`` entries the sitemap contains.

    The fetch is best-effort: if the download or decoding fails, the reason
    is printed and *fallback* is returned instead of raising.
    """
    import re
    import urllib.request

    req = urllib.request.Request(sitemap_url, headers={'User-Agent': 'GFIL-GSC/1.0'})
    try:
        with urllib.request.urlopen(req, timeout=30) as r:
            xml = r.read().decode()
    except Exception as exc:
        # Deliberately broad (but not bare) so Ctrl-C / SystemExit still
        # propagate while any network or decoding problem uses the fallback.
        print(f'Sitemap fetch failed ({exc!r}); assuming {fallback} URLs')
        return fallback
    return len(re.findall(r'<loc>(https://[^<]+)</loc>', xml))


def _pages_per_language(pages):
    """Count *pages* per language based on ``/<lang>/`` path segments.

    A page counts as ``en`` when it contains none of the ``/zh/``, ``/es/``
    or ``/ar/`` segments; otherwise it counts for every segment it contains.
    """
    other_langs = ('zh', 'es', 'ar')
    stats = {
        'en': sum(
            1 for p in pages
            if not any(f'/{lang}/' in p for lang in other_langs)
        ),
    }
    for lang in other_langs:
        stats[lang] = sum(1 for p in pages if f'/{lang}/' in p)
    return stats


def check_indexing_status():
    """Summarize the last 7 days of Search Analytics data for the site.

    "Indexed" is approximated as the set of distinct pages that appear in the
    Search Analytics rows (grouped by page and country); it is compared with
    the number of URLs listed in the site's sitemap.

    Returns:
        A dict with the keys ``date``, ``total_sitemap_urls``,
        ``indexed_pages``, ``index_ratio``, ``discovered_not_indexed``,
        ``countries`` (top 10 by impressions), ``per_language``,
        ``total_impressions`` and ``total_clicks``; or ``None`` when the
        Search Analytics request does not return HTTP 200.
    """
    import urllib.parse

    session = get_session(['https://www.googleapis.com/auth/webmasters.readonly'])

    today = datetime.date.today()
    week_ago = today - datetime.timedelta(days=7)

    # NOTE(review): rowLimit caps the response at 500 page x country rows, so
    # every count below is a lower bound once the site exceeds that.
    body = {
        'startDate': week_ago.isoformat(),
        'endDate': today.isoformat(),
        'dimensions': ['page', 'country'],
        'rowLimit': 500,
    }

    # The property id contains ':' and must be percent-encoded as a path segment.
    site_id = urllib.parse.quote(SITE_URL, safe='')
    resp = session.post(
        f'https://www.googleapis.com/webmasters/v3/sites/{site_id}/searchAnalytics/query',
        data=json.dumps(body),
        headers={'Content-Type': 'application/json'},
        timeout=30,
    )

    if resp.status_code != 200:
        print(f'Search Analytics API error: {resp.status_code}')
        print(resp.text[:500])
        return None

    rows = resp.json().get('rows', [])

    # Distinct pages seen in the report, and impressions summed per country.
    indexed_pages = set()
    countries = {}
    for row in rows:
        page, country = row['keys'][0], row['keys'][1]
        indexed_pages.add(page)
        countries[country] = countries.get(country, 0) + row.get('impressions', 0)

    total_urls = _count_sitemap_urls(f'{SITE}/sitemap.xml')
    indexed_count = len(indexed_pages)

    # Guard against an empty sitemap (no matching <loc> entries) so the ratio
    # does not raise ZeroDivisionError.
    percent = indexed_count * 100 // total_urls if total_urls else 0
    top_countries = sorted(countries.items(), key=lambda x: x[1], reverse=True)[:10]

    return {
        'date': today.isoformat(),
        'total_sitemap_urls': total_urls,
        'indexed_pages': indexed_count,
        'index_ratio': f'{indexed_count}/{total_urls} = {percent}%',
        # Pages with impressions may not be in the sitemap; never go negative.
        'discovered_not_indexed': max(0, total_urls - indexed_count),
        'countries': dict(top_countries),
        'per_language': _pages_per_language(indexed_pages),
        'total_impressions': sum(row.get('impressions', 0) for row in rows),
        'total_clicks': sum(row.get('clicks', 0) for row in rows),
    }
|
||||
|
||||
def check_sitemap_status():
    """Fetch the sitemaps registered for the property in Search Console.

    Returns:
        A list with one dict per sitemap (keys ``path``, ``submitted``,
        ``downloaded``, ``urls``, ``warnings``, ``errors``), or ``None`` when
        the API does not return HTTP 200. On failure the status code and the
        start of the response body are printed, mirroring how
        ``check_indexing_status`` reports errors.
    """
    import urllib.parse

    session = get_session(['https://www.googleapis.com/auth/webmasters.readonly'])

    # The property id contains ':' and must be percent-encoded as a path segment.
    site_id = urllib.parse.quote(SITE_URL, safe='')
    resp = session.get(
        f'https://www.googleapis.com/webmasters/v3/sites/{site_id}/sitemaps',
        timeout=30,
    )

    if resp.status_code != 200:
        # Previously this failed silently, leaving the report section empty
        # with no hint as to why.
        print(f'Sitemaps API error: {resp.status_code}')
        print(resp.text[:500])
        return None

    results = []
    for s in resp.json().get('sitemap', []):
        # Only the first 'contents' entry is reported; a missing or empty
        # list yields 0 URLs.
        contents = s.get('contents') or [{}]
        results.append({
            'path': s.get('path', ''),
            'submitted': s.get('lastSubmitted', 'N/A'),
            'downloaded': s.get('lastDownloaded', 'N/A'),
            'urls': contents[0].get('submitted', 0),
            'warnings': s.get('warnings', 0),
            'errors': s.get('errors', 0),
        })
    return results
|
||||
|
||||
if __name__ == '__main__':
    # Report header.
    print(f'=== GSC Index Harvest Monitor ===')
    print(f'Site: {SITE}')
    print(f'Time: {datetime.datetime.now().isoformat()}\n')

    # Without the service-account key there is nothing to query; exit cleanly.
    if not os.path.exists(KEY_FILE):
        print('Service account key not found. Skipping GSC API.')
        sys.exit(0)

    # 1. Indexing status
    print('--- Index Status ---')
    stats = check_indexing_status()
    if stats:
        # (label, key) pairs, printed in this order.
        summary_fields = (
            (' Sitemap URLs: ', 'total_sitemap_urls'),
            (' Indexed (7d imp): ', 'indexed_pages'),
            (' Index Ratio: ', 'index_ratio'),
            (' Discovered/NotIdx: ', 'discovered_not_indexed'),
            (' Total Impressions: ', 'total_impressions'),
            (' Total Clicks: ', 'total_clicks'),
        )
        for label, key in summary_fields:
            print(f'{label}{stats[key]}')

        print(f'\n Per Language:')
        for lang, count in stats['per_language'].items():
            print(f' {lang}: {count} indexed pages')

        print(f'\n Top Countries:')
        for country, imps in stats['countries'].items():
            print(f' {country}: {imps} impressions')

    # 2. Sitemap status
    print(f'\n--- Sitemap Status ---')
    sm_status = check_sitemap_status()
    if sm_status:
        for entry in sm_status:
            print(f' {entry["path"]}: {entry["urls"]} URLs, {entry.get("errors",0)} errors, {entry.get("warnings",0)} warnings')

    print(f'\n=== Done ===')
|
||||
Reference in New Issue
Block a user