Files
gfil-blog/deploy_scripts/indexing_api_push.py
2026-06-28 17:19:47 +00:00

128 lines
5.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Gemini 地下室方案 #4: Google Indexing API — 15分钟收录
普通 Sitemap 要等几天到几周。Indexing API 让新文章 15 分钟内出现在搜索结果。
配置: Google Cloud Console → 启用 Indexing API → 创建服务账号 → 下载 JSON 密钥
文档: https://developers.google.com/search/apis/indexing-api/v3/using-rest
"""
import json, sys, io, os, time
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
try:
from google.oauth2 import service_account
from google.auth.transport.requests import AuthorizedSession
except ImportError:
print("需要安装: pip install google-auth google-auth-oauthlib google-auth-httplib2")
sys.exit(1)
# Base URL of the blog; used to build the sitemap URL and the daily hub URLs.
SITE = 'https://blog.quant-view.xyz'

# OAuth scope requested when loading the service-account credentials.
SCOPES = ['https://www.googleapis.com/auth/indexing']

# === Configuration: path to the service-account JSON key ===
# GOOGLE_APPLICATION_CREDENTIALS takes precedence when it is set; otherwise
# fall back to the key file expected one directory above this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_DEFAULT_KEY_FILE = os.path.join(
    _SCRIPT_DIR, '..', 'gothic-venture-498218-u0-15afe4efe6f3.json')
KEY_FILE = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', _DEFAULT_KEY_FILE)
def get_authenticated_session():
    """Build an authorized HTTP session from the service-account key.

    Prints an error and exits the process with status 1 when KEY_FILE does
    not exist.

    Returns:
        AuthorizedSession: session carrying credentials loaded from KEY_FILE
        with the SCOPES scope list, and with ``trust_env`` switched off.
    """
    key_present = os.path.exists(KEY_FILE)
    if not key_present:
        print(f"❌ 密钥文件不存在: {KEY_FILE}")
        sys.exit(1)

    authed = AuthorizedSession(
        service_account.Credentials.from_service_account_file(
            KEY_FILE, scopes=SCOPES))
    # Per the original note, this script runs on a RackNerd US server that
    # needs no proxy, so environment-provided settings are not trusted.
    authed.trust_env = False
    return authed
def notify_google(url, notify_type='URL_UPDATED', session=None):
    """Publish one URL notification to the Google Indexing API.

    Args:
        url: Absolute URL to notify Google about.
        notify_type: Notification type placed in the request body
            (defaults to 'URL_UPDATED').
        session: Optional, already-authenticated session exposing
            ``post(url, data=..., headers=...)``. When omitted, a session is
            created via get_authenticated_session() and closed before this
            function returns. Passing one lets a batch caller reuse a single
            authenticated session instead of re-authenticating per URL.

    Returns:
        bool: True when the API answered HTTP 200, False for any other
        status code.
    """
    endpoint = 'https://indexing.googleapis.com/v3/urlNotifications:publish'
    body = json.dumps({'url': url, 'type': notify_type})

    # Only close sessions created here; a caller-supplied one stays open.
    owns_session = session is None
    if owns_session:
        session = get_authenticated_session()
    try:
        resp = session.post(endpoint, data=body,
                            headers={'Content-Type': 'application/json'})
        # Error responses are not guaranteed to be JSON (e.g. an HTML error
        # page from a gateway); fall back to the raw text instead of raising.
        try:
            result = resp.json()
        except ValueError:
            result = resp.text
    finally:
        if owns_session:
            session.close()

    if resp.status_code == 200:
        metadata = {}
        if isinstance(result, dict):
            metadata = result.get('urlNotificationMetadata', {})
        notify_time = metadata.get('latestUpdate', {}).get('notifyTime', 'N/A')
        print(f'{url} → Indexed at {notify_time}')
        return True
    elif resp.status_code == 403:
        print(f'{url} → 403 Forbidden (可能需要验证域名所有权)')
        return False
    else:
        print(f'{url} → HTTP {resp.status_code}: {result}')
        return False
def push_sitemap_urls(max_urls=200):
    """Read the URLs listed in the site's sitemap and push them in a batch.

    Downloads ``{SITE}/sitemap.xml``, extracts every https ``<loc>`` entry,
    and calls notify_google() for at most ``max_urls`` of them, pausing one
    second between requests. Prints an error and exits the process with
    status 1 when the sitemap cannot be downloaded or decoded.

    Args:
        max_urls: Upper bound on URLs pushed in this run. Defaults to 200,
            the daily publish quota noted by the original author.
    """
    import re
    import urllib.request
    from xml.sax.saxutils import unescape

    sitemap_url = f'{SITE}/sitemap.xml'
    try:
        req = urllib.request.Request(sitemap_url, headers={'User-Agent': 'GFIL-IndexingAPI/1.0'})
        with urllib.request.urlopen(req, timeout=30) as r:
            xml = r.read().decode()
    except Exception as e:
        print(f'❌ 无法读取 sitemap: {e}')
        sys.exit(1)

    # Tolerate whitespace/newlines around the URL inside <loc>...</loc>.
    raw_locs = re.findall(r'<loc>\s*(https://[^<]+?)\s*</loc>', xml)
    # Sitemap files entity-escape URLs (e.g. '&' is stored as '&amp;'); the
    # API must receive the real URL, so undo the XML escaping.
    extra_entities = {'&apos;': "'", '&quot;': '"'}
    # dict.fromkeys drops duplicates while keeping sitemap order, so repeated
    # entries do not burn quota.
    urls = list(dict.fromkeys(unescape(loc, extra_entities) for loc in raw_locs))
    print(f'从 sitemap 读取到 {len(urls)} 个 URL\n')

    # Publish endpoint quota (per the original note): 200 URLs/day per
    # service account.
    limit = max(0, min(max_urls, len(urls)))
    ok = 0
    for position, url in enumerate(urls[:limit]):
        if position:
            time.sleep(1)  # 1 second between requests, none after the last
        if notify_google(url):
            ok += 1
    print(f'\n成功推送 {ok}/{limit} URLs')
def push_single_url(url):
    """Push a single URL; thin wrapper around notify_google.

    Returns:
        Whatever notify_google returns for ``url``.
    """
    outcome = notify_google(url)
    return outcome
if __name__ == '__main__':
    # Command-line entry point. Modes are checked in the order
    # --url, --all, --daily; with no flag the usage text is printed.
    import argparse

    parser = argparse.ArgumentParser(description='Google Indexing API — 15分钟秒收录')
    parser.add_argument('--url', help='推送单个 URL')
    parser.add_argument('--all', action='store_true', help='从 sitemap 批量推送每天限200个')
    parser.add_argument('--daily', action='store_true', help='每日管线模式: 推送核心页面(不超配额)')
    cli = parser.parse_args()

    print('=== Google Indexing API Push ===')
    print(f'Site: {SITE}\n')

    if cli.url:
        push_single_url(cli.url)
    elif cli.all:
        push_sitemap_urls()
    elif cli.daily:
        # Only the 8 silo hub pages are pushed; per the original note the
        # remaining pages are left for Google to reach through internal
        # links, because high-frequency pushing risks an Indexing API abuse
        # ban.
        hub_pages = (
            '',
            'position-sizing-ultimate-guide.html',
            'forex-trading-beginners.html',
            'forex-market-hours.html',
            'candlestick-trading-guide.html',
            'terminal-tools.html',
            'live-market-overview.html',
            'forex-trading-glossary.html',
        )
        silo_hubs = [f'{SITE}/tools/{page}' for page in hub_pages]

        outcomes = []
        for hub in silo_hubs:
            outcomes.append(notify_google(hub))
            time.sleep(1.0)  # pause after every request
        pushed = sum(1 for outcome in outcomes if outcome)
        print(f'\nDaily push: {pushed}/{len(silo_hubs)} silo hubs (sub-pages via natural crawl)')
    else:
        usage_lines = (
            '用法:',
            ' python indexing_api_push.py --url URL',
            ' python indexing_api_push.py --all (批量sitemap, 每天限200)',
            ' python indexing_api_push.py --daily (8个筒仓枢纽)',
        )
        for usage_line in usage_lines:
            print(usage_line)