128 lines
5.0 KiB
Python
128 lines
5.0 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Gemini 地下室方案 #4: Google Indexing API — 15分钟收录
|
||
普通 Sitemap 要等几天到几周。Indexing API 让新文章 15 分钟内出现在搜索结果。
|
||
|
||
配置: Google Cloud Console → 启用 Indexing API → 创建服务账号 → 下载 JSON 密钥
|
||
文档: https://developers.google.com/search/apis/indexing-api/v3/using-rest
|
||
"""
|
||
import json, sys, io, os, time
|
||
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
||
|
||
try:
|
||
from google.oauth2 import service_account
|
||
from google.auth.transport.requests import AuthorizedSession
|
||
except ImportError:
|
||
print("需要安装: pip install google-auth google-auth-oauthlib google-auth-httplib2")
|
||
sys.exit(1)
|
||
|
||
# Base URL of the site whose pages are submitted.
SITE = 'https://blog.quant-view.xyz'
# OAuth scopes requested for the service-account credentials.
SCOPES = ['https://www.googleapis.com/auth/indexing']

# === Config: path to the service-account JSON key ===
# GOOGLE_APPLICATION_CREDENTIALS takes precedence when set; otherwise the key
# is looked up one directory above this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_DEFAULT_KEY_FILE = os.path.join(
    _SCRIPT_DIR, '..', 'gothic-venture-498218-u0-15afe4efe6f3.json')
KEY_FILE = os.getenv('GOOGLE_APPLICATION_CREDENTIALS', _DEFAULT_KEY_FILE)
|
||
|
||
# Process-wide cache: a single authorized session is reused for every
# notification instead of reloading the key file for each URL.
_SESSION_CACHE = None


def get_authenticated_session():
    """Return an authorized HTTP session built from the service-account key.

    The credentials are loaded from ``KEY_FILE`` with the scopes in ``SCOPES``.
    The session is created on the first call and returned unchanged on later
    calls, so a batch push does not re-read the key file for every URL.

    Returns:
        AuthorizedSession: session that attaches the service-account
        credentials to its requests.

    Raises:
        SystemExit: with status 1 when ``KEY_FILE`` does not exist.
    """
    global _SESSION_CACHE
    if _SESSION_CACHE is not None:
        return _SESSION_CACHE

    if not os.path.exists(KEY_FILE):
        print(f"❌ 密钥文件不存在: {KEY_FILE}")
        sys.exit(1)

    credentials = service_account.Credentials.from_service_account_file(KEY_FILE, scopes=SCOPES)

    session = AuthorizedSession(credentials)
    # Ignore environment-provided settings such as proxies: per the original
    # note the script runs on a US server that needs no proxy.
    session.trust_env = False
    _SESSION_CACHE = session
    return session
|
||
|
||
def notify_google(url, notify_type='URL_UPDATED'):
    """Send one URL notification to the Indexing API publish endpoint.

    Args:
        url: Page URL to report.
        notify_type: Value of the ``type`` field in the request body;
            defaults to ``'URL_UPDATED'``.

    Returns:
        bool: True when the API answers HTTP 200, False for any other status.
    """
    endpoint = 'https://indexing.googleapis.com/v3/urlNotifications:publish'
    body = json.dumps({'url': url, 'type': notify_type})

    session = get_authenticated_session()
    resp = session.post(endpoint, data=body, headers={'Content-Type': 'application/json'})

    # An error reply is not guaranteed to be JSON (for example an HTML error
    # page); fall back to the raw text so one bad reply cannot abort a batch.
    try:
        result = resp.json()
    except ValueError:
        result = resp.text

    if resp.status_code == 200:
        metadata = result.get('urlNotificationMetadata', {}) if isinstance(result, dict) else {}
        notify_time = metadata.get('latestUpdate', {}).get('notifyTime', 'N/A')
        print(f' ✅ {url} → Indexed at {notify_time}')
        return True

    if resp.status_code == 403:
        print(f' ❌ {url} → 403 Forbidden (可能需要验证域名所有权)')
        return False

    print(f' ❌ {url} → HTTP {resp.status_code}: {result}')
    return False
|
||
|
||
def push_sitemap_urls():
    """Read the URLs listed in the site's sitemap and push them in a batch.

    Downloads ``{SITE}/sitemap.xml``, extracts every ``https://`` ``<loc>``
    entry, and sends at most 200 of them through ``notify_google`` with a
    one-second pause after each request. A summary line is printed at the end.

    Raises:
        SystemExit: with status 1 when the sitemap cannot be fetched or read.
    """
    import html
    import re
    import urllib.request

    sitemap_url = f'{SITE}/sitemap.xml'
    try:
        req = urllib.request.Request(sitemap_url, headers={'User-Agent': 'GFIL-IndexingAPI/1.0'})
        with urllib.request.urlopen(req, timeout=30) as r:
            xml = r.read().decode()
        # Tolerate whitespace/newlines that some generators put inside <loc>.
        raw_locs = re.findall(r'<loc>\s*(https://[^<]+?)\s*</loc>', xml)
        # URLs in sitemap XML are entity-escaped (& is written as &amp;), so
        # decode them before submitting. dict.fromkeys drops duplicates while
        # keeping order, so no quota is spent twice on the same URL.
        urls = list(dict.fromkeys(html.unescape(loc) for loc in raw_locs))
        print(f'从 sitemap 读取到 {len(urls)} 个 URL\n')
    except Exception as e:
        print(f'❌ 无法读取 sitemap: {e}')
        sys.exit(1)

    # Daily publish quota: only the first 200 URLs are sent.
    limit = min(200, len(urls))
    ok = 0
    for url in urls[:limit]:
        if notify_google(url):
            ok += 1
        time.sleep(1)  # pause 1 second between requests

    print(f'\n成功推送 {ok}/{limit} URLs')
|
||
|
||
def push_single_url(url):
    """Push a single URL and return what ``notify_google`` reports for it."""
    outcome = notify_google(url)
    return outcome
|
||
|
||
if __name__ == '__main__':
    import argparse

    # Command-line entry point. Modes are honoured in the order
    # --url, --all, --daily; with none of them the usage text is printed.
    parser = argparse.ArgumentParser(description='Google Indexing API — 15分钟秒收录')
    parser.add_argument('--url', help='推送单个 URL')
    parser.add_argument('--all', action='store_true', help='从 sitemap 批量推送(每天限200个)')
    parser.add_argument('--daily', action='store_true', help='每日管线模式: 推送核心页面(不超配额)')
    options = parser.parse_args()

    print('=== Google Indexing API Push ===')
    print(f'Site: {SITE}\n')

    if not (options.url or options.all or options.daily):
        # No mode selected: show the usage summary.
        for usage_line in (
            '用法:',
            ' python indexing_api_push.py --url URL',
            ' python indexing_api_push.py --all (批量sitemap, 每天限200)',
            ' python indexing_api_push.py --daily (8个筒仓枢纽)',
        ):
            print(usage_line)
    elif options.url:
        push_single_url(options.url)
    elif options.all:
        push_sitemap_urls()
    else:
        # Daily mode: push only the 8 silo hub pages and leave the remaining
        # pages to be reached through internal links. Per the original note,
        # high-frequency pushing can trigger an Indexing API abuse ban.
        silo_hubs = [
            f'{SITE}/tools/',
            f'{SITE}/tools/position-sizing-ultimate-guide.html',
            f'{SITE}/tools/forex-trading-beginners.html',
            f'{SITE}/tools/forex-market-hours.html',
            f'{SITE}/tools/candlestick-trading-guide.html',
            f'{SITE}/tools/terminal-tools.html',
            f'{SITE}/tools/live-market-overview.html',
            f'{SITE}/tools/forex-trading-glossary.html',
        ]
        outcomes = []
        for hub in silo_hubs:
            outcomes.append(notify_google(hub))
            time.sleep(1.0)
        ok = sum(1 for pushed in outcomes if pushed)
        print(f'\nDaily push: {ok}/{len(silo_hubs)} silo hubs (sub-pages via natural crawl)')
|