Update README
This commit is contained in:
127
deploy_scripts/indexing_api_push.py
Normal file
127
deploy_scripts/indexing_api_push.py
Normal file
@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Gemini 地下室方案 #4: Google Indexing API — 15分钟收录
|
||||
普通 Sitemap 要等几天到几周。Indexing API 让新文章 15 分钟内出现在搜索结果。
|
||||
|
||||
配置: Google Cloud Console → 启用 Indexing API → 创建服务账号 → 下载 JSON 密钥
|
||||
文档: https://developers.google.com/search/apis/indexing-api/v3/using-rest
|
||||
"""
|
||||
import json, sys, io, os, time
|
||||
# Force UTF-8 on stdout: the status messages printed by this script contain
# emoji and Chinese text (presumably the reason for the original re-wrap).
# reconfigure() switches the encoding in place and leaves the stream's other
# settings untouched; re-wrapping the binary buffer is kept only as a fallback
# for stdout objects that lack reconfigure(). A stdout replacement with
# neither attribute is left alone instead of crashing at import time.
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')
elif hasattr(sys.stdout, 'buffer'):
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
||||
|
||||
# The Google auth libraries are third-party: print an install hint and exit
# with status 1 instead of surfacing a raw ImportError traceback.
try:
    from google.oauth2 import service_account
    from google.auth.transport.requests import AuthorizedSession
except ImportError:
    # Only google-auth and the `requests` transport it relies on are imported
    # here, so the hint names exactly those two packages.
    print("需要安装: pip install google-auth requests")
    sys.exit(1)
|
||||
|
||||
# Site whose pages are submitted, and the OAuth scope requested for the
# Indexing API.
SITE = 'https://blog.quant-view.xyz'
SCOPES = ['https://www.googleapis.com/auth/indexing']

# === Config: path to the service-account JSON key ===
# GOOGLE_APPLICATION_CREDENTIALS wins when it is set; otherwise fall back to
# the key file expected one directory above this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_DEFAULT_KEY_FILE = os.path.join(
    _SCRIPT_DIR, '..', 'gothic-venture-498218-u0-15afe4efe6f3.json')
KEY_FILE = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', _DEFAULT_KEY_FILE)
|
||||
|
||||
def get_authenticated_session():
    """Return an OAuth 2.0 authorized HTTP session for the Indexing API.

    The session is built from the service-account key at ``KEY_FILE`` with
    the ``SCOPES`` scope. It is created on the first call and reused on later
    calls, so a batch push does not re-read the key file and build a new
    session for every URL.

    Returns:
        AuthorizedSession: the shared authorized session.

    Exits:
        Calls ``sys.exit(1)`` after printing a message when the key file
        does not exist.
    """
    cached = getattr(get_authenticated_session, '_session', None)
    if cached is not None:
        return cached

    if not os.path.exists(KEY_FILE):
        print(f"❌ 密钥文件不存在: {KEY_FILE}")
        sys.exit(1)

    credentials = service_account.Credentials.from_service_account_file(
        KEY_FILE, scopes=SCOPES)
    session = AuthorizedSession(credentials)
    # Do not pick up proxy settings from the environment: requests go to
    # Google directly. (Original note: the script runs on a US server that
    # needs no proxy.)
    session.trust_env = False

    # Remember the session on the function object for subsequent calls.
    get_authenticated_session._session = session
    return session
|
||||
|
||||
def notify_google(url, notify_type='URL_UPDATED', session=None):
    """Publish one URL notification to the Google Indexing API.

    Args:
        url: URL of the page to report.
        notify_type: Notification type placed in the request body
            (default ``'URL_UPDATED'``).
        session: Optional object exposing a requests-style ``post()``.
            When omitted, ``get_authenticated_session()`` supplies one.

    Returns:
        bool: True when the API answers HTTP 200; False for any other
        status or when the request could not be sent.
    """
    endpoint = 'https://indexing.googleapis.com/v3/urlNotifications:publish'
    body = json.dumps({'url': url, 'type': notify_type})

    if session is None:
        session = get_authenticated_session()

    try:
        resp = session.post(endpoint, data=body, headers={'Content-Type': 'application/json'})
    except OSError as exc:
        # Connection/timeout failures from the requests transport derive from
        # OSError; report them and let a batch run continue with the next URL.
        print(f' ❌ {url} → request failed: {exc}')
        return False

    try:
        result = resp.json()
    except ValueError:
        # Error responses are not guaranteed to be JSON (e.g. an HTML error
        # page); fall back to the raw text so the status can still be reported.
        result = resp.text

    if resp.status_code == 200:
        payload = result if isinstance(result, dict) else {}
        notify_time = payload.get('urlNotificationMetadata', {}).get('latestUpdate', {}).get('notifyTime', 'N/A')
        print(f' ✅ {url} → Indexed at {notify_time}')
        return True
    if resp.status_code == 403:
        # Message hints that site ownership may need to be verified.
        print(f' ❌ {url} → 403 Forbidden (可能需要验证域名所有权)')
        return False

    print(f' ❌ {url} → HTTP {resp.status_code}: {result}')
    return False
|
||||
|
||||
def push_sitemap_urls():
    """Read every URL from the site's sitemap and push them in one batch.

    Fetches ``{SITE}/sitemap.xml``, extracts each https ``<loc>`` entry and
    submits at most 200 of them through ``notify_google()``, pausing one
    second between requests, then prints how many succeeded.

    Exits:
        Calls ``sys.exit(1)`` after printing a message when the sitemap
        cannot be fetched or decoded.
    """
    import html
    import re
    import urllib.request

    sitemap_url = f'{SITE}/sitemap.xml'
    try:
        req = urllib.request.Request(sitemap_url, headers={'User-Agent': 'GFIL-IndexingAPI/1.0'})
        with urllib.request.urlopen(req, timeout=30) as r:
            xml = r.read().decode()
    except Exception as e:
        # Script-level boundary: any fetch/decode failure ends the run.
        print(f'❌ 无法读取 sitemap: {e}')
        sys.exit(1)

    # <loc> values are XML text: tolerate whitespace/newlines around the URL
    # and decode entities (e.g. "&amp;" -> "&") so the real URL is submitted.
    urls = [
        html.unescape(loc)
        for loc in re.findall(r'<loc>\s*(https://[^<]+?)\s*</loc>', xml)
    ]
    print(f'从 sitemap 读取到 {len(urls)} 个 URL\n')

    # Per the original note, the publish endpoint is limited to 200 URLs per
    # day, so never submit more than that in one run.
    batch = urls[:200]
    ok = 0
    for index, url in enumerate(batch):
        if index:
            time.sleep(1)  # 1 second between requests (none after the last)
        if notify_google(url):
            ok += 1

    print(f'\n成功推送 {ok}/{len(batch)} URLs')
|
||||
|
||||
def push_single_url(url):
    """Push a single URL by delegating to ``notify_google``.

    Returns whatever ``notify_google(url)`` returns.
    """
    pushed = notify_google(url)
    return pushed
|
||||
|
||||
if __name__ == '__main__':
    import argparse

    # Command-line interface: --url takes precedence over --all, which takes
    # precedence over --daily; with no option a short usage text is printed.
    parser = argparse.ArgumentParser(description='Google Indexing API — 15分钟秒收录')
    parser.add_argument('--url', help='推送单个 URL')
    parser.add_argument('--all', action='store_true', help='从 sitemap 批量推送(每天限200个)')
    parser.add_argument('--daily', action='store_true', help='每日管线模式: 推送核心页面(不超配额)')
    options = parser.parse_args()

    print('=== Google Indexing API Push ===')
    print(f'Site: {SITE}\n')

    if options.url:
        push_single_url(options.url)
    elif options.all:
        push_sitemap_urls()
    elif options.daily:
        # Original note: push only the 8 silo hub pages and let Google reach
        # the remaining pages through internal links; high-frequency pushing
        # is said to trigger Indexing API abuse bans.
        hub_pages = (
            '',
            'position-sizing-ultimate-guide.html',
            'forex-trading-beginners.html',
            'forex-market-hours.html',
            'candlestick-trading-guide.html',
            'terminal-tools.html',
            'live-market-overview.html',
            'forex-trading-glossary.html',
        )
        silo_hubs = [f'{SITE}/tools/{page}' for page in hub_pages]

        pushed = 0
        for hub in silo_hubs:
            pushed += 1 if notify_google(hub) else 0
            time.sleep(1.0)  # pause after every request
        print(f'\nDaily push: {pushed}/{len(silo_hubs)} silo hubs (sub-pages via natural crawl)')
    else:
        usage_lines = (
            '用法:',
            ' python indexing_api_push.py --url URL',
            ' python indexing_api_push.py --all (批量sitemap, 每天限200)',
            ' python indexing_api_push.py --daily (8个筒仓枢纽)',
        )
        for line in usage_lines:
            print(line)
|
||||
Reference in New Issue
Block a user