#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Push URLs to the Google Indexing API so pages are (re)crawled quickly.

A plain sitemap can take days to weeks to be picked up; the original author's
stated goal is to get new articles into search results within about
15 minutes by notifying Google directly.

Setup: Google Cloud Console -> enable the Indexing API -> create a service
account -> download its JSON key. Point GOOGLE_APPLICATION_CREDENTIALS at the
key file, or place it at the default path stored in KEY_FILE.

Docs: https://developers.google.com/search/apis/indexing-api/v3/using-rest
"""

import html
import io
import json
import os
import re
import sys
import time
import urllib.request

try:
    from google.auth import exceptions as google_auth_exceptions
    from google.auth.transport.requests import AuthorizedSession
    from google.oauth2 import service_account
except ImportError:
    # Defer the failure to the point of use so the module stays importable;
    # _require_google_auth() reports the problem and exits.
    google_auth_exceptions = None
    AuthorizedSession = None
    service_account = None


def _force_utf8_stdout():
    """Make stdout emit UTF-8 so the emoji/CJK status lines print anywhere.

    Reconfigures the existing stream in place when possible. Replacing
    sys.stdout with a fresh TextIOWrapper loses line buffering (progress
    lines then show up late during the sleep-paced loops) and can close the
    shared underlying buffer when the discarded wrapper is collected.
    """
    reconfigure = getattr(sys.stdout, 'reconfigure', None)
    if reconfigure is not None:
        try:
            reconfigure(encoding='utf-8')
        except (ValueError, OSError):
            # Best effort only: a stream that refuses is left untouched.
            pass
        return
    buffer = getattr(sys.stdout, 'buffer', None)
    if buffer is not None:
        # Fallback for streams without reconfigure(); keep line buffering so
        # per-URL progress is visible immediately.
        sys.stdout = io.TextIOWrapper(buffer, encoding='utf-8', line_buffering=True)


_force_utf8_stdout()

SITE = 'https://blog.quant-view.xyz'
SCOPES = ['https://www.googleapis.com/auth/indexing']
ENDPOINT = 'https://indexing.googleapis.com/v3/urlNotifications:publish'

# Publish endpoint quota: 200 URLs/day per service account.
DAILY_QUOTA = 200

# === Config: path to the service-account JSON key ===
KEY_FILE = os.environ.get(
    'GOOGLE_APPLICATION_CREDENTIALS',
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        '..',
        'gothic-venture-498218-u0-15afe4efe6f3.json',
    ),
)

# Only the silo hub pages are pushed in --daily mode; the remaining pages are
# left for Google to reach through internal links. The original author notes
# that high-frequency pushing risks an Indexing API abuse ban.
SILO_HUBS = [
    f'{SITE}/tools/',
    f'{SITE}/tools/position-sizing-ultimate-guide.html',
    f'{SITE}/tools/forex-trading-beginners.html',
    f'{SITE}/tools/forex-market-hours.html',
    f'{SITE}/tools/candlestick-trading-guide.html',
    f'{SITE}/tools/terminal-tools.html',
    f'{SITE}/tools/live-market-overview.html',
    f'{SITE}/tools/forex-trading-glossary.html',
]

# Matches only real <loc> entries. Namespaced tags such as <image:loc> and
# attribute values such as href="https://..." do not match.
_LOC_RE = re.compile(r'<loc>\s*(https://[^<\s]+)\s*</loc>')

# Errors treated as "this one URL failed" rather than aborting a whole batch.
# requests' exceptions derive from OSError; google-auth's transport/refresh
# errors are assumed to derive from GoogleAuthError (confirm against the
# installed google-auth version).
_REQUEST_ERRORS = (OSError,)
if google_auth_exceptions is not None:
    _REQUEST_ERRORS += (google_auth_exceptions.GoogleAuthError,)


def _require_google_auth():
    """Exit with an install hint when the google-auth packages are missing."""
    if service_account is None or AuthorizedSession is None:
        print("需要安装: pip install google-auth google-auth-oauthlib google-auth-httplib2")
        sys.exit(1)


def get_authenticated_session():
    """Return an AuthorizedSession built from the service-account key.

    Exits the process with status 1 when google-auth is not installed or the
    key file at KEY_FILE does not exist.
    """
    _require_google_auth()
    if not os.path.exists(KEY_FILE):
        print(f"❌ 密钥文件不存在: {KEY_FILE}")
        sys.exit(1)
    credentials = service_account.Credentials.from_service_account_file(
        KEY_FILE, scopes=SCOPES
    )
    session = AuthorizedSession(credentials)
    # Ignore proxy settings from the environment: per the original note this
    # runs on a US server that needs no proxy to reach the Google API.
    session.trust_env = False
    return session


def notify_google(url, notify_type='URL_UPDATED', session=None):
    """Send one notification to the Indexing API and report the outcome.

    Args:
        url: Page URL to notify Google about.
        notify_type: Value sent as the "type" field of the request body.
        session: Optional authenticated session to reuse. When omitted a new
            one is created, which re-reads the key file, so batch callers
            should pass a shared session.

    Returns:
        True when the API answers HTTP 200, otherwise False. Transport and
        auth errors are printed and reported as False instead of raised.
    """
    if session is None:
        session = get_authenticated_session()
    body = json.dumps({'url': url, 'type': notify_type})
    try:
        resp = session.post(
            ENDPOINT, data=body, headers={'Content-Type': 'application/json'}
        )
    except _REQUEST_ERRORS as e:
        print(f' ❌ {url} → request failed: {e}')
        return False

    # Error responses are not guaranteed to carry a JSON body; decode
    # defensively so a non-JSON body does not raise.
    try:
        result = resp.json()
    except ValueError:
        result = None

    if resp.status_code == 200:
        payload = result if isinstance(result, dict) else {}
        notify_time = (
            payload.get('urlNotificationMetadata', {})
            .get('latestUpdate', {})
            .get('notifyTime', 'N/A')
        )
        print(f' ✅ {url} → Indexed at {notify_time}')
        return True
    if resp.status_code == 403:
        print(f' ❌ {url} → 403 Forbidden (可能需要验证域名所有权)')
        return False
    detail = result if result is not None else resp.text
    print(f' ❌ {url} → HTTP {resp.status_code}: {detail}')
    return False


def parse_sitemap_urls(xml):
    """Return the https URLs listed in <loc> elements of a sitemap document.

    Whitespace around each URL is dropped, XML entities such as ``&amp;`` are
    decoded, and duplicates are removed while keeping first-seen order so no
    daily quota is spent on the same URL twice.
    """
    seen = set()
    urls = []
    for raw in _LOC_RE.findall(xml):
        url = html.unescape(raw)
        if url not in seen:
            seen.add(url)
            urls.append(url)
    return urls


def push_urls(urls, delay=1.0):
    """Notify Google about each URL in turn over a single shared session.

    Args:
        urls: Iterable of URLs to push.
        delay: Seconds to wait between consecutive requests.

    Returns:
        Number of URLs the API accepted.
    """
    urls = list(urls)
    if not urls:
        return 0
    session = get_authenticated_session()
    ok = 0
    for index, url in enumerate(urls):
        if index:
            # Pause between requests only; no trailing sleep after the last.
            time.sleep(delay)
        if notify_google(url, session=session):
            ok += 1
    return ok


def push_sitemap_urls():
    """Read every URL from the site's sitemap and push them in one batch.

    At most DAILY_QUOTA URLs are pushed. Exits with status 1 when the sitemap
    cannot be fetched or decoded.
    """
    sitemap_url = f'{SITE}/sitemap.xml'
    try:
        req = urllib.request.Request(
            sitemap_url, headers={'User-Agent': 'GFIL-IndexingAPI/1.0'}
        )
        with urllib.request.urlopen(req, timeout=30) as r:
            xml = r.read().decode('utf-8')
    except Exception as e:  # CLI boundary: report any fetch/decode failure and stop
        print(f'❌ 无法读取 sitemap: {e}')
        sys.exit(1)

    urls = parse_sitemap_urls(xml)
    print(f'从 sitemap 读取到 {len(urls)} 个 URL\n')

    batch = urls[:DAILY_QUOTA]
    ok = push_urls(batch)
    print(f'\n成功推送 {ok}/{len(batch)} URLs')


def push_single_url(url):
    """Push a single URL; returns True when the API accepted it."""
    return notify_google(url)


def main(argv=None):
    """Command-line entry point: parse the flags and run the chosen mode."""
    import argparse

    # Checked first so a missing dependency stops the script before any other
    # output, as the original import-time check did.
    _require_google_auth()

    p = argparse.ArgumentParser(description='Google Indexing API — 15分钟秒收录')
    p.add_argument('--url', help='推送单个 URL')
    p.add_argument('--all', action='store_true', help='从 sitemap 批量推送(每天限200个)')
    p.add_argument('--daily', action='store_true', help='每日管线模式: 推送核心页面(不超配额)')
    args = p.parse_args(argv)

    print('=== Google Indexing API Push ===')
    print(f'Site: {SITE}\n')

    if args.url:
        push_single_url(args.url)
    elif args.all:
        push_sitemap_urls()
    elif args.daily:
        ok = push_urls(SILO_HUBS, delay=1.0)
        print(f'\nDaily push: {ok}/{len(SILO_HUBS)} silo hubs (sub-pages via natural crawl)')
    else:
        print('用法:')
        print(' python indexing_api_push.py --url URL')
        print(' python indexing_api_push.py --all (批量sitemap, 每天限200)')
        print(' python indexing_api_push.py --daily (8个筒仓枢纽)')


if __name__ == '__main__':
    main()