Files
gfil-blog/deploy_scripts/submit_all_engines.py
2026-06-28 17:19:47 +00:00

99 lines
3.8 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Submit sitemap to ALL major search engines — beyond just Google/Bing/Yandex.
Covers: DuckDuckGo, Brave Search, Baidu, Naver (Korea), Seznam (Czech), Ecosia.
Ping sitemap + IndexNow for broadest indexing coverage.
"""
import urllib.request, urllib.parse, sys, io, time, json, os
# Replace sys.stdout with a UTF-8 text wrapper around the underlying binary
# stream. Presumably this is so the emoji status markers printed further down
# do not fail on consoles whose default encoding is not UTF-8 -- confirm.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
# Public base URL of the blog and the sitemap that is advertised to engines.
SITE = 'https://blog.quant-view.xyz'
SITEMAP = SITE + '/sitemap.xml'

# Request headers sent with every sitemap fetch and ping.
HEADERS = {'User-Agent': 'GFIL-Blog-SEO/2.0'}

# Engine name -> ping URL. Each entry is an endpoint prefix ending in the query
# parameter that takes the sitemap location; the percent-encoded sitemap URL is
# appended to it. Insertion order is the order in which engines are pinged.
# NOTE(review): several of these anonymous ping endpoints (Google and Bing in
# particular) are believed to have been retired upstream -- confirm which ones
# still respond before relying on the success count.
ENGINES = {
    engine: endpoint + urllib.parse.quote(SITEMAP)
    for engine, endpoint in (
        # Major (already covered in indexnow_submit.py, re-ping for safety)
        ('Google', 'https://www.google.com/ping?sitemap='),
        ('Bing', 'https://www.bing.com/ping?sitemap='),
        ('Yandex', 'https://webmaster.yandex.ru/ping?sitemap='),
        # DuckDuckGo (uses Bing index, but has own crawler DuckDuckBot)
        ('DuckDuckGo', 'https://duckduckgo.com/ping?sitemap='),
        # Brave Search (own index, Goggles feature)
        ('Brave', 'https://brave.com/ping?sitemap='),
        # Baidu (China, #1 search engine)
        ('Baidu', 'https://ping.baidu.com/sitemap?url='),
        # Naver (Korea, #1 search engine)
        ('Naver', 'https://searchadvisor.naver.com/ping?sitemap='),
        # Seznam (Czech Republic, own index)
        ('Seznam', 'https://search.seznam.cz/ping?sitemap='),
        # Ecosia (uses Bing + own crawler)
        ('Ecosia', 'https://ecosia.org/ping?sitemap='),
    )
}
def ping_engine(name, url):
    """Send a GET request to one sitemap-ping URL and report the outcome.

    Args:
        name: Label of the search engine, used only in the printed message.
        url: Complete ping URL, including the encoded sitemap parameter.

    Returns:
        True if the request completed without raising; False if the server
        answered with an HTTP error status or any other exception occurred.
        Failures are printed, never propagated, so one dead endpoint does not
        abort the remaining pings.
    """
    try:
        req = urllib.request.Request(url, headers=HEADERS)
        # The context manager closes the response (and its socket) as soon as
        # the status has been reported instead of leaving it to the GC.
        with urllib.request.urlopen(req, timeout=20) as r:
            print(f'{name}: HTTP {r.status}')
        return True
    except urllib.error.HTTPError as e:
        print(f' ⚠️ {name}: HTTP {e.code} ({e.reason})')
        return False
    except Exception as e:
        # Best-effort by design: DNS failures, timeouts, malformed URLs, etc.
        print(f'{name}: {e}')
        return False
# Also submit key URLs via IndexNow (Bing+Yandex+Seznam share this)
def indexnow_full():
    """Submit every URL listed in the sitemap to the IndexNow endpoints.

    Downloads ``SITEMAP``, extracts each ``<loc>`` entry that starts with
    ``https://`` and POSTs the complete list as one JSON document to each
    IndexNow endpoint in turn. A failure at one endpoint is printed and does
    not stop the remaining endpoints from being tried.

    Returns:
        bool: True if at least one endpoint answered without an error; False
        if the sitemap could not be fetched, contained no matching URLs, or
        every endpoint failed.
    """
    # Local imports: neither module is part of the file's top-level imports.
    import html
    import re

    # Collect all URLs from sitemap
    try:
        req = urllib.request.Request(SITEMAP, headers=HEADERS)
        with urllib.request.urlopen(req, timeout=30) as r:
            sitemap_xml = r.read().decode()
    except Exception as e:
        print(f' ❌ Cannot fetch sitemap: {e}')
        return False

    # Sitemap values are XML-entity-escaped (e.g. '&amp;'); decode them so the
    # real URL is submitted, and drop whitespace padding inside the tag.
    pages = [
        html.unescape(loc).strip()
        for loc in re.findall(r'<loc>(https://[^<]+)</loc>', sitemap_xml)
    ]
    if not pages:
        print(' ❌ No URLs found in sitemap')
        return False

    # NOTE(review): IndexNow is understood to verify this key against a key
    # file hosted on the site -- confirm the hosted file matches this value.
    key = 'f8a7c3e2b1d0495a8f6c7e3d2b1a0495f'
    payload = json.dumps({'host': 'blog.quant-view.xyz', 'key': key, 'urlList': pages})
    body = payload.encode()  # encoded once, reused for every endpoint

    accepted = False
    for ep_name, ep_url in [
        ('IndexNow.org', 'https://api.indexnow.org/indexnow'),
        ('Bing', 'https://www.bing.com/indexnow'),
        ('Yandex', 'https://yandex.com/indexnow'),
        ('Seznam', 'https://search.seznam.cz/indexnow'),
    ]:
        try:
            # Supplying `data` makes urllib issue a POST.
            req = urllib.request.Request(
                ep_url,
                data=body,
                headers={'Content-Type': 'application/json; charset=utf-8', 'User-Agent': 'GFIL-Blog'},
            )
            with urllib.request.urlopen(req, timeout=20) as r:
                print(f' ✅ IndexNow {ep_name}: HTTP {r.status} ({len(pages)} URLs)')
            accepted = True
        except Exception as e:
            print(f' ❌ IndexNow {ep_name}: {e}')
    return accepted
# --- Script body: ping every engine, then push the full URL list via IndexNow.
print(
    '=== Multi-Search-Engine Sitemap Submission ===',
    f'Site: {SITE}',
    f'Sitemap: {SITEMAP}\n',
    sep='\n',
)

# Count the engines whose ping succeeded, pausing briefly after each request.
ok = 0
for name, url in ENGINES.items():
    ok += 1 if ping_engine(name, url) else 0
    time.sleep(0.5)

print(f'\n{ok}/{len(ENGINES)} engines pinged successfully')

print('\n--- IndexNow Full Batch ---')
indexnow_full()

print('\n=== Done ===')