# Web-viewer header (not part of the script): gfil-blog/deploy_robots_v2.py, 179 lines, 5.0 KiB, Python

"""Deploy robots.txt to gfil-lab.com via Nginx location block + reload"""
import os
import shlex
import time
import urllib.request

import paramiko
# Connection settings for the two servers this script logs in to:
#   JD_*  - the host paramiko connects to directly (used as a jump host)
#   LAB_* - the gfil-lab.com server, reached from the jump host via sshpass/ssh
#
# Every value can be overridden with an environment variable of the same name,
# so credentials no longer have to live in the source file.
#
# SECURITY NOTE(review): the fallback passwords below are committed in plain
# text. They are kept only so existing invocations keep working; rotate them
# and supply JD_PASS / LAB_PASS through the environment instead.
JD_HOST = os.environ.get("JD_HOST", "111.228.37.165")
JD_USER = os.environ.get("JD_USER", "root")
JD_PASS = os.environ.get("JD_PASS", "Liudecai110")
LAB_HOST = os.environ.get("LAB_HOST", "216.144.233.14")
LAB_USER = os.environ.get("LAB_USER", "root")
LAB_PASS = os.environ.get("LAB_PASS", "Kt9V72Tx2c48ChikKU")
# robots.txt body deployed for gfil-lab.com.
# Each entry is (heading comment or None, user agents); every listed agent gets
# its own "User-agent" line followed by "Allow: /", in the order given here.
_ROBOTS_GROUPS = (
    (None, ("*",)),
    (
        "# === AI Search Crawlers (GEO) ===",
        (
            "OAI-SearchBot",
            "ChatGPT-User",
            "GPTBot",
            "ClaudeBot",
            "anthropic-ai",
            "PerplexityBot",
            "Google-Extended",
            "GoogleOther",
        ),
    ),
    (
        "# === China AI Search Crawlers ===",
        ("Bytespider", "DeepSeekBot", "KimiBot", "Baiduspider"),
    ),
    ("# === Russia ===", ("YandexBot",)),
)

_robots_lines = []
for _heading, _agents in _ROBOTS_GROUPS:
    if _heading is not None:
        _robots_lines.append(_heading)
    for _agent in _agents:
        _robots_lines.extend((f"User-agent: {_agent}", "Allow: /"))
_robots_lines.append("Sitemap: https://gfil-lab.com/sitemap.xml")

# The text ends with a single trailing newline.
ROBOTS_CONTENT = "\n".join(_robots_lines) + "\n"
# Open the SSH session to the JD jump host. Both remote commands later in the
# script are executed through this one connection.
# Agent and key-file lookup are switched off, so the password is the credential
# offered. Host keys are handled by paramiko's AutoAddPolicy.
_jd_connect_options = {
    "port": 22,
    "username": JD_USER,
    "password": JD_PASS,
    "timeout": 20,
    "banner_timeout": 60,
    "allow_agent": False,
    "look_for_keys": False,
}
jd = paramiko.SSHClient()
jd.set_missing_host_key_policy(paramiko.AutoAddPolicy())
jd.connect(JD_HOST, **_jd_connect_options)
print("[1/3] Creating robots.txt and updating Nginx on gfil-lab.com...")
# Step 1 runs on the lab server, reached from the jump host with sshpass + ssh.
# The remote script:
#   1. writes ROBOTS_CONTENT to /var/www/gfil-lab/robots.txt through a heredoc
#   2. inserts a `location = /robots.txt` block before each `location / {` line
#      of the gfil site config, unless the config already mentions robots.txt
#   3. reloads Nginx only when `nginx -t` succeeds
#   4. prints the HTTP status code of http://localhost/robots.txt
#
# Quoting layers: the whole remote script is one single-quoted argument on the
# jump host. The sequence '"'"' closes that quote, emits a literal single quote
# and reopens it, so the lab server sees `<< 'ROBOTSEOF'` (a quoted heredoc,
# no expansion of the robots.txt text). Doubled braces are f-string escapes.
#
# The password is passed through shlex.quote so that any character it contains
# is safe on the jump host's command line.
#
# `sed -i --follow-symlinks`: entries under sites-enabled are normally symlinks
# into sites-available; plain `sed -i` would replace the symlink with a regular
# file instead of editing the file it points to.
#
# NOTE(review): the sed insert text is unescaped by Python and then by the
# remote shell's double quotes before sed sees it. Check the inserted block in
# the "Updated Nginx config" output; `nginx -t` guards the reload either way.
cmd = f"""sshpass -p {shlex.quote(LAB_PASS)} ssh -o StrictHostKeyChecking=no {LAB_USER}@{LAB_HOST} '
# Create robots.txt file
mkdir -p /var/www/gfil-lab
cat > /var/www/gfil-lab/robots.txt << '"'"'ROBOTSEOF'"'"'
{ROBOTS_CONTENT}
ROBOTSEOF
# Verify file was created
echo "=== robots.txt content ==="
cat /var/www/gfil-lab/robots.txt
# Add Nginx location block for robots.txt
# Insert before "location /" in the gfil config
if ! grep -q "robots.txt" /etc/nginx/sites-enabled/gfil; then
sed -i --follow-symlinks "/location \\/ {{/i\\\\n location = /robots.txt {{\\n alias /var/www/gfil-lab/robots.txt;\\n default_type text/plain;\\n }}" /etc/nginx/sites-enabled/gfil
echo "Nginx config updated"
else
echo "robots.txt location already exists"
fi
echo "=== Updated Nginx config ==="
cat /etc/nginx/sites-enabled/gfil
# Test and reload Nginx
nginx -t 2>&1
if [ $? -eq 0 ]; then
systemctl reload nginx
echo "Nginx reloaded successfully"
else
echo "Nginx config test FAILED - not reloading"
fi
# Verify robots.txt is accessible
sleep 1
curl -s -o /dev/null -w "%{{http_code}}" http://localhost/robots.txt 2>/dev/null || echo "curl failed"
'"""
stdin, stdout, stderr = jd.exec_command(cmd, timeout=30)
# Drain both streams before asking for the exit status, and tolerate bytes
# that are not valid UTF-8 (the dumped Nginx config is arbitrary text).
output = stdout.read().decode(errors="replace")
error_output = stderr.read().decode(errors="replace")
exit_status = stdout.channel.recv_exit_status()
print(output[:3000])
if len(output) > 3000:
    print(f"... [{len(output) - 3000} more characters not shown]")
# A failed sshpass/ssh hop writes only to stderr and exits non-zero, so report
# both instead of silently printing an empty result.
if exit_status != 0:
    print(f"  WARNING: step 1 remote command exited with status {exit_status}")
if error_output.strip():
    print(f"  stderr: {error_output.strip()[:1000]}")
print("\n[2/3] Verifying gfil-intel.xyz also gets robots.txt (it proxies gfil-lab.com)...")
# Credentials for the server that fronts gfil-intel.xyz. The command below has
# always referenced RN_USER / RN_PASS, but the file never defined them, so this
# step raised NameError. They are now read from the environment.
# NOTE(review): "root" is assumed as the default user because the other two
# hosts use it - confirm. No password was ever committed, so RN_PASS has no
# usable default and the step is skipped when it is missing.
RN_HOST = os.environ.get("RN_HOST", "107.174.186.162")
RN_USER = os.environ.get("RN_USER", "root")
RN_PASS = os.environ.get("RN_PASS", "")
# Step 2 runs on that server, again hopping through the jump host. The remote
# script prints the HTTP status of /robots.txt requested locally with the
# gfil-intel.xyz Host header, then, only if the gfil-mask site config does not
# mention robots.txt yet, writes /var/www/gfil-intel/robots.txt, inserts a
# `location = /robots.txt` block before each `location / {` line and reloads
# Nginx when `nginx -t` succeeds.
# The robots.txt text embedded here is a separate copy (own Sitemap URL, no
# section comments); keep its crawler list in sync with ROBOTS_CONTENT.
# Quoting: the remote script is one single-quoted argument; '"'"' yields a
# literal single quote inside it; doubled braces are f-string escapes. The
# password goes through shlex.quote so any character in it is shell-safe.
cmd2 = f"""sshpass -p {shlex.quote(RN_PASS)} ssh -o StrictHostKeyChecking=no {RN_USER}@{RN_HOST} '
# gfil-intel.xyz proxies to gfil-lab.com, so robots.txt should come through
# But it also has sub_filter that replaces gfil-lab.com -> $host
# Test locally
curl -s -o /dev/null -w "%{{http_code}}" -H "Host: gfil-intel.xyz" http://localhost/robots.txt 2>/dev/null || echo "direct test failed"
# Also check if gfil-mask nginx config needs a separate robots.txt location
if ! grep -q "robots.txt" /etc/nginx/sites-available/gfil-mask; then
echo "Need to add robots.txt location to gfil-mask config too"
mkdir -p /var/www/gfil-intel
cat > /var/www/gfil-intel/robots.txt << '"'"'ROBOTSEOF'"'"'
User-agent: *
Allow: /
User-agent: OAI-SearchBot
Allow: /
User-agent: ChatGPT-User
Allow: /
User-agent: GPTBot
Allow: /
User-agent: ClaudeBot
Allow: /
User-agent: anthropic-ai
Allow: /
User-agent: PerplexityBot
Allow: /
User-agent: Google-Extended
Allow: /
User-agent: GoogleOther
Allow: /
User-agent: Bytespider
Allow: /
User-agent: DeepSeekBot
Allow: /
User-agent: KimiBot
Allow: /
User-agent: Baiduspider
Allow: /
User-agent: YandexBot
Allow: /
Sitemap: https://gfil-intel.xyz/sitemap.xml
ROBOTSEOF
# Add location block before the main location /
sed -i "/location \\/ {{/i\\\\n location = /robots.txt {{\\n alias /var/www/gfil-intel/robots.txt;\\n default_type text/plain;\\n }}" /etc/nginx/sites-available/gfil-mask
echo "gfil-mask config updated"
nginx -t 2>&1
if [ $? -eq 0 ]; then
systemctl reload nginx
echo "Nginx reloaded on RackNerd"
else
echo "Nginx config FAILED on RackNerd"
fi
else
echo "robots.txt location already in gfil-mask"
fi
'"""
if RN_PASS:
    stdin, stdout, stderr = jd.exec_command(cmd2, timeout=30)
    # Drain both streams before asking for the exit status.
    output = stdout.read().decode(errors="replace")
    error_output = stderr.read().decode(errors="replace")
    exit_status = stdout.channel.recv_exit_status()
    print(output[:2000])
    if len(output) > 2000:
        print(f"... [{len(output) - 2000} more characters not shown]")
    # A failed sshpass/ssh hop writes only to stderr and exits non-zero.
    if exit_status != 0:
        print(f"  WARNING: step 2 remote command exited with status {exit_status}")
    if error_output.strip():
        print(f"  stderr: {error_output.strip()[:1000]}")
else:
    print("  RN_PASS is not set in the environment; skipping the gfil-intel.xyz update")
print("\n[3/3] Final verification from external...")
# The jump-host session is no longer needed; the checks below run from the
# machine executing this script.
jd.close()
# Fetch robots.txt over HTTPS from each public domain and report the status
# code, whether the body mentions GPTBot, and the body length.
for domain in ['gfil-lab.com', 'gfil-intel.xyz']:
    try:
        req = urllib.request.Request(f'https://{domain}/robots.txt',
                                     headers={'User-Agent': 'Mozilla/5.0'})
        # The context manager closes the HTTP response even if read/decode fails.
        with urllib.request.urlopen(req, timeout=10) as r:
            status = r.status
            content = r.read().decode()
    except Exception as e:
        # Best-effort check: report the failure (HTTP error, timeout, TLS or
        # decode problem) for this domain and move on to the next one.
        print(f" {domain}: {e}")
    else:
        has_gptbot = 'GPTBot' in content
        print(f" {domain}: {status}, GPTBot={has_gptbot}, {len(content)} chars")