Update README
This commit is contained in:
178
deploy_robots_v2.py
Normal file
178
deploy_robots_v2.py
Normal file
@ -0,0 +1,178 @@
|
||||
"""Deploy robots.txt to gfil-lab.com via Nginx location block + reload"""
|
||||
import os
import shlex
import time
import urllib.request

import paramiko
|
||||
|
||||
# SECURITY: the passwords used to be hard-coded in this file. Anything that was
# committed to version control must be treated as leaked and rotated. They are
# now taken from the environment; a missing variable yields an empty password,
# which makes the SSH login fail instead of silently using a stale secret.

# Jump host: the script opens its paramiko SSH session to this machine and
# runs every remote command from there.
JD_HOST = "111.228.37.165"
JD_USER = "root"
JD_PASS = os.environ.get("JD_PASS", "")

# Host that is reached from the jump host via sshpass/ssh to install
# robots.txt for gfil-lab.com.
LAB_HOST = "216.144.233.14"
LAB_USER = "root"
LAB_PASS = os.environ.get("LAB_PASS", "")
|
||||
|
||||
def _allow_all(agents):
    """Return one allow-everything robots.txt group per user agent in *agents*."""
    return "".join(f"User-agent: {agent}\nAllow: /\n" for agent in agents)


# robots.txt body written for gfil-lab.com: every listed crawler is allowed to
# fetch the whole site, followed by the sitemap location.
ROBOTS_CONTENT = (
    _allow_all(["*"])
    + "\n# === AI Search Crawlers (GEO) ===\n"
    + _allow_all(
        [
            "OAI-SearchBot",
            "ChatGPT-User",
            "GPTBot",
            "ClaudeBot",
            "anthropic-ai",
            "PerplexityBot",
            "Google-Extended",
            "GoogleOther",
        ]
    )
    + "# === China AI Search Crawlers ===\n"
    + _allow_all(["Bytespider", "DeepSeekBot", "KimiBot", "Baiduspider"])
    + "# === Russia ===\n"
    + _allow_all(["YandexBot"])
    + "\nSitemap: https://gfil-lab.com/sitemap.xml\n"
)
|
||||
|
||||
def _run_on_jump_host(client, command, limit):
    """Run *command* over the open SSH session and print what it produced.

    Args:
        client: connected ``paramiko.SSHClient``.
        command: shell command line executed on the host *client* points at.
        limit: maximum number of characters of stdout (and of stderr) printed.

    Returns:
        The complete decoded stdout of the command.
    """
    _stdin, stdout, stderr = client.exec_command(command, timeout=30)
    # Drain both streams before asking for the exit status so the remote
    # process is never blocked on a full channel window.
    output = stdout.read().decode(errors="replace")
    errors = stderr.read().decode(errors="replace")
    status = stdout.channel.recv_exit_status()
    print(output[:limit])
    # Failures of sshpass/ssh themselves (bad password, unreachable host) only
    # show up on stderr / in the exit status, so surface them as well.
    if errors.strip():
        print(f"[stderr] {errors[:limit]}")
    if status != 0:
        print(f"[exit status {status}]")
    return output


# Second target host (the remote script calls it "RackNerd"). The original
# script referenced RN_USER / RN_PASS without ever defining them, so step 2
# always died with NameError. The password comes from the environment.
RN_HOST = "107.174.186.162"
# NOTE(review): "root" is assumed because the other hosts log in as root - confirm.
RN_USER = os.environ.get("RN_USER", "root")
RN_PASS = os.environ.get("RN_PASS", "")

jd = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy (and StrictHostKeyChecking=no in the commands
# below) accept unknown host keys without verification, which allows
# man-in-the-middle attacks. Left unchanged because pinning keys needs
# known_hosts data this script does not have.
jd.set_missing_host_key_policy(paramiko.AutoAddPolicy())
try:
    jd.connect(JD_HOST, port=22, username=JD_USER, password=JD_PASS,
               timeout=20, banner_timeout=60, allow_agent=False, look_for_keys=False)

    print("[1/3] Creating robots.txt and updating Nginx on gfil-lab.com...")

    # The remote script is wrapped in single quotes for the jump host's shell.
    # '"'"' closes that quote, emits one literal single quote and reopens it,
    # so the target host sees  << 'ROBOTSEOF'  (a quoted delimiter: the
    # heredoc body is written verbatim, without shell expansion).
    # The password is quoted with shlex.quote so any character in it is safe.
    #
    # sed: the one-line GNU form "i text" is used. The previous "i\\n text"
    # form reaches sed as i\n..., where GNU sed drops the backslash and inserts
    # a literal "n" in front of the location block.
    lab_pass_arg = shlex.quote(LAB_PASS)
    cmd = f"""sshpass -p {lab_pass_arg} ssh -o StrictHostKeyChecking=no {LAB_USER}@{LAB_HOST} '
# Create robots.txt file
mkdir -p /var/www/gfil-lab
cat > /var/www/gfil-lab/robots.txt << '"'"'ROBOTSEOF'"'"'
{ROBOTS_CONTENT}
ROBOTSEOF

# Verify file was created
echo "=== robots.txt content ==="
cat /var/www/gfil-lab/robots.txt

# Add Nginx location block for robots.txt
# Insert before "location /" in the gfil config
if ! grep -q "robots.txt" /etc/nginx/sites-enabled/gfil; then
sed -i "/location \\/ {{/i location = /robots.txt {{\\n alias /var/www/gfil-lab/robots.txt;\\n default_type text/plain;\\n }}" /etc/nginx/sites-enabled/gfil
echo "Nginx config updated"
else
echo "robots.txt location already exists"
fi

echo "=== Updated Nginx config ==="
cat /etc/nginx/sites-enabled/gfil

# Test and reload Nginx
nginx -t 2>&1
if [ $? -eq 0 ]; then
systemctl reload nginx
echo "Nginx reloaded successfully"
else
echo "Nginx config test FAILED - not reloading"
fi

# Verify robots.txt is accessible
sleep 1
curl -s -o /dev/null -w "%{{http_code}}" http://localhost/robots.txt 2>/dev/null || echo "curl failed"
'"""
    _run_on_jump_host(jd, cmd, 3000)

    print("\n[2/3] Verifying gfil-intel.xyz also gets robots.txt (it proxies gfil-lab.com)...")
    if not RN_PASS:
        # Without a password the sshpass login cannot succeed; say so clearly
        # instead of producing an opaque authentication failure.
        print("RN_PASS is not set - skipping gfil-intel.xyz host update")
    else:
        rn_pass_arg = shlex.quote(RN_PASS)
        cmd2 = f"""sshpass -p {rn_pass_arg} ssh -o StrictHostKeyChecking=no {RN_USER}@{RN_HOST} '
# gfil-intel.xyz proxies to gfil-lab.com, so robots.txt should come through
# But it also has sub_filter that replaces gfil-lab.com -> $host
# Test locally
curl -s -o /dev/null -w "%{{http_code}}" -H "Host: gfil-intel.xyz" http://localhost/robots.txt 2>/dev/null || echo "direct test failed"

# Also check if gfil-mask nginx config needs a separate robots.txt location
if ! grep -q "robots.txt" /etc/nginx/sites-available/gfil-mask; then
echo "Need to add robots.txt location to gfil-mask config too"

mkdir -p /var/www/gfil-intel
cat > /var/www/gfil-intel/robots.txt << '"'"'ROBOTSEOF'"'"'
User-agent: *
Allow: /

User-agent: OAI-SearchBot
Allow: /
User-agent: ChatGPT-User
Allow: /
User-agent: GPTBot
Allow: /
User-agent: ClaudeBot
Allow: /
User-agent: anthropic-ai
Allow: /
User-agent: PerplexityBot
Allow: /
User-agent: Google-Extended
Allow: /
User-agent: GoogleOther
Allow: /
User-agent: Bytespider
Allow: /
User-agent: DeepSeekBot
Allow: /
User-agent: KimiBot
Allow: /
User-agent: Baiduspider
Allow: /
User-agent: YandexBot
Allow: /

Sitemap: https://gfil-intel.xyz/sitemap.xml
ROBOTSEOF

# Add location block before the main location /
sed -i "/location \\/ {{/i location = /robots.txt {{\\n alias /var/www/gfil-intel/robots.txt;\\n default_type text/plain;\\n }}" /etc/nginx/sites-available/gfil-mask
echo "gfil-mask config updated"

nginx -t 2>&1
if [ $? -eq 0 ]; then
systemctl reload nginx
echo "Nginx reloaded on RackNerd"
else
echo "Nginx config FAILED on RackNerd"
fi
else
echo "robots.txt location already in gfil-mask"
fi
'"""
        _run_on_jump_host(jd, cmd2, 2000)
finally:
    # Always release the SSH session, even when a step above raised.
    jd.close()

print("\n[3/3] Final verification from external...")

# Fetch robots.txt for both domains from the machine running this script and
# report the HTTP status, whether GPTBot is mentioned, and the body length.
for domain in ("gfil-lab.com", "gfil-intel.xyz"):
    req = urllib.request.Request(
        f"https://{domain}/robots.txt",
        headers={"User-Agent": "Mozilla/5.0"},
    )
    try:
        # The context manager closes the HTTP response/socket deterministically.
        with urllib.request.urlopen(req, timeout=10) as r:
            status = r.status
            content = r.read().decode()
    except Exception as e:
        # Best-effort check: any failure is reported per domain and must not
        # prevent the other domain from being tested.
        print(f" {domain}: {e}")
    else:
        has_gptbot = "GPTBot" in content
        print(f" {domain}: {status}, GPTBot={has_gptbot}, {len(content)} chars")
|
||||
Reference in New Issue
Block a user