Files
gfil-blog/check_github_links.py

32 lines
1.2 KiB
Python

"""Check all GitHub links on live pages"""
import urllib.request
import re
# Page file names, each fetched from the /tools/ path of the blog host below.
pages = ['entity.html', 'media.html', 'gfil-faq.html', 'about-gfil.html']

# Patterns are loop-invariant, so compile them once rather than per page.
# _GH_HREF_RE captures the full URL of every href that points at github.com;
# _GH_TEXT_RE matches any github.com/<path> occurrence anywhere in the markup
# (hrefs included), so the two counts overlap by design.
_GH_HREF_RE = re.compile(r'href="(https://github\.com/[^"]+)"')
_GH_TEXT_RE = re.compile(r'github\.com/[\w\-/]+')

# Substrings whose containing lines are echoed (truncated) for manual review.
_KEYWORDS = ('liudecai', 'liudapao')

for page in pages:
    url = f'https://blog.quant-view.xyz/tools/{page}'
    try:
        # The context manager closes the HTTP response (and its socket) even
        # when read() or decode() raises.
        with urllib.request.urlopen(url, timeout=15) as r:
            # Honour the charset declared in Content-Type, falling back to
            # UTF-8. Undecodable bytes are replaced instead of aborting the
            # page: every pattern searched for below is pure ASCII, so a stray
            # byte elsewhere in the document cannot affect the results.
            charset = r.headers.get_content_charset() or 'utf-8'
            html = r.read().decode(charset, errors='replace')

        gh_links = _GH_HREF_RE.findall(html)
        gh_text = _GH_TEXT_RE.findall(html)

        print(f'\n=== {page} ===')
        # Counts include duplicates; the listings below are de-duplicated and
        # sorted so that output is stable from run to run.
        print(f'GitHub href links: {len(gh_links)}')
        for link in sorted(set(gh_links)):
            print(f' href: {link}')
        print(f'GitHub text refs: {len(gh_text)}')
        for t in sorted(set(gh_text)):
            print(f' text: {t}')

        # Also check for liudecai or liudapao: print every line mentioning a
        # keyword, all hits for the first keyword before any for the second.
        html_lines = html.split('\n')
        for keyword in _KEYWORDS:
            label = keyword.upper()
            for line in html_lines:
                if keyword in line:
                    print(f' {label}: {line.strip()[:200]}')
    except Exception as e:
        # Deliberate best-effort boundary: a page that fails to download or
        # process is reported and the remaining pages are still checked.
        print(f'{page}: ERROR {e}')