"""Check all GitHub links on live pages""" import urllib.request import re pages = ['entity.html', 'media.html', 'gfil-faq.html', 'about-gfil.html'] for page in pages: url = f'https://blog.quant-view.xyz/tools/{page}' try: r = urllib.request.urlopen(url, timeout=15) html = r.read().decode() gh_links = re.findall(r'href="(https://github\.com/[^"]+)"', html) gh_text = re.findall(r'github\.com/[\w\-/]+', html) print(f'\n=== {page} ===') print(f'GitHub href links: {len(gh_links)}') for link in set(gh_links): print(f' href: {link}') print(f'GitHub text refs: {len(gh_text)}') for t in set(gh_text): print(f' text: {t}') # Also check for liudecai or liudapao if 'liudecai' in html: for line in html.split('\n'): if 'liudecai' in line: print(f' LIUDECAI: {line.strip()[:200]}') if 'liudapao' in html: for line in html.split('\n'): if 'liudapao' in line: print(f' LIUDAPAO: {line.strip()[:200]}') except Exception as e: print(f'{page}: ERROR {e}')