#!/usr/bin/env python3
"""
visits_report.py - Gopher hole visitor report generator
=========================================================

Reads gopher/logs/visits.log, looks up geo and ISP info for each unique IP,
classifies each one, and writes a self-contained visits_report.html with a
Leaflet world map and a sortable table.

Usage
-----
    python3 visits_report.py

Log format expected
-------------------
    2026-06-04 05:52:00 | 192.168.1.1 | gophermap

    Fields: timestamp | ip | path

Output files
------------
    visits_report.html
    ip_cache.json

Dependencies
------------
    pip install requests

Classification
--------------
IPs are assigned one of four categories (checked in order):

    sdf      - ASN AS14300, 205.166.94.*, *.sdf.org, *.sdf.net,
               *.freeshell.org hostname, or ISP/org containing "sdf"
    tor      - known Tor exit-node ASNs or ISP/org keywords
    bot      - cloud/hosting provider ASNs (AWS, GCP, Azure, Hetzner, etc.)
               or matching ISP/org keywords
    visitor  - everything else

IP geo data from ip-api.com/batch (free, no key, up to 100 IPs per request).
Reverse DNS via socket.gethostbyaddr() (best-effort, failures silently ignored).
"""

import collections
import datetime
import json
import pathlib
import socket

try:
    import requests
except ImportError:
    # Only fetch_ip_info() needs requests; defer the failure until a lookup is
    # actually required so runs served entirely from the cache still work.
    requests = None

# Log format: timestamp | ip | path
LOG_FILE = pathlib.Path("gopher/logs/visits.log")
CACHE_FILE = pathlib.Path("ip_cache.json")
OUTPUT_FILE = pathlib.Path("visits_report.html")

IP_API_BATCH_URL = "http://ip-api.com/batch"
IP_API_FIELDS = "status,message,country,regionName,city,lat,lon,isp,org,as,query"
IP_API_BATCH_SIZE = 100  # maximum number of IPs sent per batch request

SDF_ASN = "AS14300"
TOR_ASNS = {"AS60729", "AS205100", "AS44925", "AS198093"}
BOT_ASNS = {
    "AS16509",
    "AS14618",
    "AS15169",
    "AS8075",
    "AS14061",
    "AS20473",
    "AS16276",
    "AS24940",
    "AS398722",
}

CATEGORIES = {
    "sdf": {"color": "#4a90d9", "label": "SDF"},
    "tor": {"color": "#f5a623", "label": "Tor"},
    "bot": {"color": "#9b9b9b", "label": "Bot"},
    "visitor": {"color": "#e74c3c", "label": "Visitor"},
}

SDF_HOSTNAME_SUFFIXES = (".sdf.org", ".sdf.net", ".freeshell.org")

# Substrings looked for in the lower-cased ISP / org fields by classify().
SDF_KEYWORDS = ("sdf", "super dimension fortress")
TOR_KEYWORDS = ("tor project", "torservers", "anonymization")
BOT_KEYWORDS = (
    "amazon",
    "google",
    "microsoft",
    "digitalocean",
    "linode",
    "vultr",
    "ovh",
    "hetzner",
    "scaleway",
    "cloudflare",
    "censys",
    "shodan",
    "rapid7",
)


def parse_log(log_file):
    """Parse the visit log into a list of ``(timestamp, ip, path)`` tuples.

    Each record is ``timestamp | ip | path``. Blank lines, lines with fewer
    than three fields, and lines with an empty timestamp or IP are skipped.
    The path may be empty and may itself contain ``|``.

    Args:
        log_file: Path (str or path-like) of the log file to read.

    Returns:
        List of ``(timestamp, ip, path)`` string tuples in file order.
    """
    visits = []
    # Paths are supplied by remote clients and need not be valid UTF-8, so
    # undecodable bytes are replaced instead of aborting the whole report.
    with open(log_file, encoding="utf-8", errors="replace") as f:
        for line in f:
            # maxsplit=2 keeps any "|" that is part of the requested path.
            parts = [p.strip() for p in line.split("|", 2)]
            if len(parts) != 3:
                continue
            timestamp, ip, path = parts
            if not timestamp or not ip:
                continue
            visits.append((timestamp, ip, path))
    return visits


def load_cache(cache_file):
    """Load the IP-info cache from *cache_file*.

    Args:
        cache_file: Path (str or ``pathlib.Path``) of the JSON cache.

    Returns:
        The cached dict, or an empty dict when the file is missing, is not
        valid JSON, or does not hold a JSON object.
    """
    cache_file = pathlib.Path(cache_file)
    if not cache_file.exists():
        return {}
    try:
        with open(cache_file, encoding="utf-8") as f:
            cache = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as err:
        # The cache can always be rebuilt from the API, so a damaged file
        # should not stop the report from being generated.
        print(f" Ignoring unreadable cache {cache_file}: {err}")
        return {}
    return cache if isinstance(cache, dict) else {}


def save_cache(cache, cache_file):
    """Write *cache* to *cache_file* as indented JSON."""
    with open(cache_file, "w", encoding="utf-8") as f:
        json.dump(cache, f, indent=2)


def fetch_ip_info(ips, cache):
    """Look up every IP in *ips* that is not yet in *cache*.

    Missing IPs are sent to the ip-api.com batch endpoint in chunks of
    ``IP_API_BATCH_SIZE``; each result is stored in *cache* under the
    ``query`` value it reports.

    Args:
        ips: Sized collection of IP address strings.
        cache: Dict mapping IP -> API result; updated in place.

    Returns:
        The same *cache* object.

    Raises:
        RuntimeError: lookups are needed but ``requests`` is not installed.
        requests.HTTPError: the API answered with an error status.
    """
    missing = [ip for ip in ips if ip not in cache]
    print(f" {len(ips) - len(missing)} cached, {len(missing)} to fetch")
    if not missing:
        return cache
    if requests is None:
        raise RuntimeError(
            "the 'requests' package is required to look up uncached IPs "
            "(pip install requests)"
        )

    for start in range(0, len(missing), IP_API_BATCH_SIZE):
        batch = missing[start:start + IP_API_BATCH_SIZE]
        payload = [{"query": ip, "fields": IP_API_FIELDS} for ip in batch]
        resp = requests.post(IP_API_BATCH_URL, json=payload, timeout=30)
        resp.raise_for_status()
        for result in resp.json():
            ip_addr = result.get("query")
            if ip_addr:
                cache[ip_addr] = result
    return cache


def reverse_dns(ip):
    """Return the reverse-DNS hostname for *ip*, or ``None`` if lookup fails."""
    try:
        return socket.gethostbyaddr(ip)[0]
    except (OSError, ValueError):
        # OSError covers socket.herror / gaierror / timeout; ValueError covers
        # malformed input taken from the log (e.g. encoding failures).
        return None


def _mentions(fields, keywords):
    """Return True if any of *keywords* is a substring of any of *fields*."""
    return any(kw in field for kw in keywords for field in fields)


def classify(info, ip, hostname=None):
    """Assign an IP to one of the ``CATEGORIES`` keys.

    Checks run in order: ``sdf``, ``tor``, ``bot``; anything that matches
    none of them is a ``visitor``.

    Args:
        info: ip-api result dict; the ``as``, ``isp`` and ``org`` keys are
            read and may be missing or ``None``.
        ip: The IP address string.
        hostname: Optional reverse-DNS name of the IP.

    Returns:
        ``"sdf"``, ``"tor"``, ``"bot"`` or ``"visitor"``.
    """
    info = info or {}
    # The "as" field looks like "AS14300 Some Name"; only the code matters.
    # Splitting first avoids an IndexError on a whitespace-only value.
    asn_parts = (info.get("as") or "").split()
    asn_code = asn_parts[0] if asn_parts else ""
    isp = (info.get("isp") or "").lower()
    org = (info.get("org") or "").lower()
    # A fully-qualified name may carry a trailing dot; drop it so the suffix
    # comparison below still matches.
    host = (hostname or "").lower().rstrip(".")
    owner_fields = (isp, org)

    if (
        asn_code == SDF_ASN
        or ip.startswith("205.166.94.")
        or _mentions(owner_fields, SDF_KEYWORDS)
        or any(host == s.lstrip(".") or host.endswith(s) for s in SDF_HOSTNAME_SUFFIXES)
    ):
        return "sdf"

    if asn_code in TOR_ASNS or _mentions(owner_fields, TOR_KEYWORDS):
        return "tor"

    if asn_code in BOT_ASNS or _mentions(owner_fields, BOT_KEYWORDS):
        return "bot"

    return "visitor"


def build_records(visits, cache):
    """Aggregate raw visits into one report record per IP.

    Args:
        visits: Iterable of ``(timestamp, ip, path)`` tuples.
        cache: Dict mapping IP -> ip-api result dict.

    Returns:
        List of record dicts (keys ``ip``, ``hostname``, ``category``,
        ``location``, ``lat``, ``lon``, ``isp``, ``org``, ``asn``,
        ``visitCount``, ``pages``, ``lastSeen``, ``visits``, ``hasGeo``),
        sorted by visit count, highest first.
    """
    ip_visits = collections.defaultdict(list)
    for timestamp, ip, path in visits:
        ip_visits[ip].append({"timestamp": timestamp, "path": path})

    print(" Running reverse DNS...")
    records = []
    for ip, visit_list in ip_visits.items():
        info = cache.get(ip) or {}
        lookup_ok = info.get("status") == "success"
        hostname = reverse_dns(ip)
        # Failed lookups carry no usable ISP/ASN data, so classify on the IP
        # and hostname alone in that case.
        category = classify(info if lookup_ok else {}, ip, hostname)

        # Timestamps are compared as strings, newest first.
        sorted_visits = sorted(visit_list, key=lambda v: v["timestamp"], reverse=True)
        place_parts = [
            info.get("city") or "",
            info.get("regionName") or "",
            info.get("country") or "",
        ]
        location = ", ".join(p for p in place_parts if p) or "Unknown"

        records.append({
            "ip": ip,
            "hostname": hostname,
            "category": category,
            "location": location,
            "lat": info.get("lat"),
            "lon": info.get("lon"),
            "isp": info.get("isp") or "",
            "org": info.get("org") or "",
            "asn": info.get("as") or "",
            "visitCount": len(visit_list),
            "pages": len({v["path"] for v in visit_list}),
            "lastSeen": sorted_visits[0]["timestamp"],
            "visits": sorted_visits,
            "hasGeo": (
                lookup_ok
                and info.get("lat") is not None
                and info.get("lon") is not None
            ),
        })

    records.sort(key=lambda r: r["visitCount"], reverse=True)
    return records


CSS = """
* { box-sizing: border-box; margin: 0; padding: 0; }
body { background: #1a1a1a; color: #e0e0e0; font-family: monospace; font-size: 13px; }
#summary { padding: 12px 16px; background: #111; border-bottom: 1px solid #333; display: flex; gap: 20px; align-items: center; flex-wrap: wrap; }
.summary-total { color: #aaa; }
.summary-generated { color: #555; margin-left: auto; }
.cat-ips { color: #555; font-size: 11px; }
#map { height: 500px; border-bottom: 1px solid #333; }
.leaflet-container { background: #1a1a1a !important; }
#table-wrapper { padding: 16px; overflow-x: auto; }
table { width: 100%; border-collapse: collapse; }
th { background: #222; color: #aaa; padding: 8px 10px; text-align: left; cursor: pointer; white-space: nowrap; user-select: none; border-bottom: 1px solid #333; }
th:hover { background: #2a2a2a; color: #e0e0e0; }
th.sort-asc::after { content: " ▲"; font-size: 10px; }
th.sort-desc::after { content: " ▼"; font-size: 10px; }
td { padding: 7px 10px; border-bottom: 1px solid #222; vertical-align: middle; }
tr:hover td { background: #242424; cursor: pointer; }
.badge { display: inline-block; padding: 2px 7px; border-radius: 3px; font-size: 11px; font-weight: bold; color: #fff; }
.hostname { color: #888; font-size: 11px; display: block; }
.popup-visits { max-height: 180px; overflow-y: auto; margin-top: 6px; font-size: 11px; }
.popup-visit-row { padding: 2px 0; border-bottom: 1px solid #333; }
.popup-visits::-webkit-scrollbar { width: 6px; }
.popup-visits::-webkit-scrollbar-track { background: #333; }
.popup-visits::-webkit-scrollbar-thumb { background: #555; }
.leaflet-popup-content-wrapper { background: #222; color: #e0e0e0; border: 1px solid #444; border-radius: 4px; box-shadow: none; }
.leaflet-popup-tip { background: #222; }
.leaflet-popup-content { font-family: monospace; font-size: 12px; line-height: 1.5; }
.leaflet-popup-close-button { color: #aaa !important; }
"""

# NOTE(review): this template is incomplete in the copy under review.
#   * The entity escapes inside esc() had been decoded/stripped; they are
#     reconstructed here as the usual &amp; / &lt; / &gt; / &quot; set.
#     Confirm against the original file.
#   * Everything after the legend's innerHTML template literal is missing;
#     the literal is closed at the last visible token and the remainder of
#     the script must be restored from the original file.
JS = """
function esc(s) {
  return String(s || '').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
}

// --- Map ---
const map = L.map('map', { zoom: 2, center: [20, 0] });
L.tileLayer('https://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}{r}.png', {
  attribution: '© OpenStreetMap © CARTO',
  subdomains: 'abcd',
  maxZoom: 19
}).addTo(map);

// Legend
const legend = L.control({ position: 'bottomright' });
legend.onAdd = function() {
  const div = L.DomUtil.create('div');
  div.style.cssText = 'background:#222;padding:10px 14px;border:1px solid #444;border-radius:4px;font-family:monospace;font-size:12px;color:#e0e0e0;line-height:1.8';
  div.innerHTML = Object.entries(CATS).map(([k, v]) => `
"""
| IP / Hostname | \n' 'Type | \n' 'Location | \n' 'ISP / Org | \n' 'ASN | \n' 'Visits | \n' 'Pages | \n' 'Last Seen | \n' '
|---|