"""Collect league, full-schedule and team-portrait links from a club's team page.

The script downloads a club's team overview page, follows each team's league
link, and writes the links it finds to a Markdown file grouped by section.
"""

from collections import defaultdict
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Seconds to wait for the server before giving up on a request. Without a
# timeout, requests would block forever on a stalled connection.
DEFAULT_TIMEOUT = 30


def _fetch_soup(url, timeout):
    """Download *url* and return its parsed HTML; raise on HTTP error status."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def _find_link_url(soup, page_url, needle):
    """Return the absolute URL of the first ``<a href>`` whose text contains *needle*.

    Returns ``None`` when no such anchor exists. Anchors without an ``href``
    attribute are ignored so that the lookup can never raise ``KeyError``.
    """
    anchor = soup.find("a", href=True, string=lambda s: s and needle in s)
    return urljoin(page_url, anchor["href"]) if anchor else None


def extract_gesamt_links_club(base_url, club_name="Eschweiler SG", *, timeout=DEFAULT_TIMEOUT):
    """Collect per-team league links from a club's team overview page.

    Every table row of the page at *base_url* is inspected. A row with a
    single cell (or two cells where the first has a ``colspan`` attribute) is
    treated as a section heading, e.g. "Nordrhein 25/26". Any other row with
    at least two cells is treated as a team row: the link in its second cell
    is followed, and that league page is searched for a link whose text
    contains "Gesamt" (full schedule) and one whose text contains *club_name*
    (team portrait).

    Args:
        base_url: URL of the club's team overview page.
        club_name: Text that identifies the club's portrait link on a league page.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A ``defaultdict(list)`` mapping section heading to a list of dicts
        with the keys ``team``, ``portrait_url``, ``liga_url`` and
        ``gesamt_url``. ``portrait_url`` and ``gesamt_url`` are ``None`` when
        the corresponding link was not found. Teams listed before any section
        heading are grouped under "Unbekannt".

    Raises:
        requests.HTTPError: If any fetched page responds with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    soup = _fetch_soup(base_url, timeout)

    grouped_results = defaultdict(list)
    current_section = "Unbekannt"

    for row in soup.select("table tr"):
        cells = row.find_all("td")

        # Section heading (e.g. "Nordrhein 25/26").
        if len(cells) == 1 or (len(cells) == 2 and cells[0].has_attr("colspan")):
            current_section = cells[0].get_text(strip=True)
            continue

        # Rows without <td> cells (e.g. header rows) carry no team data.
        if len(cells) < 2:
            continue

        team_name = cells[0].get_text(strip=True)

        # League link; rows without one cannot be followed.
        liga_link_tag = cells[1].find("a", href=True)
        if not liga_link_tag:
            continue
        liga_url = urljoin(base_url, liga_link_tag["href"])

        # Fetch the league page and look up both links relative to it.
        liga_soup = _fetch_soup(liga_url, timeout)
        gesamt_url = _find_link_url(liga_soup, liga_url, "Gesamt")
        portrait_url = _find_link_url(liga_soup, liga_url, club_name)

        grouped_results[current_section].append({
            "team": team_name,
            "portrait_url": portrait_url,
            "liga_url": liga_url,
            "gesamt_url": gesamt_url,
        })

    return grouped_results


def _render_markdown(grouped):
    """Render the grouped link data as a Markdown document string."""
    parts = []
    for section, teams in grouped.items():
        parts.append(f"## {section}\n\n")
        for item in teams:
            parts.append(f"### {item['team']}\n")
            if item["portrait_url"]:
                parts.append(f"- [Mannschafts-Portrait]({item['portrait_url']})\n")
            else:
                parts.append("- Mannschafts-Portrait: Nicht gefunden\n")
            parts.append(f"- [Liga-Seite]({item['liga_url']})\n")
            # A missing schedule link must not be emitted as "(None)".
            if item["gesamt_url"]:
                parts.append(f"- [Gesamt-Spielplan]({item['gesamt_url']})\n\n")
            else:
                parts.append("- Gesamt-Spielplan: Nicht gefunden\n\n")
    return "".join(parts)


if __name__ == "__main__":
    club_url = "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/clubTeams?club=74726"
    grouped = extract_gesamt_links_club(club_url, club_name="Eschweiler SG")

    output_file = "ligenlinks.md"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(_render_markdown(grouped))

    print(f"✅ Markdown-Datei erstellt: {output_file}")