69 lines
2.6 KiB
Python
69 lines
2.6 KiB
Python
import requests
|
|
from bs4 import BeautifulSoup
|
|
from urllib.parse import urljoin
|
|
from collections import defaultdict
|
|
|
|
def _fetch_soup(url, timeout):
    """Download *url* and return it parsed with the built-in HTML parser."""
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")


def extract_gesamt_links_club(base_url, club_name="Eschweiler SG", timeout=10):
    """Collect league, overall-schedule and team-portrait links for a club.

    The club overview page at *base_url* is scanned row by row. Rows that
    consist of a single cell (or two cells where the first carries a
    ``colspan`` attribute) are treated as section headings; every other row
    with at least two cells is treated as a team row whose second cell links
    to the league page. Each league page is then fetched to look up the
    "Gesamt" (overall schedule) link and the link whose text contains
    *club_name*.

    Args:
        base_url: URL of the club's team overview page.
        club_name: Text that identifies the club's own link on a league page.
        timeout: Seconds to wait for each HTTP request (passed to
            ``requests.get``). Without it a stalled server would block the
            scraper indefinitely.

    Returns:
        A ``defaultdict(list)`` mapping section title to a list of dicts with
        the keys ``"team"``, ``"portrait_url"``, ``"liga_url"`` and
        ``"gesamt_url"``. The portrait and overall-schedule URLs are ``None``
        when no matching link was found.

    Raises:
        requests.HTTPError: If the overview page or any league page answers
            with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    soup = _fetch_soup(base_url, timeout)

    grouped_results = defaultdict(list)
    # Teams listed before the first heading row end up under this title.
    current_section = "Unbekannt"

    for tr in soup.select("table tr"):
        tds = tr.find_all("td")

        # Section heading row (e.g. "Nordrhein 25/26").
        if len(tds) == 1 or (len(tds) == 2 and tds[0].has_attr("colspan")):
            current_section = tds[0].get_text(strip=True)
            continue

        # Rows without <td> cells (e.g. pure <th> header rows) carry no team.
        if len(tds) < 2:
            continue

        team_name = tds[0].get_text(strip=True)

        # League link lives in the second cell; skip rows that have none.
        liga_link_tag = tds[1].find("a", href=True)
        if not liga_link_tag:
            continue
        liga_url = urljoin(base_url, liga_link_tag["href"])

        # Fetch the league page to discover the remaining links.
        liga_soup = _fetch_soup(liga_url, timeout)

        # Overall schedule. href=True ensures the matched anchor really has a
        # target, so the subscript below cannot raise KeyError.
        gesamt = liga_soup.find(
            "a", href=True, string=lambda s: s and "Gesamt" in s
        )
        gesamt_url = urljoin(liga_url, gesamt["href"]) if gesamt else None

        # Team portrait: the anchor whose text contains the club name.
        portrait = liga_soup.find(
            "a", href=True, string=lambda s: s and club_name in s
        )
        portrait_url = urljoin(liga_url, portrait["href"]) if portrait else None

        grouped_results[current_section].append({
            "team": team_name,
            "portrait_url": portrait_url,
            "liga_url": liga_url,
            "gesamt_url": gesamt_url,
        })

    return grouped_results
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: scrape the club overview and dump the collected
    # links as a Markdown file, one "##" heading per section and one "###"
    # heading per team.
    club_url = "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/clubTeams?club=74726"
    grouped = extract_gesamt_links_club(club_url, club_name="Eschweiler SG")

    output_file = "ligenlinks.md"
    with open(output_file, "w", encoding="utf-8") as f:
        for section, teams in grouped.items():
            f.write(f"## {section}\n\n")
            for item in teams:
                f.write(f"### {item['team']}\n")

                if item["portrait_url"]:
                    f.write(f"- [Mannschafts-Portrait]({item['portrait_url']})\n")
                else:
                    f.write("- Mannschafts-Portrait: Nicht gefunden\n")

                f.write(f"- [Liga-Seite]({item['liga_url']})\n")

                # gesamt_url may be None; write a plain note instead of a
                # broken "(None)" link, mirroring the portrait handling.
                if item["gesamt_url"]:
                    f.write(f"- [Gesamt-Spielplan]({item['gesamt_url']})\n\n")
                else:
                    f.write("- Gesamt-Spielplan: Nicht gefunden\n\n")

    print(f"✅ Markdown-Datei erstellt: {output_file}")
|