ligenparser hinzugefügt

This commit is contained in:
Norbert
2025-09-03 14:09:15 +02:00
parent cb16d47f50
commit c4791605b1
6 changed files with 410 additions and 282 deletions

68
Ligenlinks.py Normal file
View File

@@ -0,0 +1,68 @@
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from collections import defaultdict
def extract_gesamt_links_club(base_url, club_name="Eschweiler SG", timeout=30):
    """Collect league, full-schedule and team-portrait links for a club.

    Fetches the club's team overview page at ``base_url``, walks every table
    row, and for each team row follows the league link in the second cell.
    On the league page it looks for the first link whose text contains
    "Gesamt" (full schedule) and the first link whose text contains
    ``club_name`` (team portrait).

    Args:
        base_url: URL of the club's team overview page.
        club_name: Text used to find the club's portrait link on each
            league page.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A ``defaultdict(list)`` mapping each section heading to a list of
        dicts with the keys ``"team"``, ``"portrait_url"``, ``"liga_url"``
        and ``"gesamt_url"``. ``portrait_url`` and ``gesamt_url`` are
        ``None`` when no matching link was found. Teams that appear before
        any section heading are grouped under ``"Unbekannt"``.

    Raises:
        requests.HTTPError: If the overview page or any league page
            responds with an error status.
        requests.Timeout: If a request exceeds ``timeout``.
    """
    grouped_results = defaultdict(list)
    current_section = "Unbekannt"

    # One session for all requests so the connection to the host is reused
    # instead of being re-established for every league page.
    with requests.Session() as session:
        resp = session.get(base_url, timeout=timeout)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        for tr in soup.select("table tr"):
            tds = tr.find_all("td")

            # Section heading (e.g. "Nordrhein 25/26"): a single cell, or
            # two cells where the first one carries a colspan attribute.
            if len(tds) == 1 or (len(tds) == 2 and tds[0].has_attr("colspan")):
                current_section = tds[0].get_text(strip=True)
                continue

            # Rows without data cells (e.g. pure header rows) carry no team.
            if len(tds) < 2:
                continue

            team_name = tds[0].get_text(strip=True)

            # League link lives in the second cell; skip rows without one.
            liga_link_tag = tds[1].find("a", href=True)
            if not liga_link_tag:
                continue
            liga_url = urljoin(base_url, liga_link_tag["href"])

            # Fetch the league page.
            liga_resp = session.get(liga_url, timeout=timeout)
            liga_resp.raise_for_status()
            liga_soup = BeautifulSoup(liga_resp.text, "html.parser")

            # Full schedule link. href=True guards against anchors without
            # an href, which would otherwise raise KeyError below.
            gesamt = liga_soup.find(
                "a", href=True, string=lambda s: s and "Gesamt" in s
            )
            gesamt_url = urljoin(liga_url, gesamt["href"]) if gesamt else None

            # Team portrait link, identified by the club name in its text.
            portrait = liga_soup.find(
                "a", href=True, string=lambda s: s and club_name in s
            )
            portrait_url = urljoin(liga_url, portrait["href"]) if portrait else None

            grouped_results[current_section].append({
                "team": team_name,
                "portrait_url": portrait_url,
                "liga_url": liga_url,
                "gesamt_url": gesamt_url,
            })

    return grouped_results
def render_markdown(grouped):
    """Render the grouped team links as a Markdown document.

    Args:
        grouped: Mapping of section heading to a list of dicts with the keys
            ``"team"``, ``"portrait_url"``, ``"liga_url"`` and
            ``"gesamt_url"``.

    Returns:
        The Markdown text: one ``##`` heading per section, one ``###``
        heading per team, followed by a bullet per link. Missing portrait or
        full-schedule URLs are reported as "Nicht gefunden" instead of being
        rendered as a broken link.
    """
    parts = []
    for section, teams in grouped.items():
        parts.append(f"## {section}\n\n")
        for item in teams:
            parts.append(f"### {item['team']}\n")
            if item["portrait_url"]:
                parts.append(f"- [Mannschafts-Portrait]({item['portrait_url']})\n")
            else:
                parts.append("- Mannschafts-Portrait: Nicht gefunden\n")
            parts.append(f"- [Liga-Seite]({item['liga_url']})\n")
            # Previously a missing schedule was written as a link to "None".
            if item["gesamt_url"]:
                parts.append(f"- [Gesamt-Spielplan]({item['gesamt_url']})\n\n")
            else:
                parts.append("- Gesamt-Spielplan: Nicht gefunden\n\n")
    return "".join(parts)


if __name__ == "__main__":
    club_url = "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/clubTeams?club=74726"
    grouped = extract_gesamt_links_club(club_url, club_name="Eschweiler SG")
    output_file = "ligenlinks.md"
    # Build the whole document first, then write it in a single call.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(render_markdown(grouped))
    print(f"✅ Markdown-Datei erstellt: {output_file}")