ligenparser hinzugefügt
This commit is contained in:
68
Ligenlinks.py
Normal file
68
Ligenlinks.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
from collections import defaultdict
|
||||
|
||||
def _fetch_soup(url, timeout):
    """Download *url* and return its body parsed with ``html.parser``.

    Raises:
        requests.HTTPError: if the response status is 4xx/5xx.
        requests.RequestException: on transport failures, including a
            timeout after *timeout* seconds.
    """
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "html.parser")


def extract_gesamt_links_club(base_url, club_name="Eschweiler SG", timeout=30):
    """Collect league, full-schedule and team-portrait links for a club.

    Fetches the club's team overview at *base_url*, walks every table row,
    and for each team row that links to a league page fetches that league
    page to look up two further links on it.

    Args:
        base_url: URL of the club's team overview page. Relative league
            links are resolved against it.
        club_name: Text searched for in the league page's anchors to find
            the club's team-portrait link.
        timeout: Seconds passed to ``requests.get`` for every request, so
            a stalled server cannot hang the script forever.

    Returns:
        A ``defaultdict(list)`` mapping a section heading (``"Unbekannt"``
        until the first heading row is seen) to a list of dicts with the
        keys ``"team"``, ``"portrait_url"``, ``"liga_url"`` and
        ``"gesamt_url"``. ``portrait_url`` and ``gesamt_url`` are ``None``
        when no matching link was found on the league page.

    Raises:
        requests.HTTPError: if the overview or any league page returns an
            error status.
        requests.RequestException: on transport failures or timeouts.
    """
    soup = _fetch_soup(base_url, timeout)

    grouped_results = defaultdict(list)
    current_section = "Unbekannt"

    for tr in soup.select("table tr"):
        tds = tr.find_all("td")

        # Section heading row (e.g. "Nordrhein 25/26"): a single cell, or
        # two cells where the first one carries a colspan attribute.
        if len(tds) == 1 or (len(tds) == 2 and tds[0].has_attr("colspan")):
            current_section = tds[0].get_text(strip=True)
            continue

        # Rows without <td> cells (e.g. header rows) carry no team data.
        if len(tds) < 2:
            continue

        team_name = tds[0].get_text(strip=True)

        # League link: first anchor with an href in the second cell.
        liga_link_tag = tds[1].find("a", href=True)
        if not liga_link_tag:
            continue
        liga_url = urljoin(base_url, liga_link_tag["href"])

        # Fetch the league page to find the remaining links.
        liga_soup = _fetch_soup(liga_url, timeout)

        # Full schedule: anchor whose string contains "Gesamt". href=True
        # ensures the ["href"] lookup below cannot raise KeyError on an
        # anchor without a target. The filter may receive None, hence the
        # `s and` guard.
        gesamt = liga_soup.find(
            "a", href=True, string=lambda s: s and "Gesamt" in s
        )
        gesamt_url = urljoin(liga_url, gesamt["href"]) if gesamt else None

        # Team portrait: anchor whose string contains the club name.
        portrait = liga_soup.find(
            "a", href=True, string=lambda s: s and club_name in s
        )
        portrait_url = urljoin(liga_url, portrait["href"]) if portrait else None

        grouped_results[current_section].append({
            "team": team_name,
            "portrait_url": portrait_url,
            "liga_url": liga_url,
            "gesamt_url": gesamt_url,
        })

    return grouped_results
|
||||
|
||||
|
||||
def _link_line(label, url):
    """Return a Markdown bullet linking *label* to *url*, or a
    "Nicht gefunden" bullet when *url* is missing (``None`` or empty)."""
    if url:
        return f"- [{label}]({url})"
    return f"- {label}: Nicht gefunden"


def render_markdown(grouped):
    """Render grouped team links as a Markdown document.

    Args:
        grouped: Mapping of section heading to a list of dicts with the
            keys ``"team"``, ``"portrait_url"``, ``"liga_url"`` and
            ``"gesamt_url"``.

    Returns:
        The Markdown text: one ``##`` heading per section and, per team, a
        ``###`` heading followed by three bullets. A missing portrait or
        full-schedule URL is rendered as "Nicht gefunden" rather than as a
        broken ``(None)`` link. Returns ``""`` for an empty mapping.
    """
    parts = []
    for section, teams in grouped.items():
        parts.append(f"## {section}\n\n")
        for item in teams:
            parts.append(f"### {item['team']}\n")
            parts.append(_link_line("Mannschafts-Portrait", item["portrait_url"]) + "\n")
            parts.append(f"- [Liga-Seite]({item['liga_url']})\n")
            # Blank line after the last bullet separates team entries.
            parts.append(_link_line("Gesamt-Spielplan", item["gesamt_url"]) + "\n\n")
    return "".join(parts)


if __name__ == "__main__":
    club_url = "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/clubTeams?club=74726"
    grouped = extract_gesamt_links_club(club_url, club_name="Eschweiler SG")

    output_file = "ligenlinks.md"
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(render_markdown(grouped))

    print(f"✅ Markdown-Datei erstellt: {output_file}")
|
||||
Reference in New Issue
Block a user