diff --git a/.ipynb_checkpoints/ids-checkpoint.py b/.ipynb_checkpoints/ids-checkpoint.py new file mode 100644 index 0000000..6982aa4 --- /dev/null +++ b/.ipynb_checkpoints/ids-checkpoint.py @@ -0,0 +1,346 @@ +import requests +import re +from bs4 import BeautifulSoup +#from hallen import * +NameTabelle=0 +Alias=1 +KürzelLiga=2 +Shortname=3 +LigaLink=4 +TeamLink=5 + +#team = [[0 for x in range(5)] for y in range(12)] +team = [] +team.append(["Eschweiler SG","1. Herren","VL M", "H1", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=HNR+25%2F26&group=423989","https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2106481&pageState=vorrunde&championship=HNR+25%2F26&group=423989"]) +team.append(["Eschweiler SG II","2. Herren","ROL M", "H2", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424090","https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2102102&pageState=vorrunde&championship=AD+25%2F26&group=424090"]) +team.append(["Eschweiler SG III","3. Herren","RL M", "H3", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424285", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2105342&pageState=vorrunde&championship=AD+25%2F26&group=424285"]) +team.append(["Eschweiler SG IV","4. Herren","RK M", "H4", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424285", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2105340&pageState=vorrunde&championship=AD+25%2F26&group=424285"]) +#team.append(["Eschweiler SG V","5. Herren","RK M", "H5", "", ""]) +team.append(["Eschweiler SG", "1. Damen","ROL F", "D1", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424164", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2110804&pageState=vorrunde&championship=AD+25%2F26&group=424164"]) +team.append(["Eschweiler SG II", "2. Damen","RL F", "D2", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424341", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2110809&pageState=vorrunde&championship=AD+25%2F26&group=424341"]) +#team.append(["Eschweiler SG", "mA-Jugend", "OLNR MJA", "mA", "https://hvmittelrhein-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=RLNR+22%2F23&group=304425","https://hvmittelrhein-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=1822430&pageState=vorrunde&championship=RLNR+22%2F23&group=304425"] +team.append(["Eschweiler SG", "mB-Jugend", "ROL MJB", "mB", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424304", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2106066&pageState=vorrunde&championship=AD+25%2F26&group=424304"]) +#team.append(["Eschweiler SG", "wB-Jugend", "ROL WJB", "wB", "", ""]) +team.append(["Eschweiler SG", "mC-Jugend", "ROL MJC", "mC", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424048", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2107124&pageState=vorrunde&championship=AD+25%2F26&group=424048"]) +team.append(["Eschweiler SG II", "mC-Jugend II", "RL MJC", "mC2", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424095", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2107128&pageState=vorrunde&championship=AD+25%2F26&group=424095"]) +#team.append(["Eschweiler SG", "wC-Jugend", "KL WJC", "wC", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+23%2F24&group=332524", ""]) +team.append(["Eschweiler SG", "gD-Jugend", "ROL gJD", "gJD", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424295", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2118271&pageState=vorrunde&championship=AD+25%2F26&group=424295"]) +team.append(["Eschweiler SG II", "gD-Jugend II", "ROL gJD", "gJD2", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=450985", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2118273&pageState=vorrunde&championship=AD+25%2F26&group=450985"]) +#team.append(["Eschweiler SG", "wD-Jugend", "ROL WJD", "wD", "", ""]) +team.append(["Eschweiler SG", "gE-Jugend", "ROL gJE", "gJE", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424179", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2118337&pageState=vorrunde&championship=AD+25%2F26&group=424179"]) +team.append(["Eschweiler SG II", "gE-Jugend II", "RL gJE", "gJE2", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424147", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2118343&pageState=vorrunde&championship=AD+25%2F26&group=424147"]) +team.append(["Eschweiler SG III", "gE-Jugend III", "1.RK gJE", "gJE3", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=444505", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2118359&pageState=vorrunde&championship=AD+25%2F26&group=444505"]) +#team.append(["Eschweiler SG", "F-Jugend", "ROL gJF", "F", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=424334", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2122602&pageState=vorrunde&championship=AD+25%2F26&group=424334"]) +#team.append(["Eschweiler SG II", "F-Jugend II", "RL gJF", "F", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=452865", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2122619&pageState=vorrunde&championship=AD+25%2F26&group=452865"]) +#team.append(["Eschweiler SG II", "F-Jugend III", "1.RK gJF", "F", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/groupPage?championship=AD+25%2F26&group=452867", "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/teamPortrait?teamtable=2122632&pageState=vorrunde&championship=AD+25%2F26&group=452867"]) + +#hallen = [[0 for x in range(2)] for y in range(71)] +hallen = [] +hallen.append(["02001", "AC2"]) +hallen.append(["02002", "AC1"]) +hallen.append(["02003", "AC1A"]) +hallen.append(["02005", "AC3"]) +hallen.append(["02006", "AC4"]) +hallen.append(["02007", "M2"]) +hallen.append(["02012", "W1"]) +hallen.append(["02014", "W3"]) +hallen.append(["02018", "R"]) +hallen.append(["02019", "ST1"]) +hallen.append(["02020", "ST2"]) +hallen.append(["02022", "E1"]) +hallen.append(["02023", "E2"]) +hallen.append(["02029", "KSPH"]) +hallen.append(["02030", "BIRK"]) +hallen.append(["02031", "GÜRZ"]) +hallen.append(["02034", "NIE Gesamtschule"]) +hallen.append(["02035", "JÜL Schulzentrum"]) +hallen.append(["02036", "JÜL/B Berufsschule"]) +hallen.append(["02039", "A1 Gesamtschule"]) +hallen.append(["02042", "S1 Am Weiher"]) +hallen.append(["02044", "ÜP1 Barbarastrasse"]) +hallen.append(["02048", "EY Eynatten/B"]) +hallen.append(["02050", "AC7 Bergische Gasse"]) +hallen.append(["02051", "LIN/B Bendenweg"]) +hallen.append(["02061", "E5 Berufsschule"]) +hallen.append(["02062", "R1 Rosentalstrasse 38a"]) +hallen.append(["2106", "Tann."]) +hallen.append(["2110", "Ringstr."]) +hallen.append(["2115", "Königswinter"]) +hallen.append(["2118", "Neuenhof"]) +hallen.append(["2123", "Niederpleis II"]) +hallen.append(["2124", "Hennef West"]) +hallen.append(["2126", "Gymn."]) +hallen.append(["2138", "Marienschule EU"]) +hallen.append(["2140", "Eusk."]) +hallen.append(["2145", "Heimer."]) +hallen.append(["2147", "Hauptschule Kall"]) +hallen.append(["2154", "Bertold-Brecht-Gesamtschule"]) +hallen.append(["2155", "Oberpl."]) +hallen.append(['02013', 'W2 Krottstrasse']) +hallen.append(['1170', 'E6 Bergrath']) +hallen.append(['4112', 'Bergn.']) +hallen.append(['3161', 'BergGl.']) +hallen.append(['4122', 'Vossbr.']) +hallen.append(['2112', 'Beuel']) +hallen.append(['4104', 'GWN']) +hallen.append(['4101', 'Nut.']) +hallen.append(['3117', 'Pulh.']) +hallen.append(['2122', 'Nieder']) +hallen.append(['2131', 'Rund.']) +hallen.append(['02025', 'E4']) +hallen.append(['4120', 'SpH DBG']) +hallen.append(['4106', 'Eugen.']) +hallen.append(['3109', 'Europ.']) +hallen.append(['02024', 'E3']) +hallen.append(["5025", "GHZ2"]) +hallen.append(["4231", "Mühlh."]) +hallen.append(["3431", "Haan"]) +hallen.append(["7011", "Mönch."]) +hallen.append(["3152", "Wahn"]) +hallen.append(["3137", "Bockl."]) +hallen.append(["4112", "Bergn."]) +hallen.append(["3142", "Europ."]) +hallen.append(["3154", "Rheind."]) +hallen.append(["4114", "Marien."]) +hallen.append(["2114", "Rheinb."]) +hallen.append(["1133", "Lange."]) +hallen.append(["6801", "Kerken"]) +hallen.append(["6904", "Lank"]) +hallen.append(["2142", "HGH BM"]) +hallen.append(["03043", "Zülp. I"]) +hallen.append(["03039", "Kuch."]) +hallen.append(["03044", "Heim."]) +hallen.append(["06012", "FBH"]) +hallen.append(["06078", "Nipp."]) +hallen.append(["07002", "Scharf."]) +hallen.append(["08035", "Hilfarth"]) +hallen.append(["02049", "LAII"]) +hallen.append(["02046", "R1"]) +hallen.append(["06051", "Wahn"]) +hallen.append(["06024", "Wess."]) +hallen.append(["06076", "BTV SZ."]) +hallen.append(["06034", "Bicken."]) + + +link_nächsteSpiele = "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/clubMeetings?searchType=0&searchTimeRange=5&searchTimeRangeFrom=&searchTimeRangeTo=&selectedTeamId=WONoSelectionString&club=74726&searchMeetings=Suchen" +REQUEST_TIMEOUT = 20 +REQUEST_SESSION = requests.Session() +WHITESPACE_RE = re.compile(r"(\s{2,})") +POKAL_RE = re.compile(r".\-(\w*)\s.*Jgd") +LEAGUE_CLASS_RE = re.compile( + r"\b(Regionalliga|Regionsoberliga|Regionsliga|Verbandsliga|Oberliga|Landesliga|" + r"Bezirksliga|Kreisliga|Kreisliga|Kreisklasse)(?:\s+\d+)?\b", + re.IGNORECASE, +) +REGION_SEASON_PREFIX_RE = re.compile( + r"^[A-Za-zÄÖÜäöüß]+(?:/[A-Za-zÄÖÜäöüß]+)+\s+\d{4}/\d{2}\s*", + re.IGNORECASE, +) + +def halle(nummer): + for x in range(len(hallen)): + if str(nummer)==hallen[x][0]: + answer = re.split(r'\s+',hallen[x][1])[0] + break + else: + answer = nummer + return(answer) + +def tabelle(team): + #print(team[Alias] +' '+ team[LigaLink]) + data = fetch_table(team[LigaLink], 0) + return [row[1:10] for row in data] + +def getliga(team): + soup = fetch_soup(team[LigaLink]) + abschnitt = soup.find(id="content-col1") + if abschnitt is None: + return "" + + text = abschnitt.get_text(" ", strip=True) + text = re.split(r"\bBemerkungen?\b", text, maxsplit=1, flags=re.IGNORECASE)[0].strip() + text = re.split(r"\bTabelle\s+und\s+Spielplan\b", text, maxsplit=1, flags=re.IGNORECASE)[0].strip() + text = REGION_SEASON_PREFIX_RE.sub("", text).strip() + + match = LEAGUE_CLASS_RE.search(text) + if match: + name = match.group(0) + else: + # Fallback: only first compact chunk, never full text block. + name = re.split(r"\s{2,}| - | \| ", text, maxsplit=1)[0].strip() + + name=re.sub(r"m[ABCDEF]-Jugend", '', name) + name=name.replace(' Männer','') + name=name.replace(' Frauen','') + name=re.sub(r"\w\-Jugend", '', name) + name=re.sub(r"Jugend\sB", '', name) + name=re.sub(r"(männliche)|(weibliche)", '', name) + name=re.sub(r"[mw][ABCDEF]\s", '', name) + name=name.replace(' Spielform','') + #print(name) + return(name) + +def teamspielplan(team): + data = fetch_table(team[TeamLink], 1) + return [row[0:8] for row in data] + +def Teamspielplan_kuerzen(Mannschaft): + #if "Eschweiler" in Mannschaft: + # Mannschaft ='\\textbf{'+Mannschaft+'}' + if "Borussia Brand (o.W.)" in Mannschaft: + Mannschaft="Bor. Brand (o.W.)" + if "VfR Übach-Palenberg (o.W.)" in Mannschaft: + Mannschaft="Übach-Palenberg (oW)" + if "Schwarz-Rot Aachen (o.W.)" in Mannschaft: + Mannschaft="SR Aachen(o.W.)" + if "SSV Nümbrecht Handball II" in Mannschaft: + Mannschaft="SSV Nümbrecht II" + if "HSG Marienheide/Müllenbach" in Mannschaft: + Mannschaft="HSG Marienheide/Mb" + if "HSG Geislar-Oberkassel" in Mannschaft: + Mannschaft="Geislar-Oberkassel" + if "HBD Löwen Oberberg II" in Mannschaft: + Mannschaft="HBD Oberberg II" + if "JSG Hiesfeld/Aldenrade" in Mannschaft: + Mannschaft="JSG Hiesfeld/Alden." + if "VfR Übach-Palenberg (a.K.)" in Mannschaft: + Mannschaft="VfR Übach-Palenberg a.K." + if "VfR Übach-Palenberg Handball e.V." in Mannschaft: + Mannschaft="VfR Übach-Palenberg" + if "ASV Rurtal Hückelhoven" in Mannschaft: + Mannschaft="ASV Rurtal" + if "HSG Merkstein / Bardenberg III" in Mannschaft: + Mannschaft="HSG Merk./Barden. III" + if "HSG Merkstein / Bardenberg IV" in Mannschaft: + Mannschaft="HSG Merk./Barden. IV" + if "JSG Bardenberg / Merkstein" in Mannschaft: + Mannschaft="JSG Bardenb./Merk." + return(Mannschaft) + +def fetch_table(Link, nummer): + soup = fetch_soup(Link) + tables = soup.find_all('table', attrs={'class':'result-set'}) + if nummer >= len(tables): + return [] + + table = tables[nummer] + rows = table.find_all('tr') + if not rows: + return [] + + header_cells = rows[0].find_all(['th', 'td']) + header = [cell.text.strip().lower() for cell in header_cells] + skip_indices = {index for index, title in enumerate(header) if "bemerkung" in title} + + data = [] + for row in rows[1:]: + cols = [ele.text.strip() for ele in row.find_all('td')] + if skip_indices: + cols = [value for index, value in enumerate(cols) if index not in skip_indices] + data.append(cols) + return(data) + +def fetch_soup(link): + try: + response = REQUEST_SESSION.get(link, timeout=REQUEST_TIMEOUT) + except requests.RequestException: + print(link) + return BeautifulSoup("", 'html.parser') + Fi = str(response.content.decode('utf-8', 'ignore')) + Fi_cleaned = WHITESPACE_RE.sub(' ', str(Fi)) + return BeautifulSoup(Fi_cleaned, 'html.parser') + +def naechsteSpiele(number): + data=fetch_table(link_nächsteSpiele,0) + for x in range(len(data)): + data[x]=data[x][0:8] + CTR = 0 + for x in range(len(data)): + if data[x][0] != '': + CTR+=1 + if CTR == number: + break + tabelle=data[:x] + for x in range(len(tabelle)): + if tabelle[x][3] != '': + tabelle[x][3]=halle(tabelle[x][3]) + for x in range(len(tabelle)): + match=list(filter(lambda y:y[KürzelLiga]==tabelle[x][5], team)) + if match != []: + #Zwei Mannschaften in einer Liga + if (tabelle[x][5]=="KK 2 M"): + if (tabelle[x][6] =="Eschweiler SG IV"): + tabelle[x][6] = '\\textbf{4.Herren}' + elif (tabelle[x][7] =="Eschweiler SG IV"): + tabelle[x][7] = '\\textbf{4.Herren}' + if (tabelle[x][5]=="KL M"): + if (tabelle[x][6] =="Eschweiler SG II"): + tabelle[x][6] = '\\textbf{2.Herren}' + elif (tabelle[x][7] =="Eschweiler SG II"): + tabelle[x][7] = '\\textbf{2.Herren}' + if tabelle[x][6].startswith('Eschweiler'): + tabelle[x][6]= '\\textbf{' + match[0][Alias] + '}' + elif tabelle[x][7].startswith('Eschweiler'): + tabelle[x][7]= '\\textbf{' + match[0][Alias] + '}' + else: + print(tabelle[x][5]) + continue + if (tabelle[x][5]=="FSF"): + if tabelle[x][6].startswith('Eschweiler'): + tabelle[x][6] = '\\textbf{F-Jugend}' + elif tabelle[x][7].startswith('Eschweiler'): + tabelle[x][7] = '\\textbf{F-Jugend}' + if (tabelle[x][5]=="Mi"): + if tabelle[x][6].startswith('Eschweiler'): + tabelle[x][6] = '\\textbf{Minis}' + else: + tabelle[x][7] = '\\textbf{Minis}' + pokal=POKAL_RE.findall(str(tabelle[x][5])) + if (len(pokal) != 0 and tabelle[x][6] !="spielfrei" and tabelle[x][7] !="spielfrei"): + #print('POKAL!!!', pokal) + parse=re.compile(r"\sSG\s(I.*)") + #print(tabelle[x][5], tabelle[x][6], tabelle[x][7]) + if tabelle[x][6].startswith('Eschweiler'): + #print("erl.: ", str(pokal[0][0]), " ", str(pokal[0][1])) + nummer=parse.findall(str(tabelle[x][6])) + if len(nummer) == 0: + nummer = 'I' + else: + nummer=nummer[0] + #print('Nummer', nummer) + if pokal[0][1] == 'M': + nummer=len(nummer)-1 + tabelle[x][6] = "\\textbf{Pokal "+ team[nummer][Alias]+"}" + elif pokal[0][1] == 'F': + tabelle[x][6] = "\\textbf{Pokal Damen}" + else: + tabelle[x][6] = "\\textbf{"+ tabelle[x][5]+"}" + if tabelle[x][7].startswith('Eschweiler'): + nummer=parse.findall(str(tabelle[x][7])) + #print('Nummer', nummer) + if pokal[0][1] == 'M': + nummer=len(nummer)-1 + tabelle[x][6] = "\\textbf{Pokal "+ team[nummer][Alias]+"}" + elif pokal[0][1] == 'F': + tabelle[x][7] = "\\textbf{Damen Pokal}" + else: + tabelle[x][7] = "\\textbf{"+ tabelle[x][5]+"}" + tabelle[x][6]=Teamspielplan_kuerzen(tabelle[x][6]) + tabelle[x][7]=Teamspielplan_kuerzen(tabelle[x][7]) + tabelle = [x for x in tabelle if x[0] != 'Termin offen'] + last_date='' + for x in range(len(tabelle)): + if x == 0: + last_date=tabelle[x][1] + else: + if tabelle[x][1] == '': + #print('cont') + continue + else: + if tabelle[x][1] == last_date: + tabelle[x][0] = '' + tabelle[x][1] = '' + else: + last_date = tabelle[x][1] + #print(last_date) + return(tabelle) diff --git a/ids.py b/ids.py index 7eebbef..6982aa4 100644 --- a/ids.py +++ b/ids.py @@ -1,8 +1,6 @@ import requests import re from bs4 import BeautifulSoup -import pandas as pd -import numpy as np #from hallen import * NameTabelle=0 Alias=1 @@ -125,6 +123,19 @@ hallen.append(["06034", "Bicken."]) link_nächsteSpiele = "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/clubMeetings?searchType=0&searchTimeRange=5&searchTimeRangeFrom=&searchTimeRangeTo=&selectedTeamId=WONoSelectionString&club=74726&searchMeetings=Suchen" +REQUEST_TIMEOUT = 20 +REQUEST_SESSION = requests.Session() +WHITESPACE_RE = re.compile(r"(\s{2,})") +POKAL_RE = re.compile(r".\-(\w*)\s.*Jgd") +LEAGUE_CLASS_RE = re.compile( + r"\b(Regionalliga|Regionsoberliga|Regionsliga|Verbandsliga|Oberliga|Landesliga|" + r"Bezirksliga|Kreisliga|Kreisliga|Kreisklasse)(?:\s+\d+)?\b", + re.IGNORECASE, +) +REGION_SEASON_PREFIX_RE = re.compile( + r"^[A-Za-zÄÖÜäöüß]+(?:/[A-Za-zÄÖÜäöüß]+)+\s+\d{4}/\d{2}\s*", + re.IGNORECASE, +) def halle(nummer): for x in range(len(hallen)): @@ -137,29 +148,27 @@ def halle(nummer): def tabelle(team): #print(team[Alias] +' '+ team[LigaLink]) - data=fetch_table(team[LigaLink], 0) - for x in range(len(data)): - data[x]=data[x][1:10] - return(data) + data = fetch_table(team[LigaLink], 0) + return [row[1:10] for row in data] def getliga(team): - try: - response = requests.get(team[LigaLink]) - except: - print(team) - Fi =str(response.content.decode('utf-8', 'ignore')) - Fi_cleaned = re.sub(r'(\s{2,})',' ',str(Fi)) - soup = BeautifulSoup(Fi_cleaned, 'html.parser') + soup = fetch_soup(team[LigaLink]) abschnitt = soup.find(id="content-col1") - #print("abschnitt") - #print(abschnitt) - parse=re.compile(r"br\/>\s(.*)\s\
") - name=str(parse.findall(str(abschnitt))[0]) - #print("Name") - #print(name) - if len(name) > 20: - if re.search(r'((.*)\s)\
',str(name)) is not None: - name=re.search(r'((.*)\s)\
',str(name)).group(1) + if abschnitt is None: + return "" + + text = abschnitt.get_text(" ", strip=True) + text = re.split(r"\bBemerkungen?\b", text, maxsplit=1, flags=re.IGNORECASE)[0].strip() + text = re.split(r"\bTabelle\s+und\s+Spielplan\b", text, maxsplit=1, flags=re.IGNORECASE)[0].strip() + text = REGION_SEASON_PREFIX_RE.sub("", text).strip() + + match = LEAGUE_CLASS_RE.search(text) + if match: + name = match.group(0) + else: + # Fallback: only first compact chunk, never full text block. + name = re.split(r"\s{2,}| - | \| ", text, maxsplit=1)[0].strip() + name=re.sub(r"m[ABCDEF]-Jugend", '', name) name=name.replace(' Männer','') name=name.replace(' Frauen','') @@ -172,27 +181,8 @@ def getliga(team): return(name) def teamspielplan(team): - data=fetch_table(team[TeamLink],1) - for x in range(len(data)): - data[x]=data[x][0:8] - response = requests.get(team[TeamLink]) - Fi =str(response.content.decode('utf-8', 'ignore')) - Fi_cleaned = re.sub(r'(\s{2,})',' ',str(Fi)) - soup = BeautifulSoup(Fi_cleaned, 'html.parser') - nummern=[] - #print(Fi_cleaned) - #print(soup) - for link in soup.find_all('a'): - #print(link.contents) - try: - if len(link.contents[0]) == 4: - nummern.append(link.contents[0]) - except: - pass - del nummern[0] - #for x in range(len(nummern)): - # data[x][3]=halle(nummern[x]) - return(data) + data = fetch_table(team[TeamLink], 1) + return [row[0:8] for row in data] def Teamspielplan_kuerzen(Mannschaft): #if "Eschweiler" in Mannschaft: @@ -228,30 +218,40 @@ def Teamspielplan_kuerzen(Mannschaft): return(Mannschaft) def fetch_table(Link, nummer): - #print(Link) - try: - response = requests.get(Link) - except: - print(Link) - Fi =str(response.content.decode('utf-8', 'ignore')) - Fi_cleaned = re.sub(r'(\s{2,})',' ',str(Fi)) - soup = BeautifulSoup(Fi_cleaned, 'html.parser') - data = [] - table = soup.find_all('table', attrs={'class':'result-set'})[nummer] + soup = fetch_soup(Link) + tables = soup.find_all('table', attrs={'class':'result-set'}) + if nummer >= len(tables): + return [] + + table = tables[nummer] rows = table.find_all('tr') - for row in rows: - cols = row.find_all('td') - cols = [ele.text.strip() for ele in cols] - data.append([ele for ele in cols]) - del data[0] + if not rows: + return [] + + header_cells = rows[0].find_all(['th', 'td']) + header = [cell.text.strip().lower() for cell in header_cells] + skip_indices = {index for index, title in enumerate(header) if "bemerkung" in title} + + data = [] + for row in rows[1:]: + cols = [ele.text.strip() for ele in row.find_all('td')] + if skip_indices: + cols = [value for index, value in enumerate(cols) if index not in skip_indices] + data.append(cols) return(data) +def fetch_soup(link): + try: + response = REQUEST_SESSION.get(link, timeout=REQUEST_TIMEOUT) + except requests.RequestException: + print(link) + return BeautifulSoup("", 'html.parser') + Fi = str(response.content.decode('utf-8', 'ignore')) + Fi_cleaned = WHITESPACE_RE.sub(' ', str(Fi)) + return BeautifulSoup(Fi_cleaned, 'html.parser') + def naechsteSpiele(number): data=fetch_table(link_nächsteSpiele,0) - response = requests.get(link_nächsteSpiele) - Fi =str(response.content.decode('utf-8', 'ignore')) - Fi_cleaned = re.sub(r'(\s{2,})',' ',str(Fi)) - soup = BeautifulSoup(Fi_cleaned, 'html.parser') for x in range(len(data)): data[x]=data[x][0:8] CTR = 0 @@ -261,12 +261,6 @@ def naechsteSpiele(number): if CTR == number: break tabelle=data[:x] - links=[] - nummern=[] - #for link in soup.find_all('a'): - # if len(link.contents[0]) == 4: - # nummern.append(link.contents[0]) - #del nummern[0] for x in range(len(tabelle)): if tabelle[x][3] != '': tabelle[x][3]=halle(tabelle[x][3]) @@ -301,9 +295,7 @@ def naechsteSpiele(number): tabelle[x][6] = '\\textbf{Minis}' else: tabelle[x][7] = '\\textbf{Minis}' - parse=re.compile(r".\-(\w*)\s.*Jgd") - pokal=parse.findall(str(tabelle[x][5])) - print(pokal) + pokal=POKAL_RE.findall(str(tabelle[x][5])) if (len(pokal) != 0 and tabelle[x][6] !="spielfrei" and tabelle[x][7] !="spielfrei"): #print('POKAL!!!', pokal) parse=re.compile(r"\sSG\s(I.*)") @@ -346,9 +338,9 @@ def naechsteSpiele(number): continue else: if tabelle[x][1] == last_date: - tabelle[x][0] =='' - tabelle[x][1] == '' + tabelle[x][0] = '' + tabelle[x][1] = '' else: last_date = tabelle[x][1] #print(last_date) - return(tabelle) \ No newline at end of file + return(tabelle) diff --git a/tabelle.pdf b/tabelle.pdf index c2f34b9..d68955d 100644 Binary files a/tabelle.pdf and b/tabelle.pdf differ diff --git a/tabelle.tex b/tabelle.tex index 0be0806..87f9a7b 100644 --- a/tabelle.tex +++ b/tabelle.tex @@ -38,7 +38,7 @@ Link zur liga\end{figure}\end{textblock} \begin{center} \vspace{0cm} - {\scalefont{3}1. Herren}\\\scalefont{2}Verbandsliga Gr. 5\\ + {\scalefont{3}1. Herren}\\\scalefont{2}Verbandsliga\\ \vspace{0cm} \end{center} \rowcolors{2}{gray!25}{white} @@ -356,7 +356,7 @@ Sa.&21.03.2026&17:00&VfR Übach-Palenberg&\textbf{Eschweiler SG II}&&ÜP1\\ Link zur liga\end{figure}\end{textblock} \begin{center} \vspace{0cm} - {\scalefont{3}mB-Jugend}\\\scalefont{2}Regionsoberliga
Tabelle und Spielplan (Aktuell)

Bemerkungen

Die Finalturniere finden am Wochenende des 14./15.03.26 statt. Über die Zusammenstellung der Turniere wird Ende des Jahres entschieden. \\ + {\scalefont{3}mB-Jugend}\\\scalefont{2}Regionsoberliga\\ \vspace{0cm} \end{center} \rowcolors{2}{gray!25}{white} @@ -400,7 +400,7 @@ Di.&10.03.2026&19:00&HSG Münsterbachtal&\textbf{Eschweiler SG}&&AC3\\ Link zur liga\end{figure}\end{textblock} \begin{center} \vspace{0cm} - {\scalefont{3}mC-Jugend}\\\scalefont{2}Regionsoberliga Jugend C\\ + {\scalefont{3}mC-Jugend}\\\scalefont{2}Regionsoberliga\\ \vspace{0cm} \end{center} \rowcolors{2}{gray!25}{white} @@ -443,7 +443,7 @@ Sa.&14.03.2026&11:00&HC Eynatten/Raeren&\textbf{Eschweiler SG}&&EY\\ Link zur liga\end{figure}\end{textblock} \begin{center} \vspace{0cm} - {\scalefont{3}mC-Jugend II}\\\scalefont{2}Regionsliga Jugend C\\ + {\scalefont{3}mC-Jugend II}\\\scalefont{2}Regionsliga\\ \vspace{0cm} \end{center} \rowcolors{2}{gray!25}{white} @@ -489,7 +489,7 @@ So.&15.03.2026&15:15&\textbf{Eschweiler SG II}&VfR Übach-Palenberg&&E2\\ Link zur liga\end{figure}\end{textblock} \begin{center} \vspace{0cm} - {\scalefont{3}gD-Jugend}\\\scalefont{2}Regionsoberliga gemischte Jugend D Gr.1\\ + {\scalefont{3}gD-Jugend}\\\scalefont{2}Regionsoberliga\\ \vspace{0cm} \end{center} \rowcolors{2}{gray!25}{white} @@ -532,7 +532,7 @@ Sa.&28.02.2026&13:30&SV Eilendorf&\textbf{Eschweiler SG}&23:33&AC3\\ Link zur liga\end{figure}\end{textblock} \begin{center} \vspace{0cm} - {\scalefont{3}gD-Jugend II}\\\scalefont{2}Regionsoberliga gemischte Jugend D Gr.2\\ + {\scalefont{3}gD-Jugend II}\\\scalefont{2}Regionsoberliga\\ \vspace{0cm} \end{center} \rowcolors{2}{gray!25}{white} @@ -575,7 +575,7 @@ Sa.&28.02.2026&09:30&HC Eynatten/Raeren&\textbf{Eschweiler SG II}&17:16&EY\\ Link zur liga\end{figure}\end{textblock} \begin{center} \vspace{0cm} - {\scalefont{3}gE-Jugend}\\\scalefont{2}Regionsoberliga gemischte Jugend E\\ + {\scalefont{3}gE-Jugend}\\\scalefont{2}Regionsoberliga\\ \vspace{0cm} \end{center} \rowcolors{2}{gray!25}{white} @@ -618,7 +618,7 @@ So.&15.03.2026&09:30&BTB Aachen&\textbf{Eschweiler SG}&&AC2\\ Link zur liga\end{figure}\end{textblock} \begin{center} \vspace{0cm} - {\scalefont{3}gE-Jugend II}\\\scalefont{2}Regionsliga gemischte Jugend E\\ + {\scalefont{3}gE-Jugend II}\\\scalefont{2}Regionsliga\\ \vspace{0cm} \end{center} \rowcolors{2}{gray!25}{white}