optimized

This commit is contained in:
Norbert
2026-03-07 09:23:52 +01:00
parent d343a36b97
commit 8efbace2ec
4 changed files with 420 additions and 82 deletions

140
ids.py
View File

@@ -1,8 +1,6 @@
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
#from hallen import *
NameTabelle=0
Alias=1
@@ -125,6 +123,19 @@ hallen.append(["06034", "Bicken."])
# Club fixture search URL on the nuLiga handball portal (club id 74726).
link_nächsteSpiele = "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/clubMeetings?searchType=0&searchTimeRange=5&searchTimeRangeFrom=&searchTimeRangeTo=&selectedTeamId=WONoSelectionString&club=74726&searchMeetings=Suchen"

# Shared HTTP settings: one session for connection reuse, fixed timeout (s).
REQUEST_TIMEOUT = 20
REQUEST_SESSION = requests.Session()

# Collapses runs of two or more whitespace characters into one space.
WHITESPACE_RE = re.compile(r"(\s{2,})")
# Captures the word after a hyphen in "…-X … Jgd" entries — presumably a
# youth/cup marker; confirm against the fixture strings it is applied to.
POKAL_RE = re.compile(r".\-(\w*)\s.*Jgd")
# Recognised league class names, optionally followed by a division number.
# Fix: the alternation previously listed "Kreisliga" twice; the duplicate
# alternative was redundant and has been removed (no behavior change).
LEAGUE_CLASS_RE = re.compile(
    r"\b(Regionalliga|Regionsoberliga|Regionsliga|Verbandsliga|Oberliga|Landesliga|"
    r"Bezirksliga|Kreisliga|Kreisklasse)(?:\s+\d+)?\b",
    re.IGNORECASE,
)
# Strips a leading "Region/Region 2024/25 " style season prefix.
REGION_SEASON_PREFIX_RE = re.compile(
    r"^[A-Za-zÄÖÜäöüß]+(?:/[A-Za-zÄÖÜäöüß]+)+\s+\d{4}/\d{2}\s*",
    re.IGNORECASE,
)
def halle(nummer):
for x in range(len(hallen)):
@@ -137,29 +148,27 @@ def halle(nummer):
def tabelle(team):
    """Return the league standings table for *team*.

    Fetches the first 'result-set' table from the team's league page and
    keeps columns 1..9 of every row (column 0 is dropped).
    """
    data = fetch_table(team[LigaLink], 0)
    return [row[1:10] for row in data]
def getliga(team):
try:
response = requests.get(team[LigaLink])
except:
print(team)
Fi =str(response.content.decode('utf-8', 'ignore'))
Fi_cleaned = re.sub(r'(\s{2,})',' ',str(Fi))
soup = BeautifulSoup(Fi_cleaned, 'html.parser')
soup = fetch_soup(team[LigaLink])
abschnitt = soup.find(id="content-col1")
#print("abschnitt")
#print(abschnitt)
parse=re.compile(r"br\/>\s(.*)\s\<br/>")
name=str(parse.findall(str(abschnitt))[0])
#print("Name")
#print(name)
if len(name) > 20:
if re.search(r'((.*)\s)\<br/>',str(name)) is not None:
name=re.search(r'((.*)\s)\<br/>',str(name)).group(1)
if abschnitt is None:
return ""
text = abschnitt.get_text(" ", strip=True)
text = re.split(r"\bBemerkungen?\b", text, maxsplit=1, flags=re.IGNORECASE)[0].strip()
text = re.split(r"\bTabelle\s+und\s+Spielplan\b", text, maxsplit=1, flags=re.IGNORECASE)[0].strip()
text = REGION_SEASON_PREFIX_RE.sub("", text).strip()
match = LEAGUE_CLASS_RE.search(text)
if match:
name = match.group(0)
else:
# Fallback: only first compact chunk, never full text block.
name = re.split(r"\s{2,}| - | \| ", text, maxsplit=1)[0].strip()
name=re.sub(r"m[ABCDEF]-Jugend", '', name)
name=name.replace(' Männer','')
name=name.replace(' Frauen','')
@@ -172,27 +181,8 @@ def getliga(team):
return(name)
def teamspielplan(team):
    """Return the match schedule for *team*.

    Fetches the second 'result-set' table from the team's page and keeps
    the first 8 columns of every row.
    """
    data = fetch_table(team[TeamLink], 1)
    return [row[0:8] for row in data]
def Teamspielplan_kuerzen(Mannschaft):
#if "Eschweiler" in Mannschaft:
@@ -228,30 +218,40 @@ def Teamspielplan_kuerzen(Mannschaft):
return(Mannschaft)
def fetch_table(Link, nummer):
    """Fetch *Link* and return the *nummer*-th 'result-set' table as rows.

    Each row is a list of stripped cell strings. The header row is not
    included in the result; any column whose header text contains
    "bemerkung" (remarks) is removed from every data row. Returns an
    empty list when the requested table or its rows are missing.
    """
    soup = fetch_soup(Link)
    tables = soup.find_all('table', attrs={'class': 'result-set'})
    if nummer >= len(tables):
        return []
    table = tables[nummer]
    rows = table.find_all('tr')
    if not rows:
        return []
    header_cells = rows[0].find_all(['th', 'td'])
    header = [cell.text.strip().lower() for cell in header_cells]
    # Column indices to drop (remarks columns identified via the header).
    skip_indices = {index for index, title in enumerate(header) if "bemerkung" in title}
    data = []
    for row in rows[1:]:
        cols = [ele.text.strip() for ele in row.find_all('td')]
        if skip_indices:
            cols = [value for index, value in enumerate(cols) if index not in skip_indices]
        data.append(cols)
    return data
def fetch_soup(link):
    """GET *link* via the shared session and return it parsed as soup.

    Runs of whitespace are collapsed to single spaces before parsing.
    On any request failure the failing URL is printed and an empty soup
    is returned, so callers never need their own try/except.
    """
    try:
        response = REQUEST_SESSION.get(link, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        print(link)
        return BeautifulSoup("", 'html.parser')
    # .decode() already yields str — the redundant str() wrappers around
    # the decode result and again around the page text were removed.
    text = response.content.decode('utf-8', 'ignore')
    cleaned = WHITESPACE_RE.sub(' ', text)
    return BeautifulSoup(cleaned, 'html.parser')
def naechsteSpiele(number):
data=fetch_table(link_nächsteSpiele,0)
response = requests.get(link_nächsteSpiele)
Fi =str(response.content.decode('utf-8', 'ignore'))
Fi_cleaned = re.sub(r'(\s{2,})',' ',str(Fi))
soup = BeautifulSoup(Fi_cleaned, 'html.parser')
for x in range(len(data)):
data[x]=data[x][0:8]
CTR = 0
@@ -261,12 +261,6 @@ def naechsteSpiele(number):
if CTR == number:
break
tabelle=data[:x]
links=[]
nummern=[]
#for link in soup.find_all('a'):
# if len(link.contents[0]) == 4:
# nummern.append(link.contents[0])
#del nummern[0]
for x in range(len(tabelle)):
if tabelle[x][3] != '':
tabelle[x][3]=halle(tabelle[x][3])
@@ -301,9 +295,7 @@ def naechsteSpiele(number):
tabelle[x][6] = '\\textbf{Minis}'
else:
tabelle[x][7] = '\\textbf{Minis}'
parse=re.compile(r".\-(\w*)\s.*Jgd")
pokal=parse.findall(str(tabelle[x][5]))
print(pokal)
pokal=POKAL_RE.findall(str(tabelle[x][5]))
if (len(pokal) != 0 and tabelle[x][6] !="spielfrei" and tabelle[x][7] !="spielfrei"):
#print('POKAL!!!', pokal)
parse=re.compile(r"\sSG\s(I.*)")
@@ -346,9 +338,9 @@ def naechsteSpiele(number):
continue
else:
if tabelle[x][1] == last_date:
tabelle[x][0] ==''
tabelle[x][1] == ''
tabelle[x][0] = ''
tabelle[x][1] = ''
else:
last_date = tabelle[x][1]
#print(last_date)
return(tabelle)
return(tabelle)