optimized
This commit is contained in:
140
ids.py
140
ids.py
@@ -1,8 +1,6 @@
|
||||
import requests
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
#from hallen import *
|
||||
NameTabelle=0
|
||||
Alias=1
|
||||
@@ -125,6 +123,19 @@ hallen.append(["06034", "Bicken."])
|
||||
|
||||
|
||||
# Club meetings search URL; naechsteSpiele() passes it to fetch_table().
link_nächsteSpiele = "https://hnr-handball.liga.nu/cgi-bin/WebObjects/nuLigaHBDE.woa/wa/clubMeetings?searchType=0&searchTimeRange=5&searchTimeRangeFrom=&searchTimeRangeTo=&selectedTeamId=WONoSelectionString&club=74726&searchMeetings=Suchen"
|
||||
# Passed as `timeout=` to REQUEST_SESSION.get() in fetch_soup() (requests reads it as seconds).
REQUEST_TIMEOUT = 20
|
||||
# Single requests session that fetch_soup() uses for its page downloads.
REQUEST_SESSION = requests.Session()
|
||||
# Runs of two or more whitespace characters; fetch_soup() collapses each run
# to a single space before parsing the markup.
WHITESPACE_RE = re.compile(r"(\s{2,})")

# Captures the word that follows "<any char>-" in a text that later contains
# "Jgd"; naechsteSpiele() applies it to column 5 of a fixture row.
POKAL_RE = re.compile(r".\-(\w*)\s.*Jgd")

# League class name, optionally followed by a number (e.g. "Kreisliga 2").
# NOTE(review): the original alternation listed "Kreisliga" twice; the
# duplicate is dropped here. If a different class was meant, add it below.
LEAGUE_CLASS_RE = re.compile(
    r"\b(Regionalliga|Regionsoberliga|Regionsliga|Verbandsliga|Oberliga|Landesliga|"
    r"Bezirksliga|Kreisliga|Kreisklasse)(?:\s+\d+)?\b",
    re.IGNORECASE,
)

# Leading "Name/Name[/Name...] YYYY/YY " prefix that getliga() strips from the
# page text before searching for the league class.
REGION_SEASON_PREFIX_RE = re.compile(
    r"^[A-Za-zÄÖÜäöüß]+(?:/[A-Za-zÄÖÜäöüß]+)+\s+\d{4}/\d{2}\s*",
    re.IGNORECASE,
)
|
||||
|
||||
def halle(nummer):
|
||||
for x in range(len(hallen)):
|
||||
@@ -137,29 +148,27 @@ def halle(nummer):
|
||||
|
||||
def tabelle(team):
    """Return the league table rows for *team*.

    Loads the first ``result-set`` table (index 0) of the page at
    ``team[LigaLink]`` through ``fetch_table`` and keeps columns 1..9 of
    every row.

    Args:
        team: Indexable team record; ``team[LigaLink]`` is the URL to fetch.

    Returns:
        A new list with one ``row[1:10]`` slice per fetched row.
    """
    # Fetch once and slice into a fresh list instead of rewriting rows in place.
    rows = fetch_table(team[LigaLink], 0)
    return [row[1:10] for row in rows]
|
||||
|
||||
def getliga(team):
|
||||
try:
|
||||
response = requests.get(team[LigaLink])
|
||||
except:
|
||||
print(team)
|
||||
Fi =str(response.content.decode('utf-8', 'ignore'))
|
||||
Fi_cleaned = re.sub(r'(\s{2,})',' ',str(Fi))
|
||||
soup = BeautifulSoup(Fi_cleaned, 'html.parser')
|
||||
soup = fetch_soup(team[LigaLink])
|
||||
abschnitt = soup.find(id="content-col1")
|
||||
#print("abschnitt")
|
||||
#print(abschnitt)
|
||||
parse=re.compile(r"br\/>\s(.*)\s\<br/>")
|
||||
name=str(parse.findall(str(abschnitt))[0])
|
||||
#print("Name")
|
||||
#print(name)
|
||||
if len(name) > 20:
|
||||
if re.search(r'((.*)\s)\<br/>',str(name)) is not None:
|
||||
name=re.search(r'((.*)\s)\<br/>',str(name)).group(1)
|
||||
if abschnitt is None:
|
||||
return ""
|
||||
|
||||
text = abschnitt.get_text(" ", strip=True)
|
||||
text = re.split(r"\bBemerkungen?\b", text, maxsplit=1, flags=re.IGNORECASE)[0].strip()
|
||||
text = re.split(r"\bTabelle\s+und\s+Spielplan\b", text, maxsplit=1, flags=re.IGNORECASE)[0].strip()
|
||||
text = REGION_SEASON_PREFIX_RE.sub("", text).strip()
|
||||
|
||||
match = LEAGUE_CLASS_RE.search(text)
|
||||
if match:
|
||||
name = match.group(0)
|
||||
else:
|
||||
# Fallback: only first compact chunk, never full text block.
|
||||
name = re.split(r"\s{2,}| - | \| ", text, maxsplit=1)[0].strip()
|
||||
|
||||
name=re.sub(r"m[ABCDEF]-Jugend", '', name)
|
||||
name=name.replace(' Männer','')
|
||||
name=name.replace(' Frauen','')
|
||||
@@ -172,27 +181,8 @@ def getliga(team):
|
||||
return(name)
|
||||
|
||||
def teamspielplan(team):
    """Return the fixture rows for *team*.

    Loads the second ``result-set`` table (index 1) of the page at
    ``team[TeamLink]`` through ``fetch_table`` and keeps the first eight
    columns of every row.

    Args:
        team: Indexable team record; ``team[TeamLink]`` is the URL to fetch.

    Returns:
        A new list with one ``row[0:8]`` slice per fetched row.
    """
    # The earlier second download only filled a hall-number list that was
    # never used (and `del nummern[0]` failed on an empty list), so it is gone.
    rows = fetch_table(team[TeamLink], 1)
    return [row[0:8] for row in rows]
|
||||
|
||||
def Teamspielplan_kuerzen(Mannschaft):
|
||||
#if "Eschweiler" in Mannschaft:
|
||||
@@ -228,30 +218,40 @@ def Teamspielplan_kuerzen(Mannschaft):
|
||||
return(Mannschaft)
|
||||
|
||||
def fetch_table(Link, nummer):
    """Extract one ``result-set`` table from the page at *Link*.

    Args:
        Link: URL of the page; downloaded and parsed by ``fetch_soup``.
        nummer: Zero-based index of the wanted ``<table class="result-set">``.

    Returns:
        A list of rows, each a list of stripped ``<td>`` texts. The first
        ``<tr>`` is treated as the header and is not returned. Columns whose
        header text contains "bemerkung" (case-insensitive) are dropped from
        every row. Returns ``[]`` when the page has no table at index
        *nummer* or the table has no rows.
    """
    soup = fetch_soup(Link)
    tables = soup.find_all('table', attrs={'class': 'result-set'})
    if nummer >= len(tables):
        return []

    rows = tables[nummer].find_all('tr')
    if not rows:
        return []

    # Header cells may be <th> or <td>; only their position matters here.
    header = [cell.text.strip().lower() for cell in rows[0].find_all(['th', 'td'])]
    skip_indices = {index for index, title in enumerate(header) if "bemerkung" in title}

    data = []
    for row in rows[1:]:
        cols = [ele.text.strip() for ele in row.find_all('td')]
        if skip_indices:
            cols = [value for index, value in enumerate(cols) if index not in skip_indices]
        data.append(cols)
    return data
|
||||
|
||||
def fetch_soup(link):
    """Download *link* and return it parsed as a BeautifulSoup document.

    The request goes through ``REQUEST_SESSION`` with ``REQUEST_TIMEOUT``.
    The body is decoded as UTF-8 with undecodable bytes ignored, and every
    match of ``WHITESPACE_RE`` is replaced by a single space before parsing.

    Args:
        link: URL to download.

    Returns:
        The parsed document. If the request raises
        ``requests.RequestException``, the link is printed and an empty
        document is returned instead.
    """
    try:
        reply = REQUEST_SESSION.get(link, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        # Best effort: report the failing URL and hand back an empty document.
        print(link)
        return BeautifulSoup("", 'html.parser')

    markup = reply.content.decode('utf-8', 'ignore')
    return BeautifulSoup(WHITESPACE_RE.sub(' ', markup), 'html.parser')
|
||||
|
||||
def naechsteSpiele(number):
|
||||
data=fetch_table(link_nächsteSpiele,0)
|
||||
response = requests.get(link_nächsteSpiele)
|
||||
Fi =str(response.content.decode('utf-8', 'ignore'))
|
||||
Fi_cleaned = re.sub(r'(\s{2,})',' ',str(Fi))
|
||||
soup = BeautifulSoup(Fi_cleaned, 'html.parser')
|
||||
for x in range(len(data)):
|
||||
data[x]=data[x][0:8]
|
||||
CTR = 0
|
||||
@@ -261,12 +261,6 @@ def naechsteSpiele(number):
|
||||
if CTR == number:
|
||||
break
|
||||
tabelle=data[:x]
|
||||
links=[]
|
||||
nummern=[]
|
||||
#for link in soup.find_all('a'):
|
||||
# if len(link.contents[0]) == 4:
|
||||
# nummern.append(link.contents[0])
|
||||
#del nummern[0]
|
||||
for x in range(len(tabelle)):
|
||||
if tabelle[x][3] != '':
|
||||
tabelle[x][3]=halle(tabelle[x][3])
|
||||
@@ -301,9 +295,7 @@ def naechsteSpiele(number):
|
||||
tabelle[x][6] = '\\textbf{Minis}'
|
||||
else:
|
||||
tabelle[x][7] = '\\textbf{Minis}'
|
||||
parse=re.compile(r".\-(\w*)\s.*Jgd")
|
||||
pokal=parse.findall(str(tabelle[x][5]))
|
||||
print(pokal)
|
||||
pokal=POKAL_RE.findall(str(tabelle[x][5]))
|
||||
if (len(pokal) != 0 and tabelle[x][6] !="spielfrei" and tabelle[x][7] !="spielfrei"):
|
||||
#print('POKAL!!!', pokal)
|
||||
parse=re.compile(r"\sSG\s(I.*)")
|
||||
@@ -346,9 +338,9 @@ def naechsteSpiele(number):
|
||||
continue
|
||||
else:
|
||||
if tabelle[x][1] == last_date:
|
||||
tabelle[x][0] ==''
|
||||
tabelle[x][1] == ''
|
||||
tabelle[x][0] = ''
|
||||
tabelle[x][1] = ''
|
||||
else:
|
||||
last_date = tabelle[x][1]
|
||||
#print(last_date)
|
||||
return(tabelle)
|
||||
return(tabelle)
|
||||
|
||||
Reference in New Issue
Block a user