157 lines
6.8 KiB
Python
157 lines
6.8 KiB
Python
from urllib.parse import urlsplit
|
|
from bs4 import BeautifulSoup
|
|
from datetime import datetime
|
|
import locale
|
|
|
|
from providers.base import BaseProvider
|
|
from lib.player import Player
|
|
|
|
|
|
class TransferMarkt(BaseProvider):
    """Scraper for squad pages of the French-language Transfermarkt site.

    Both public entry points receive the raw page bytes, decode them with
    ``CHARSET`` and parse the markup with BeautifulSoup's ``html.parser``.
    """

    DOMAINS = {'www.transfermarkt.fr'}
    CHARSET = 'UTF-8'

    # French labels found in the page, mapped to the project's constants.
    ROLES = {
        'Gardien': Player.ROLE_GOALKEEPER,
        'Défense': Player.ROLE_DEFENDER,
        'Milieu de terrain': Player.ROLE_MIDFIELDER,
        'Attaquant': Player.ROLE_ATTACKER,
    }
    FEET = {
        'droit': Player.FOOT_RIGHT,
        'gauche': Player.FOOT_LEFT,
        'des deux pieds': Player.FOOT_BOTH,
    }

    # Key used in the staff dictionary for each <optgroup> label. Any label
    # not listed here is filed under 'attackers'.
    _STAFF_KEYS = {
        'Gardien': 'goalkeepers',
        'Défense': 'defenders',
        'Milieu de terrain': 'midfielders',
    }

    # (label suffix, multiplier) pairs recognised in the market value cell,
    # tried in this order.
    _PRICE_UNITS = (
        ('mio. €', 1000000),
        ('K €', 1000),
    )

    @classmethod
    def get_team_staff(cls, data):
        """Return the player names of a squad, grouped by line.

        Args:
            data: Raw page content as bytes, decoded with ``CHARSET``.

        Returns:
            A dict whose keys are among ``'goalkeepers'``, ``'defenders'``,
            ``'midfielders'`` and ``'attackers'``, each mapping to a list of
            names. The dict is empty when the page does not contain the
            ``spieler_select_breadcrumb`` select element.

        Raises:
            KeyError: If an ``<optgroup>`` has no ``label`` attribute.
        """
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        select = soup.find('select', id='spieler_select_breadcrumb')
        staff = {}
        if select is None:
            # Tolerate an unexpected page layout, as get_team_players does
            # for a missing table, instead of failing with AttributeError.
            return staff
        for optgroup in select.find_all('optgroup'):
            # Everything up to the first space of each option is dropped
            # (presumably the shirt number -- confirm against a live page).
            names = [
                option.text.strip().partition(' ')[2]
                for option in optgroup.find_all('option')
            ]
            key = cls._STAFF_KEYS.get(optgroup.attrs['label'], 'attackers')
            staff[key] = names
        return staff

    @classmethod
    def get_team_players(cls, data, team, countries):
        """Yield one ``Player`` per squad row found in the page.

        This is a generator: nothing runs, including the locale change
        below, until the caller starts iterating.

        Args:
            data: Raw page content as bytes, decoded with ``CHARSET``.
            team: Passed through unchanged as ``Player(team=team, ...)``.
            countries: Iterable of objects exposing
                ``names['transfermarkt']``. It is scanned once per flag
                image, so it must be re-iterable.

        Yields:
            A ``Player`` for every ``<tr>`` inside ``div#yw1`` that has
            exactly ten direct ``<td>`` children. Fields that could not be
            parsed are described in ``player.error`` (a list of messages);
            ``player.error`` is ``None`` when everything parsed cleanly.

        Raises:
            locale.Error: If the ``fr_FR.utf-8`` locale is unavailable.
        """
        # NOTE: this changes the locale of the whole process and does not
        # restore it. It is required for '%b' to accept French month names.
        locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        table = soup.find('div', id='yw1')
        if table is None:
            return
        for tr in table.find_all('tr'):
            tds = tr.find_all('td', recursive=False)
            if len(tds) != 10:
                # Header, nested and spacer rows do not carry a player.
                continue
            yield cls._parse_player(tds, team, countries)

    @classmethod
    def _parse_player(cls, tds, team, countries):
        """Build a ``Player`` from the ten cells of one squad row."""
        player = Player(team=team, error=[])

        # Cell 0: shirt number as text, role as the cell's title attribute.
        number = tds[0].text.strip()
        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as '²' or '½' that int() rejects with ValueError.
        if number.isdecimal():
            player.number = int(number)
        elif number != '-':
            player.error.append("bad format number '{}'".format(number))

        role = tds[0].attrs.get('title')
        if role in cls.ROLES:
            player.role = cls.ROLES[role]
        else:
            player.error.append("bad format role '{}'".format(role))

        # Cell 2: birth date, followed by a parenthesised suffix that is
        # discarded. 'avr.' is spelled out before parsing with '%b'.
        birth_date = tds[2].text.split('(')[0].strip().replace('avr.', 'avril')
        try:
            player.set_age(datetime.strptime(birth_date, '%d %b %Y'))
        except ValueError:
            player.error.append("bad format birth_date '{}'".format(birth_date))

        # Cell 3: one or two flag images.
        cls._set_countries(player, tds[3].find_all('img'), countries)

        # Cell 4: text before the first 'm', with the comma removed
        # (presumably metres turned into centimetres -- confirm).
        size = tds[4].text.split('m')[0].strip().replace(',', '')
        if size.isdecimal():
            player.size = int(size)
        elif size:
            player.error.append("bad format size '{}'".format(size))

        # Cell 5: preferred foot.
        foot = tds[5].text.strip()
        if foot in cls.FEET:
            player.foot = cls.FEET[foot]
        elif foot != '-':
            player.error.append("bad format foot '{}'".format(foot))

        # Cell 8: contract end date in numeric dd.mm.yyyy form.
        contract_date = tds[8].text.strip().replace('avr.', 'avril')
        if contract_date != '-':
            try:
                player.contract_end = datetime.strptime(contract_date, '%d.%m.%Y')
            except ValueError:
                player.error.append(
                    "bad format contract_end '{}'".format(contract_date))

        # Cell 9: market value.
        cls._set_price(player, tds[9].text.strip())

        # Cell 1: nested table holding the name, portrait and position.
        cls._set_identity(player, tds[1].find_all('tr'))

        if not player.error:
            player.error = None
        return player

    @staticmethod
    def _set_countries(player, flags, countries):
        """Resolve the first two flag images into country1 / country2."""
        if not flags:
            player.error.append("no country found")
            return
        # zip() stops after two flags; any further image is ignored.
        for attribute, flag in zip(('country1', 'country2'), flags):
            flag_name = flag.attrs.get('alt')
            for country in countries:
                if country.names['transfermarkt'] == flag_name:
                    setattr(player, attribute, country)
                    break
            else:
                # Report the value that was actually compared (alt).
                player.error.append(
                    "unknown {} '{}'".format(attribute, flag_name))

    @classmethod
    def _set_price(cls, player, price):
        """Parse a market value label such as '8,20 mio. €' or '500 K €'."""
        for suffix, factor in cls._PRICE_UNITS:
            if not price.endswith(suffix):
                continue
            try:
                amount = float(price.split(' ')[0].replace(',', '.'))
                # round(), not int(): 8.2 * 1e6 evaluates to
                # 8199999.999999999 and truncation would lose one euro.
                player.set_price(round(amount * factor))
            except ValueError:
                player.error.append("price '{}' bad format".format(price))
            return
        if price != '-':
            player.error.append("bad format price '{}'".format(price))

    @staticmethod
    def _set_identity(player, name_trs):
        """Fill name, image and position from the rows of the name cell."""
        if not name_trs:
            player.error.append("tr containing full name not found")
            return

        span_name = name_trs[0].find('span', class_='hide-for-small')
        if span_name is not None:
            player.set_names(span_name.text.strip())
        else:
            player.error.append("span containing full name not found")

        image = name_trs[0].find('img', class_='bilderrahmen-fixed')
        if image is not None:
            image_url = image.attrs.get('src')
            player.set_image(image_url)
            # The query string, stripped of its 'lm=' key, is handed to
            # the image object.
            player.image.set_lm(urlsplit(image_url).query.replace('lm=', ''))
        else:
            player.error.append('no image found')

        if len(name_trs) > 1:
            player.position = name_trs[1].text.strip()
        else:
            player.error.append("tr containing position not found")
|