from urllib.parse import urlsplit
from datetime import datetime
import locale

from bs4 import BeautifulSoup

from providers.base import BaseProvider
from lib.player import Player


class TransferMarkt(BaseProvider):
    """Provider that extracts squad data from www.transfermarkt.fr HTML pages.

    Both public entry points receive the raw (encoded) page body and parse it
    with BeautifulSoup. The page is expected in its French rendering: role
    labels, foot labels and month names are matched against French strings.
    """

    DOMAINS = {'www.transfermarkt.fr'}
    CHARSET = 'UTF-8'

    # French role label (as found in the page) -> Player role constant.
    ROLES = {
        'Gardien': Player.ROLE_GOALKEEPER,
        'Défense': Player.ROLE_DEFENDER,
        'Milieu de terrain': Player.ROLE_MIDFIELDER,
        'Attaquant': Player.ROLE_ATTACKER
    }

    # French preferred-foot label -> Player foot constant.
    FEET = {
        'droit': Player.FOOT_RIGHT,
        'gauche': Player.FOOT_LEFT,
        'des deux pieds': Player.FOOT_BOTH
    }

    # <optgroup> label -> key in the dict returned by get_team_staff().
    # Any label not listed here is filed under 'attackers'.
    _STAFF_KEYS = {
        'Gardien': 'goalkeepers',
        'Défense': 'defenders',
        'Milieu de terrain': 'midfielders',
    }

    # Recognised market-value suffixes and the factor that converts the
    # leading number to euros. Checked in order.
    _PRICE_UNITS = (
        ('mio. €', 1e6),
        ('K €', 1e3),
    )

    @classmethod
    def get_team_staff(cls, data):
        """Return the player names of a team grouped by line.

        :param data: encoded page body; decoded with ``cls.CHARSET``.
        :return: dict with some of the keys ``'goalkeepers'``,
            ``'defenders'``, ``'midfielders'``, ``'attackers'``, each mapping
            to a list of names. Empty when the player ``<select>`` element
            is not present in the page.
        """
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        select = soup.find('select', id='spieler_select_breadcrumb')
        staff = dict()
        if select is None:
            # Page without the breadcrumb selector: nothing to report.
            return staff

        for optgroup in select.find_all('optgroup'):
            # Each option text is split on single spaces and the first token
            # is dropped (presumably the shirt number -- confirm against a
            # live page); the remainder is the player's name.
            players = [
                ' '.join(option.text.strip().split(' ')[1:])
                for option in optgroup.find_all('option')
            ]
            key = cls._STAFF_KEYS.get(optgroup.attrs['label'], 'attackers')
            staff[key] = players
        return staff

    @classmethod
    def get_team_players(cls, data, team, countries):
        """Yield one ``Player`` per squad row found in the page.

        Only ``<tr>`` elements with exactly ten direct ``<td>`` children
        inside ``<div id="yw1">`` are treated as player rows. Every field
        that cannot be parsed adds a message to ``player.error``; when no
        problem was recorded ``player.error`` is set to ``None``.

        NOTE: this switches the process-wide locale to ``fr_FR.utf-8``
        (``LC_ALL``) so that ``%b`` matches French month names, and does not
        restore it. ``locale.Error`` propagates if that locale is missing.

        :param data: encoded page body; decoded with ``cls.CHARSET``.
        :param team: passed through to ``Player(team=...)``.
        :param countries: re-iterable collection of objects whose
            ``names['transfermarkt']`` is compared with the flag ``alt`` text.
        """
        locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        table = soup.find('div', id='yw1')
        if table is None:
            return

        for tr in table.find_all('tr'):
            tds = tr.find_all('td', recursive=False)
            if len(tds) != 10:
                continue

            player = Player(team=team, error=list())

            # The order of the calls below fixes the order of the messages
            # in player.error; keep it stable.
            cls._set_number_and_role(player, tds[0])
            cls._set_birth_date(player, tds[2])
            cls._set_countries(player, tds[3], countries)
            cls._set_size(player, tds[4])
            cls._set_foot(player, tds[5])
            cls._set_contract_end(player, tds[8])
            cls._set_price(player, tds[9].text.strip())
            cls._set_identity(player, tds[1])

            if not player.error:
                player.error = None
            yield player

    @classmethod
    def _set_number_and_role(cls, player, cell):
        """Read shirt number (cell text) and role (cell ``title``)."""
        number = cell.text.strip()
        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as superscripts or fractions that int() rejects.
        if number.isdecimal():
            player.number = int(number)
        elif number != '-':
            player.error.append("bad format number '{}'".format(number))

        role = cell.attrs.get('title')
        if role in cls.ROLES:
            player.role = cls.ROLES[role]
        else:
            player.error.append("bad format role '{}'".format(role))

    @staticmethod
    def _set_birth_date(player, cell):
        """Parse the birth date that precedes the first '(' in the cell."""
        # 'avr.' is rewritten to 'avril' before parsing with %b (presumably
        # because the fr_FR locale abbreviates April as 'avril' -- confirm).
        birth_date = cell.text.split('(')[0].strip().replace('avr.', 'avril')
        try:
            player.set_age(datetime.strptime(birth_date, '%d %b %Y'))
        except ValueError:
            player.error.append(
                "bad format birth_date '{}'".format(birth_date))

    @staticmethod
    def _match_country(img, countries):
        """Return the first country matching the flag's ``alt``, else None."""
        alt = img.attrs.get('alt')
        for country in countries:
            if country.names['transfermarkt'] == alt:
                return country
        return None

    @classmethod
    def _set_countries(cls, player, cell, countries):
        """Fill ``country1``/``country2`` from the first two flag images."""
        imgs = cell.find_all('img')
        if not imgs:
            player.error.append("no country found")
            return

        # zip() stops after two images; any further flags are ignored.
        for attr, img in zip(('country1', 'country2'), imgs):
            country = cls._match_country(img, countries)
            if country is not None:
                setattr(player, attr, country)
            else:
                # Matching uses 'alt' but the message reports 'title'.
                player.error.append(
                    "unknown {} '{}'".format(attr, img.attrs.get('title')))

    @staticmethod
    def _set_size(player, cell):
        """Parse the height: text before the first 'm', commas removed."""
        size = cell.text.split('m')[0].strip().replace(',', '')
        if size.isdecimal():
            player.size = int(size)
        elif size:
            # An empty value is silently accepted as "unknown".
            player.error.append("bad format size '{}'".format(size))

    @classmethod
    def _set_foot(cls, player, cell):
        """Map the French foot label to a ``Player.FOOT_*`` constant."""
        foot = cell.text.strip()
        if foot in cls.FEET:
            player.foot = cls.FEET[foot]
        elif foot != '-':
            player.error.append("bad format foot '{}'".format(foot))

    @staticmethod
    def _set_contract_end(player, cell):
        """Parse the contract end date (dd.mm.yyyy); '-' means none."""
        # The 'avr.' rewrite has no effect on a well-formed numeric date; it
        # is kept so the text quoted in the error message is unchanged.
        contract_date = cell.text.strip().replace('avr.', 'avril')
        if contract_date == '-':
            return
        try:
            player.contract_end = datetime.strptime(contract_date, '%d.%m.%Y')
        except ValueError:
            player.error.append(
                "bad format contract_end '{}'".format(contract_date))

    @classmethod
    def _set_price(cls, player, price):
        """Convert a market value such as '1,50 mio. €' to whole euros."""
        for suffix, factor in cls._PRICE_UNITS:
            if not price.endswith(suffix):
                continue
            try:
                amount = float(price.split(' ')[0].replace(',', '.'))
                # round(), not int(): the float product can fall a hair
                # below the intended integer and int() would truncate it.
                player.set_price(round(amount * factor))
            except (ValueError, OverflowError):
                player.error.append("price '{}' bad format".format(price))
            return

        if price != '-':
            player.error.append("bad format price '{}'".format(price))

    @staticmethod
    def _set_identity(player, cell):
        """Read full name, picture and position from the nested table."""
        name_trs = cell.find_all('tr')
        if not name_trs:
            player.error.append("tr containing full name not found")
            return

        span_name = name_trs[0].find('span', class_='hide-for-small')
        if span_name is not None:
            player.set_names(span_name.text.strip())
        else:
            player.error.append("span containing full name not found")

        image = name_trs[0].find('img', class_='bilderrahmen-fixed')
        image_url = image.attrs.get('src') if image is not None else None
        if image_url is not None:
            player.set_image(image_url)
            # The query string minus its 'lm=' marker is handed to set_lm().
            player.image.set_lm(urlsplit(image_url).query.replace('lm=', ''))
        else:
            # Covers both a missing <img> and an <img> without 'src'.
            player.error.append('no image found')

        if len(name_trs) > 1:
            player.position = name_trs[1].text.strip()
        else:
            player.error.append("tr containing position not found")