from datetime import datetime, timedelta
from urllib.parse import urljoin
import locale

# noinspection PyProtectedMember
from bs4 import BeautifulSoup, NavigableString

from lib.match import Match, Event, Squad, Stat, Comm
from providers.base import BaseProvider
from lib.country import Country
from lib.league import Group
from lib.team import Team


class Matchendirect(BaseProvider):
    """Scraper for the match, ranking and schedule pages of matchendirect.fr."""

    DOMAINS = {'www.matchendirect.fr'}
    CHARSET = 'utf-8'

    # ROLES[half][table row][cell index] -> role code for a cell of the
    # 'schema_compo' line-up drawing. The second grid is the first one rotated
    # by 180 degrees.
    ROLES = [
        [
            [None, 'DLG', 'DLG', 'MDG', 'MG', 'MOG', 'ALG'],
            [None, 'DCG', None, 'MDG', 'MCG', 'MOG', 'AG'],
            ['G', 'DC', None, 'MDC', 'MC', 'MOC', 'AC'],
            [None, 'DCD', None, 'MDD', 'MCD', 'MOD', 'AD'],
            [None, 'DLD', 'DLD', 'MDD', 'MD', 'MOD', 'ALD']
        ],
        [
            ['ALD', 'MOD', 'MD', 'MDD', 'DLD', 'DLD', None],
            ['AD', 'MOD', 'MCD', 'MDD', None, 'DCD', None],
            ['AC', 'MOC', 'MC', 'MDC', None, 'DC', 'G'],
            ['AG', 'MOG', 'MCG', 'MDG', None, 'DCG', None],
            ['ALG', 'MOG', 'MG', 'MDG', 'DLG', 'DLG', None]
        ]
    ]

    # French month name -> zero-padded month number.
    MONTH_NUMBERS = {
        'janvier': '01',
        'février': '02',
        'mars': '03',
        'avril': '04',
        'mai': '05',
        'juin': '06',
        'juillet': '07',
        'août': '08',
        'septembre': '09',
        'octobre': '10',
        'novembre': '11',
        'décembre': '12'
    }

    # Event icon CSS class -> (event type, suffix appended to the player name).
    EVENT_TYPES = {
        'ico_evenement1': ('goal', None),
        'ico_evenement2': ('goal', 'P'),
        'ico_evenement3': ('red-card', None),
        'ico_evenement4': ('yellow-card', None),
        'ico_evenement5': ('yellow-red-card', None),
        'ico_evenement7': ('goal', 'CSC'),
        'ico_evenement81': ('switch-out', None),
        'ico_evenement82': ('switch-out', None),
        'ico_evenement91': ('switch-in', None),
        'ico_evenement92': ('switch-in', None)
    }

    # Statistic label on the page -> key used in ``match.stats``.
    STAT_NAMES = {
        'Possession': 'possession',
        'Buts': 'goals',
        'Tirs': 'attempts',
        'Corners': 'corners',
        'Hors-jeu': 'offsides',
        'Fautes': 'fouls',
        'Carton jaune': 'yellow_cards',
        'Carton rouge': 'red_cards'
    }

    # Live-comment icon CSS class -> comment type.
    COMM_TYPES = {
        'ico_com_occasion': 'chance',
        'ico_com_but': 'goal',
        'ico_com_carton-jaune': 'yellow-card',
        'ico_com_remplacement': 'switch',
        'ico_com_sifflet': 'whistle',
        'ico_com_carton-rouge': 'red-card'
    }

    # Values shared by the two schedule parsers.
    _BASE_URL = 'http://www.matchendirect.fr/'
    _SCHEDULE_TABLE_CLASS = 'table table-striped table-hover'
    _SCHEDULE_DATE_FORMAT = '%A %d %B %Y'

    # ------------------------------------------------------------------
    # match page
    # ------------------------------------------------------------------

    @classmethod
    def get_match_info(cls, match, data):
        """Fill ``match`` in place from the bytes of a match detail page.

        Sets the start date, extra time / shootout, score, minute, events,
        squad (with roles), stats and live comments, in that order; later
        steps rely on the results of earlier ones.

        :param match: match object to update; its ``home.names`` and
            ``away.names`` must hold a ``'matchendirect'`` entry.
        :param data: raw page content, decoded with ``cls.CHARSET``.
        :raises NameError: when a mandatory element of the page is missing or
            when the team names on the page do not match ``match``.
        """
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        div_match = soup.find(id='ajax-match-detail-1')
        if div_match is None:
            raise NameError('div ajax-match-detail-1 not found')

        cls._check_team_names(match, div_match)
        cls._parse_start_date(match, div_match)
        cls._parse_shootout(match, soup)
        cls._parse_score(match, div_match)
        cls._parse_minute(match, div_match)
        cls._parse_events(match, soup)
        cls._parse_squad(match, soup)
        cls._parse_squad_roles(match, soup)
        cls._reorder_squad_lines(match)
        cls._parse_stats(match, soup)
        cls._parse_comms(match, soup)

    @classmethod
    def _check_team_names(cls, match, div_match):
        """Raise NameError unless the page shows the teams of ``match`` in order."""
        div_teams = div_match.find_all('div', class_='team')
        if len(div_teams) < 2:
            raise NameError('divs team not found')
        expected_home = match.home.names['matchendirect']
        expected_away = match.away.names['matchendirect']
        found_home = div_teams[0].a.text.strip()
        if found_home == expected_away \
                and div_teams[1].a.text.strip() == expected_home:
            raise NameError('team names are inverted')
        if found_home != expected_home:
            raise NameError('home name does not match')
        if div_teams[1].a.text.strip() != expected_away:
            raise NameError('away name does not match')

    @classmethod
    def _parse_start_date(cls, match, div_match):
        """Set ``match.start_date`` from the 'info1' block of the page."""
        div_info = div_match.find('div', class_='info1')
        if div_info is None:
            raise NameError('div info1 is not found')
        # First child: date text split on spaces -> [?, day, month name, year].
        exp_date = div_info.contents[0].text.split(' ')
        day = exp_date[1]
        month = cls.MONTH_NUMBERS[exp_date[2]]
        year = exp_date[3]
        # Second child: time text; spaces and 'à' are stripped from both ends
        # and the rest is split on 'h'.
        exp_time = div_info.contents[1].strip(' à').split('h')
        if len(exp_time) == 2:
            hour, minute = exp_time
        else:
            # no usable time on the page: fall back to midnight
            hour = 0
            minute = 0
        match.start_date = datetime.strptime(
            '{}-{}-{} {}:{}:00'.format(year, month, day, hour, minute),
            '%Y-%m-%d %H:%M:%S'
        )

    @classmethod
    def _parse_shootout(cls, match, soup):
        """Set ``match.extra_time`` and the shootout score when present."""
        table_shootout = soup.find('table', id='match_evenement_score')
        if table_shootout is None:
            return
        for tr in table_shootout.find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) != 3:
                continue
            label = tds[0].text.strip()
            if label == 'Score après prolongation':
                match.extra_time = 'extratime'
            if label == 'Tirs au but':
                exp_shootout = tds[1].text.split(' - ')
                match.extra_time = 'shootout'
                match.shootout_home = int(exp_shootout[0])
                match.shootout_away = int(exp_shootout[1])

    @classmethod
    def _parse_score(cls, match, div_match):
        """Set ``match.score_home`` / ``match.score_away`` when they are numeric."""
        span_scores = div_match.find_all('span', class_='score')
        if len(span_scores) < 2:
            raise NameError('spans score not found')
        home_text = span_scores[0].text.strip()
        if home_text.isnumeric():
            match.score_home = int(home_text)
        away_text = span_scores[1].text.strip()
        if away_text.isnumeric():
            match.score_away = int(away_text)

    @classmethod
    def _parse_minute(cls, match, div_match):
        """Set ``match.minute`` from the last child of the 'status' block."""
        div_status = div_match.find('div', class_='status')
        if div_status is None:
            raise NameError('div status not found')
        content_minute = div_status.contents[-1]
        if isinstance(content_minute, NavigableString):
            match.minute = content_minute.strip().lower()
        else:
            match.minute = content_minute.text.strip().lower()

    @classmethod
    def _parse_events(cls, match, soup):
        """Append goals, cards and substitutions to ``match.events``."""
        table_events = soup.find('table', id='match_evenement')
        if table_events is None:
            return
        nb_goals = {'home': 0, 'away': 0}
        for span_event in table_events.find_all('span'):
            # The event icon is identified by the third CSS class of the span;
            # spans without one are not events and are skipped.
            classes = span_event.get('class') or []
            if len(classes) < 3 or classes[2] not in cls.EVENT_TYPES:
                continue
            type_, particularity = cls.EVENT_TYPES[classes[2]]
            event = Event(type_=type_)
            td_event = span_event.parent
            event.side = 'home' if td_event['class'][0] == 'c1' else 'away'
            event.player = td_event.find('a').text.strip()
            if particularity is not None:
                event.player += ' ({})'.format(particularity)
            event.minute = td_event.parent.find('td', class_='c2').text
            if event.type == 'goal':
                nb_goals[event.side] += 1
                # ignore goals beyond the score already read from the page
                if nb_goals[event.side] > getattr(match, 'score_' + event.side):
                    continue
            match.events.append(event)

    @classmethod
    def _parse_squad(cls, match, soup):
        """Append the players of both teams to ``match.squad``."""
        div_squad = soup.find('div', class_='MEDpanelcomposition')
        if div_squad is None:
            return
        td_squads = div_squad.find_all('td')
        if len(td_squads) > 1:
            cls._parse_squad_side(match, td_squads[0], 'home')
            cls._parse_squad_side(match, td_squads[1], 'away')

    @classmethod
    def _parse_squad_side(cls, match, td_squad, side):
        """Append one ``Squad`` per icon span found in ``td_squad``."""
        for span_squad in td_squad.find_all('span'):
            # The name node is two siblings away from the icon: before it for
            # the home column, after it for the away column.
            if side == 'home':
                neighbour = span_squad.previous_sibling
                name_node = neighbour.previous_sibling
            else:
                neighbour = span_squad.next_sibling
                name_node = neighbour.next_sibling
            if name_node is not None:
                squad_name = name_node.text.strip()
            else:
                squad_name = neighbour
            if 'ico_compo_titulaire' in span_squad.attrs['class']:
                role = 'STR'
            else:
                role = 'SUB'
            squad = Squad(role=role, name=squad_name, side=side)
            for event in match.events:
                if event.player.replace(' (P)', '') == squad.name:
                    squad.events.append(event.type)
            match.squad.append(squad)

    @classmethod
    def _parse_squad_roles(cls, match, soup):
        """Give players their role from the 'schema_compo' line-up drawing."""
        table_squad = soup.find('table', id='schema_compo')
        if table_squad is None:
            return
        # sides[half] is the team drawn in that half; unknown until the first
        # player of the drawing has been matched with a squad entry.
        sides = [None, None]
        for id_table, table in enumerate(table_squad.find_all('table')):
            half, row = divmod(id_table, 5)
            for id_td, td in enumerate(table.find_all('td')):
                label = td.find('b')
                if label is None or not label.text:
                    continue
                name_parts = label.text.split()
                for squad in match.squad:
                    # only players that have no position yet
                    if squad.role not in ('SUB', 'STR'):
                        continue
                    if not all(part in squad.name for part in name_parts):
                        continue
                    if squad.side not in sides:
                        sides[half] = squad.side
                        sides[1 - half] = 'home' if squad.side == 'away' else 'away'
                    if squad.side == sides[half]:
                        squad.lastname = label.text.strip()
                        squad.role = cls.ROLES[half][row][id_td]
                        break

    @staticmethod
    def _count_roles(squads, side, roles):
        """Return how many players of ``side`` have a role listed in ``roles``."""
        return sum(1 for sq in squads if sq.side == side and sq.role in roles)

    @staticmethod
    def _rename_roles(squads, side, mapping):
        """Replace the role of each player of ``side`` found in ``mapping``."""
        for sq in squads:
            if sq.side == side and sq.role in mapping:
                sq.role = mapping[sq.role]

    @classmethod
    def _reorder_squad_lines(cls, match):
        """Rename roles of each side according to how crowded each line is.

        The checks run in a fixed order and each one sees the roles produced
        by the previous ones.
        """
        squads = match.squad
        holding = ('MDG', 'MDC', 'MDD')
        for side in ('home', 'away'):
            if cls._count_roles(squads, side, ('DCG', 'DC', 'DCD')) > 2:
                cls._rename_roles(squads, side, {'DCG': 'DG', 'DCD': 'DD'})

            # A role may be None (ROLES contains None cells): skip those
            # players instead of failing on ``None.startswith``.
            nb_defenders = sum(
                1 for sq in squads
                if sq.side == side and sq.role is not None and sq.role.startswith('D')
            )
            if nb_defenders < 4:
                cls._rename_roles(squads, side, {'MG': 'DLG', 'MD': 'DLD'})

            if cls._count_roles(squads, side, holding) > 2:
                cls._rename_roles(squads, side, {'MDG': 'MG', 'MDD': 'MD'})

            if cls._count_roles(squads, side, ('MCG', 'MC', 'MCD')) > 2:
                if cls._count_roles(squads, side, holding) == 0:
                    cls._rename_roles(squads, side, {'MC': 'MDC'})
                else:
                    cls._rename_roles(squads, side, {'MCG': 'MG', 'MCD': 'MD'})

            if cls._count_roles(squads, side, ('MG', 'MCG', 'MC', 'MCD', 'MD')) > 3:
                if cls._count_roles(squads, side, holding) == 0:
                    cls._rename_roles(
                        squads, side, {'MC': 'MDC', 'MCG': 'MDG', 'MCD': 'MDD'})
                else:
                    cls._rename_roles(
                        squads, side, {'MC': 'MOC', 'MG': 'MOG', 'MD': 'MOD'})

            if cls._count_roles(squads, side, ('AG', 'AC', 'AD')) > 2:
                cls._rename_roles(squads, side, {'AG': 'ALG', 'AD': 'ALD'})

    @classmethod
    def _parse_stats(cls, match, soup):
        """Fill ``match.stats`` from the statistics panel."""
        div_stats = soup.find('div', class_='MEDpanelstats')
        if div_stats is None:
            return
        match.stats = {name: Stat() for name in cls.STAT_NAMES.values()}
        for tr in div_stats.find_all('tr'):
            tds = tr.find_all('td')
            # cells 0, 2 and 4 are read below; skip rows that are too short
            if len(tds) < 5:
                continue
            stat_name = tds[2].text.strip()
            stat = Stat(home=int(tds[0].text.strip()), away=int(tds[4].text.strip()))
            if stat_name in cls.STAT_NAMES:
                match.stats[cls.STAT_NAMES[stat_name]] = stat
            elif stat_name in ('Tirs cadrés', 'Tirs non cadrés',
                               'Tirs arrêtés', 'Tirs sur le poteau'):
                # every attempt category is also added to the 'attempts' total
                if stat_name == 'Tirs cadrés':
                    match.stats['in_attempts'] = Stat(home=stat.home, away=stat.away)
                elif stat_name == 'Tirs arrêtés':
                    match.stats['block_attempts'] = stat
                elif stat_name == 'Tirs sur le poteau':
                    match.stats['pole_attempts'] = stat
                match.stats['attempts'].home += stat.home
                match.stats['attempts'].away += stat.away

    @classmethod
    def _parse_comms(cls, match, soup):
        """Append the live comments of the page to ``match.comms``."""
        table_comms = soup.find('table', id='commentaire')
        if table_comms is None:
            return
        for tr in table_comms.find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) != 3:
                continue
            span_icon = tds[1].find('span')
            # normalise to "45'" / "45'+2"
            minute = tds[0].text.strip().replace('+', "'+")
            if minute and not minute.endswith("'") and '+' not in minute:
                minute += "'"
            comm_type = ''
            if span_icon is not None:
                icon_class = span_icon.attrs['class'][1]
                if icon_class not in cls.COMM_TYPES:
                    # comments with an unknown icon are dropped
                    continue
                comm_type = cls.COMM_TYPES[icon_class]
            match.comms.append(
                Comm(minute=minute, type_=comm_type, text=tds[2].text.strip()))

    # ------------------------------------------------------------------
    # ranking page
    # ------------------------------------------------------------------

    @classmethod
    def get_league_ranking(cls, league, data):
        """Update the teams of ``league`` from a ranking page.

        Each ranking row whose name matches the ``'matchendirect'`` name of a
        team of ``league.teams`` updates that team's rank, group and figures.

        :param league: league whose ``teams`` are updated in place.
        :param data: raw page content, decoded with ``cls.CHARSET``.
        :return: the groups named by the single-cell rows of the table, in
            order of first appearance (empty when there is no such row).
        """
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        table = soup.find('table', id='tableau_classement')
        groups = list()
        # group given to teams listed before any group row; not returned
        group = Group(name='0', url=None, league=league)
        stat_columns = ('points', 'played', 'wins', 'ties', 'loss',
                        'g_for', 'g_against', 'g_diff')
        for tr in table.find_all('tr'):
            tds = tr.find_all('td')
            # row titles
            if not tds:
                continue
            # row group
            if len(tds) == 1:
                group_name = tds[0].text.strip()
                group = next((grp for grp in groups if grp.name == group_name), None)
                if group is None:
                    group = Group(name=group_name, url=None, league=league)
                    groups.append(group)
                continue
            # get team
            med_name = tds[0].find('a').contents[-1].strip()
            team = next(
                (tm for tm in league.teams
                 if 'matchendirect' in tm.names and tm.names['matchendirect'] == med_name),
                None
            )
            if team is None:
                continue
            # get rank
            th = tr.find('th')
            span = th.find('span')
            rank_node = span if span is not None else th
            team.rank = int(rank_node.text.strip())
            # get stats: cells 1..8 hold the figures in ``stat_columns`` order
            team.group = group
            for index, attribute in enumerate(stat_columns, start=1):
                setattr(team, attribute, int(tds[index].text.strip()))
        return groups

    # ------------------------------------------------------------------
    # schedule pages
    # ------------------------------------------------------------------

    @classmethod
    def get_schedule_url(cls, match):
        """Return the URL of the weekly schedule page that lists ``match``."""
        # In matchendirect.fr dates are shifted in 2019
        shift_date = match.start_date + timedelta(days=7)
        return '{}/{}'.format(match.league.url.rstrip('/'), shift_date.strftime('%Y-%W'))

    @classmethod
    def _parse_schedule_row(cls, tr, date):
        """Return ``(start_date, home_name, away_name, url)`` of a match row.

        ``date`` is the day taken from the preceding header row; the kick-off
        time is added to it when the time cell reads ``HH:MM``, otherwise
        ``date`` is returned unchanged.
        """
        td_hour = tr.find('td', class_='lm1')
        # partition never fails, unlike unpacking the result of split(':')
        hours, _, minutes = td_hour.text.strip().partition(':')
        if hours.isnumeric() and minutes.isnumeric():
            start_date = date + timedelta(hours=int(hours), minutes=int(minutes))
        else:
            start_date = date
        td_score = tr.find('td', class_='lm3')
        home = td_score.find('span', class_='lm3_eq1').contents[0].strip(' \n\t*')
        away = td_score.find('span', class_='lm3_eq2').contents[-1].strip(' \n\t*')
        url = urljoin(cls._BASE_URL, td_score.find('a').attrs['href'])
        return start_date, home, away, url

    @classmethod
    def get_schedule(cls, scheduler, data):
        """Update the matches of ``scheduler`` from a weekly schedule page.

        The first match of ``scheduler.matches`` whose ``'matchendirect'``
        home and away names equal those of a row receives ``new_url``,
        ``new_start_date`` and ``task_done = True``. ``scheduler.previous_url``
        and ``scheduler.next_url`` are set when the page links to them.

        Side effect: switches the process locale to ``fr_FR.utf-8`` to parse
        the French day and month names.
        """
        locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        date = None
        for table in soup.find_all('table', class_=cls._SCHEDULE_TABLE_CLASS):
            for tr in table.find_all('tr'):
                th = tr.find('th')
                if th is not None:
                    date = datetime.strptime(th.text.strip(), cls._SCHEDULE_DATE_FORMAT)
                elif 'data-matchid' in tr.attrs and date is not None:
                    start_date, home, away, url = cls._parse_schedule_row(tr, date)
                    for match in scheduler.matches:
                        if match.home.names['matchendirect'] == home \
                                and match.away.names['matchendirect'] == away:
                            match.new_url = url
                            match.new_start_date = start_date
                            match.task_done = True
                            break
        a_previous = soup.find('a', class_='objselect_prevnext objselect_prec')
        if a_previous is not None:
            scheduler.previous_url = urljoin(cls._BASE_URL, a_previous.attrs['href'])
        a_next = soup.find('a', class_='objselect_prevnext objselect_suiv')
        if a_next is not None:
            scheduler.next_url = urljoin(cls._BASE_URL, a_next.attrs['href'])

    @classmethod
    def create_schedule(cls, league, data):
        """Yield the schedule URLs from the selected option onwards.

        Reads the first ``<select>`` of the 'filtre_haut' block and yields,
        joined with ``league.url``, the value of the option carrying the
        ``selected`` attribute and of every option after it. Yields nothing
        when there is no ``<select>`` or no selected option.
        """
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        div_top = soup.find('div', id='filtre_haut')
        select_url = div_top.find('select')
        if select_url is None:
            return
        selected = False
        for option_url in select_url.find_all('option'):
            if not selected and 'selected' in option_url.attrs:
                selected = True
            if selected:
                yield urljoin(league.url, option_url.attrs['value'])

    @classmethod
    def _current_mday_round_leg(cls, league, date):
        """Return the ``(mday, round, leg)`` in force at ``date``.

        ``league.round_dates`` maps ``'YYYY-MM-DD'`` keys to dicts with the
        keys ``'mday'``, ``'round'`` and ``'leg'``. The entry with the latest
        key that is not after ``date`` wins; ``(0, None, 0)`` is returned when
        there is none or when ``round_dates`` is ``None``.
        """
        _mday = 0
        _round = None
        _leg = 0
        if league.round_dates is not None:
            # Scan chronologically so the result does not depend on the order
            # in which the dict was filled.
            rounds = sorted(
                ((datetime.strptime(key, '%Y-%m-%d'), value)
                 for key, value in league.round_dates.items()),
                key=lambda item: item[0]
            )
            for start, value in rounds:
                if date < start:
                    break
                _mday = value['mday']
                _round = value['round']
                _leg = value['leg']
        return _mday, _round, _leg

    @classmethod
    def _build_team(cls, league, name):
        """Return a new ``Team`` (id 0) named ``name`` with the defaults of ``league``."""
        team = Team(idt=0)
        team.league = league
        team.name = name + ' F' if league.gender == 'F' else name
        team.short_name = team.name[:3].upper()
        team.long_name = team.name
        team.names = {cls.__name__.lower(): name}
        team.id_sport = league.sport.id
        team.country = Country(idt=league.country.id)
        team.gender = league.gender
        team.images = {'png': 'default-team.png', '50': 'h50-default-team.svg',
                       '30': 'h30-default-team.svg', '80': 'h80-default-team.svg'}
        return team

    @classmethod
    def create_schedule_from_url(cls, league, data):
        """Yield a new ``Match`` for every match row of a schedule page.

        Matches get ``idt=0``, freshly built home and away teams and the
        match day / round / leg returned by ``_current_mday_round_leg`` for
        the date of their header row. ``idof10`` counts the matches yielded
        so far and restarts at 0 whenever the match day, round or leg changes.

        Side effect: switches the process locale to ``fr_FR.utf-8`` to parse
        the French day and month names.
        """
        locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        date = None
        current = (0, None, 0)  # (mday, round, leg)
        idof10 = 0
        for table in soup.find_all('table', class_=cls._SCHEDULE_TABLE_CLASS):
            for tr in table.find_all('tr'):
                th = tr.find('th')
                if th is not None:
                    date = datetime.strptime(th.text.strip(), cls._SCHEDULE_DATE_FORMAT)
                    found = cls._current_mday_round_leg(league, date)
                    if found != current:
                        current = found
                        idof10 = 0
                elif 'data-matchid' in tr.attrs and date is not None:
                    start_date, home_name, away_name, url = \
                        cls._parse_schedule_row(tr, date)
                    match = Match(idt=0)
                    match.idof10 = idof10
                    match.league = league
                    match.mday, match.round, match.leg = current
                    match.start_date = start_date
                    match.url = url
                    match.home = cls._build_team(league, home_name)
                    match.away = cls._build_team(league, away_name)
                    idof10 += 1
                    yield match