# File viewer header: 515 lines, 24 KiB, Python
from datetime import datetime, timedelta
|
|
from urllib.parse import urljoin
|
|
import locale
|
|
|
|
# noinspection PyProtectedMember
|
|
from bs4 import BeautifulSoup, NavigableString
|
|
|
|
from lib.match import Match, Event, Squad, Stat, Comm
|
|
from providers.base import BaseProvider
|
|
from lib.country import Country
|
|
from lib.league import Group
|
|
from lib.team import Team
|
|
|
|
|
|
class Matchendirect(BaseProvider):
    """Provider that scrapes www.matchendirect.fr pages.

    Every public entry point receives the raw bytes of a downloaded page and
    either updates the objects it is given (``get_match_info``,
    ``get_league_ranking``, ``get_schedule``) or yields new ones
    (``create_schedule``, ``create_schedule_from_url``).
    """

    DOMAINS = {'www.matchendirect.fr'}
    CHARSET = 'utf-8'

    # Role lookup used by the line-up diagram: ROLES[half][row][column], where
    # half = table index // 5, row = table index % 5 and column = cell index.
    # The second grid is the first one rotated by 180 degrees.
    ROLES = [
        [
            [None, 'DLG', 'DLG', 'MDG', 'MG', 'MOG', 'ALG'],
            [None, 'DCG', None, 'MDG', 'MCG', 'MOG', 'AG'],
            ['G', 'DC', None, 'MDC', 'MC', 'MOC', 'AC'],
            [None, 'DCD', None, 'MDD', 'MCD', 'MOD', 'AD'],
            [None, 'DLD', 'DLD', 'MDD', 'MD', 'MOD', 'ALD']
        ],
        [
            ['ALD', 'MOD', 'MD', 'MDD', 'DLD', 'DLD', None],
            ['AD', 'MOD', 'MCD', 'MDD', None, 'DCD', None],
            ['AC', 'MOC', 'MC', 'MDC', None, 'DC', 'G'],
            ['AG', 'MOG', 'MCG', 'MDG', None, 'DCG', None],
            ['ALG', 'MOG', 'MG', 'MDG', 'DLG', 'DLG', None]
        ]
    ]

    # French month name -> two-digit month number (match page date).
    MONTH_NUMBERS = {
        'janvier': '01',
        'février': '02',
        'mars': '03',
        'avril': '04',
        'mai': '05',
        'juin': '06',
        'juillet': '07',
        'août': '08',
        'septembre': '09',
        'octobre': '10',
        'novembre': '11',
        'décembre': '12'
    }

    # Third CSS class of an event icon -> (event type, suffix added to the
    # player name or None).
    EVENT_TYPES = {
        'ico_evenement1': ('goal', None),
        'ico_evenement2': ('goal', 'P'),
        'ico_evenement3': ('red-card', None),
        'ico_evenement4': ('yellow-card', None),
        'ico_evenement5': ('yellow-red-card', None),
        'ico_evenement7': ('goal', 'CSC'),
        'ico_evenement81': ('switch-out', None),
        'ico_evenement82': ('switch-out', None),
        'ico_evenement91': ('switch-in', None),
        'ico_evenement92': ('switch-in', None)
    }

    # Label of a statistics row -> key used in ``match.stats``.
    STAT_NAMES = {
        'Possession': 'possession',
        'Buts': 'goals',
        'Tirs': 'attempts',
        'Corners': 'corners',
        'Hors-jeu': 'offsides',
        'Fautes': 'fouls',
        'Carton jaune': 'yellow_cards',
        'Carton rouge': 'red_cards'
    }

    # Second CSS class of a live-comment icon -> comment type.
    COMM_TYPES = {
        'ico_com_occasion': 'chance',
        'ico_com_but': 'goal',
        'ico_com_carton-jaune': 'yellow-card',
        'ico_com_remplacement': 'switch',
        'ico_com_sifflet': 'whistle',
        'ico_com_carton-rouge': 'red-card'
    }

    # Values shared by the two schedule parsers.
    _BASE_URL = 'http://www.matchendirect.fr/'
    _SCHEDULE_TABLE_CLASS = 'table table-striped table-hover'
    _DAY_HEADER_FORMAT = '%A %d %B %Y'

    # ------------------------------------------------------------------
    # match page
    # ------------------------------------------------------------------

    @classmethod
    def get_match_info(cls, match, data):
        """Update *match* in place from the bytes of its match page.

        Steps run in this order: team-name check, start date, extra time and
        shootout, score, minute, events, squad, squad roles, squad line
        re-ordering, stats, live comments.

        :param match: match object to fill; ``match.home`` and ``match.away``
            must expose ``names['matchendirect']``.
        :param data: raw page content (bytes), decoded with ``CHARSET``.
        :raises NameError: when a mandatory element is missing from the page
            or when the team names on the page do not match *match*.
        """
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        div_match = soup.find(id='ajax-match-detail-1')
        if div_match is None:
            raise NameError('div ajax-match-detail-1 not found')

        cls._check_team_names(match, div_match)
        cls._read_start_date(match, div_match)
        cls._read_shootout(match, soup)
        cls._read_score(match, div_match)
        cls._read_minute(match, div_match)
        cls._read_events(match, soup)
        cls._read_squad(match, soup)
        cls._read_squad_roles(match, soup)
        cls._reorder_squad_lines(match)
        cls._read_stats(match, soup)
        cls._read_comms(match, soup)

    @staticmethod
    def _node_text(node):
        """Return the stripped text of a tag or of a bare text node."""
        if isinstance(node, NavigableString):
            return node.strip()
        return node.text.strip()

    @staticmethod
    def _check_team_names(match, div_match):
        """Raise NameError unless the page shows the expected home/away teams."""
        div_teams = div_match.find_all('div', class_='team')
        if len(div_teams) < 2:
            raise NameError('divs team not found')
        expected_home = match.home.names['matchendirect']
        expected_away = match.away.names['matchendirect']
        page_home = div_teams[0].a.text.strip()
        # The second name is read lazily, exactly where it is compared.
        if page_home == expected_away and div_teams[1].a.text.strip() == expected_home:
            raise NameError('team names are inverted')
        if page_home != expected_home:
            raise NameError('home name does not match')
        if div_teams[1].a.text.strip() != expected_away:
            raise NameError('away name does not match')

    @classmethod
    def _read_start_date(cls, match, div_match):
        """Set ``match.start_date`` from the ``info1`` block of the page."""
        div_info = div_match.find('div', class_='info1')
        if div_info is None:
            raise NameError('div info1 is not found')
        # First child: space-separated date whose words 1..3 are day, French
        # month name and year.
        date_words = div_info.contents[0].text.split(' ')
        day = date_words[1]
        month = cls.MONTH_NUMBERS[date_words[2]]
        year = date_words[3]
        # Second child: text node holding the time, presumably " à HHhMM".
        time_parts = div_info.contents[1].strip(' à').split('h')
        if len(time_parts) == 2:
            hour, minute = time_parts
        else:
            hour, minute = 0, 0
        match.start_date = datetime.strptime(
            '{}-{}-{} {}:{}:00'.format(year, month, day, hour, minute), '%Y-%m-%d %H:%M:%S'
        )

    @staticmethod
    def _read_shootout(match, soup):
        """Set ``match.extra_time`` and the shootout score when listed."""
        table_shootout = soup.find('table', id='match_evenement_score')
        if table_shootout is None:
            return
        for tr in table_shootout.find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) != 3:
                continue
            if tds[0].text.strip() == 'Score après prolongation':
                match.extra_time = 'extratime'
            if tds[0].text.strip() == 'Tirs au but':
                shootout = tds[1].text.split(' - ')
                match.extra_time = 'shootout'
                match.shootout_home = int(shootout[0])
                match.shootout_away = int(shootout[1])

    @staticmethod
    def _read_score(match, div_match):
        """Set ``match.score_home`` / ``match.score_away`` when numeric."""
        span_scores = div_match.find_all('span', class_='score')
        if len(span_scores) < 2:
            raise NameError('spans score not found')
        home_text = span_scores[0].text.strip()
        if home_text.isnumeric():
            match.score_home = int(home_text)
        away_text = span_scores[1].text.strip()
        if away_text.isnumeric():
            match.score_away = int(away_text)

    @classmethod
    def _read_minute(cls, match, div_match):
        """Set ``match.minute`` from the last child of the status block."""
        div_status = div_match.find('div', class_='status')
        if div_status is None:
            raise NameError('div status not found')
        match.minute = cls._node_text(div_status.contents[-1]).lower()

    @classmethod
    def _read_events(cls, match, soup):
        """Append the events of the ``match_evenement`` table to ``match.events``."""
        table_events = soup.find('table', id='match_evenement')
        if table_events is None:
            return
        goals_seen = {'home': 0, 'away': 0}
        for span_event in table_events.find_all('span'):
            # Only the third CSS class identifies the event; spans that do not
            # carry one are not events and are skipped instead of crashing.
            classes = span_event.get('class') or []
            if len(classes) < 3 or classes[2] not in cls.EVENT_TYPES:
                continue
            type_, particularity = cls.EVENT_TYPES[classes[2]]
            event = Event(type_=type_)
            td_event = span_event.parent
            event.side = 'home' if td_event['class'][0] == 'c1' else 'away'
            event.player = td_event.find('a').text.strip()
            if particularity is not None:
                event.player += ' ({})'.format(particularity)
            event.minute = td_event.parent.find('td', class_='c2').text
            if event.type == 'goal':
                goals_seen[event.side] += 1
                # Goals listed beyond the side's current score are dropped.
                if goals_seen[event.side] > getattr(match, 'score_' + event.side):
                    continue
            match.events.append(event)

    @classmethod
    def _read_squad(cls, match, soup):
        """Append home then away players of the composition panel to ``match.squad``."""
        div_squad = soup.find('div', class_='MEDpanelcomposition')
        if div_squad is None:
            return
        td_squads = div_squad.find_all('td')
        if len(td_squads) > 1:
            cls._read_squad_side(match, td_squads[0], 'home')
            cls._read_squad_side(match, td_squads[1], 'away')

    @classmethod
    def _read_squad_side(cls, match, td_squad, side):
        """Append one ``Squad`` per icon span found in *td_squad*.

        The player name sits before the icon for the home side and after it
        for the away side: the second sibling in that direction is used, or
        the first one when there is no second sibling.
        """
        for span_squad in td_squad.find_all('span'):
            if side == 'home':
                near = span_squad.previous_sibling
                far = near.previous_sibling
            else:
                near = span_squad.next_sibling
                far = near.next_sibling
            # Always keep stripped text so the name comparisons below and in
            # the role lookup work whichever sibling supplied the name.
            squad_name = cls._node_text(far if far is not None else near)
            role = 'STR' if 'ico_compo_titulaire' in span_squad.attrs['class'] else 'SUB'
            squad = Squad(role=role, name=squad_name, side=side)
            for event in match.events:
                if event.player.replace(' (P)', '') == squad.name:
                    squad.events.append(event.type)
            match.squad.append(squad)

    @classmethod
    def _read_squad_roles(cls, match, soup):
        """Give players a position from the ``schema_compo`` line-up diagram."""
        table_squad = soup.find('table', id='schema_compo')
        if table_squad is None:
            return
        # Side shown by each half of the diagram, fixed by the first player found.
        sides = [None, None]
        for id_table, table in enumerate(table_squad.find_all('table')):
            half, row = divmod(id_table, 5)
            for id_td, td in enumerate(table.find_all('td')):
                label = td.find('b')
                if label is None or not label.text:
                    continue
                words = label.text.split()
                for squad in match.squad:
                    # Only players that have no position yet are candidates.
                    if squad.role not in ('SUB', 'STR'):
                        continue
                    if not all(word in squad.name for word in words):
                        continue
                    if squad.side not in sides:
                        sides[half] = squad.side
                        sides[1 - half] = 'home' if squad.side == 'away' else 'away'
                    if squad.side == sides[half]:
                        squad.lastname = label.text.strip()
                        squad.role = cls.ROLES[half][row][id_td]
                        break

    @staticmethod
    def _reorder_squad_lines(match):
        """Rename roles side by side, depending on how crowded each line is.

        The checks run in a fixed order and each one sees the renames made by
        the previous ones.
        """
        for side in ('home', 'away'):

            def count(roles, side=side):
                return len([sq for sq in match.squad if sq.role in roles and sq.side == side])

            def rename(mapping, side=side):
                for squad in match.squad:
                    if squad.side == side:
                        squad.role = mapping.get(squad.role, squad.role)

            if count(('DCG', 'DC', 'DCD')) > 2:
                rename({'DCG': 'DG', 'DCD': 'DD'})

            if len([sq for sq in match.squad if sq.role.startswith('D') and sq.side == side]) < 4:
                rename({'MG': 'DLG', 'MD': 'DLD'})

            if count(('MDG', 'MDC', 'MDD')) > 2:
                rename({'MDG': 'MG', 'MDD': 'MD'})

            if count(('MCG', 'MC', 'MCD')) > 2:
                if count(('MDG', 'MDC', 'MDD')) == 0:
                    rename({'MC': 'MDC'})
                else:
                    rename({'MCG': 'MG', 'MCD': 'MD'})

            if count(('MG', 'MCG', 'MC', 'MCD', 'MD')) > 3:
                if count(('MDG', 'MDC', 'MDD')) == 0:
                    rename({'MC': 'MDC', 'MCG': 'MDG', 'MCD': 'MDD'})
                else:
                    rename({'MC': 'MOC', 'MG': 'MOG', 'MD': 'MOD'})

            if count(('AG', 'AC', 'AD')) > 2:
                rename({'AG': 'ALG', 'AD': 'ALD'})

    @classmethod
    def _read_stats(cls, match, soup):
        """Fill ``match.stats`` from the statistics panel."""
        div_stats = soup.find('div', class_='MEDpanelstats')
        if div_stats is None:
            return
        match.stats = {name: Stat() for name in cls.STAT_NAMES.values()}

        def add_to_attempts(stat):
            match.stats['attempts'].home += stat.home
            match.stats['attempts'].away += stat.away

        for tr in div_stats.find_all('tr'):
            tds = tr.find_all('td')
            # Cells 0, 2 and 4 are read below; shorter rows hold no statistic.
            if len(tds) < 5:
                continue
            stat_name = tds[2].text.strip()
            stat = Stat(home=int(tds[0].text.strip()), away=int(tds[4].text.strip()))
            if stat_name in cls.STAT_NAMES:
                match.stats[cls.STAT_NAMES[stat_name]] = stat
            elif stat_name == 'Tirs cadrés':
                add_to_attempts(stat)
                match.stats['in_attempts'] = Stat(home=stat.home, away=stat.away)
            elif stat_name == 'Tirs non cadrés':
                add_to_attempts(stat)
            elif stat_name == 'Tirs arrêtés':
                match.stats['block_attempts'] = stat
                add_to_attempts(stat)
            elif stat_name == 'Tirs sur le poteau':
                match.stats['pole_attempts'] = stat
                add_to_attempts(stat)

    @classmethod
    def _read_comms(cls, match, soup):
        """Append the rows of the ``commentaire`` table to ``match.comms``."""
        table_comms = soup.find('table', id='commentaire')
        if table_comms is None:
            return
        for tr in table_comms.find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) != 3:
                continue
            minute = tds[0].text.strip().replace('+', "'+")
            if minute and not minute.endswith("'") and '+' not in minute:
                minute += "'"
            comm_type = ''
            span_icon = tds[1].find('span')
            if span_icon is not None:
                # The second CSS class names the icon; a comment whose icon is
                # unknown (or has no second class) is skipped.
                icon_classes = span_icon.get('class') or []
                icon_class = icon_classes[1] if len(icon_classes) > 1 else None
                if icon_class not in cls.COMM_TYPES:
                    continue
                comm_type = cls.COMM_TYPES[icon_class]
            match.comms.append(Comm(minute=minute, type_=comm_type, text=tds[2].text.strip()))

    # ------------------------------------------------------------------
    # ranking page
    # ------------------------------------------------------------------

    @classmethod
    def get_league_ranking(cls, league, data):
        """Update the teams of *league* from a ranking page.

        Rows with a single cell start a group; rows whose team name is not
        found in ``league.teams`` are ignored.

        :param league: league whose ``teams`` are updated in place.
        :param data: raw page content (bytes), decoded with ``CHARSET``.
        :return: list of the groups named by the page, in order of first
            appearance. The default group ``'0'`` used before any group row
            is not part of the list.
        """
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        table = soup.find('table', id='tableau_classement')
        groups = []
        group = Group(name='0', url=None, league=league)
        stat_columns = ('points', 'played', 'wins', 'ties', 'loss', 'g_for', 'g_against', 'g_diff')
        for tr in table.find_all('tr'):
            tds = tr.find_all('td')

            # row titles
            if not tds:
                continue

            # row group: reuse the group when its name was already met
            if len(tds) == 1:
                group_name = tds[0].text.strip()
                group = next((grp for grp in groups if grp.name == group_name), None)
                if group is None:
                    group = Group(name=group_name, url=None, league=league)
                    groups.append(group)
                continue

            # get team
            med_name = tds[0].find('a').contents[-1].strip()
            team = next(
                (tm for tm in league.teams
                 if 'matchendirect' in tm.names and tm.names['matchendirect'] == med_name),
                None
            )
            if team is None:
                continue

            # get rank: taken from the span inside the header cell when there is one
            th = tr.find('th')
            span = th.find('span')
            rank_node = span if span is not None else th
            team.rank = int(rank_node.text.strip())

            # get stats: cells 1..8, in the order of stat_columns
            team.group = group
            for index, column in enumerate(stat_columns, start=1):
                setattr(team, column, int(tds[index].text.strip()))
        return groups

    # ------------------------------------------------------------------
    # schedule pages
    # ------------------------------------------------------------------

    @classmethod
    def get_schedule_url(cls, match):
        """Return the URL of the weekly schedule page that lists *match*.

        :param match: match exposing ``start_date`` and ``league.url``.
        :return: ``<league url>/<year>-<week number>`` computed one week after
            the start date.
        """
        # In matchendirect.fr dates are shifted in 2019
        shifted = match.start_date + timedelta(days=7)
        return '{}/{}'.format(match.league.url.rstrip('/'), shifted.strftime('%Y-%W'))

    @staticmethod
    def _kickoff(day, tr):
        """Return *day* plus the ``HH:MM`` time of row *tr*, or *day* alone.

        ``partition`` keeps the fallback reachable when the cell is empty or
        holds no single ``HH:MM`` value.
        """
        hours, _, minutes = tr.find('td', class_='lm1').text.strip().partition(':')
        if hours.isnumeric() and minutes.isnumeric():
            return day + timedelta(hours=int(hours), minutes=int(minutes))
        return day

    @classmethod
    def get_schedule(cls, scheduler, data):
        """Update the matches of *scheduler* found on a schedule page.

        Each matching match receives ``new_url``, ``new_start_date`` and
        ``task_done = True``; ``scheduler.previous_url`` / ``next_url`` are
        set when the page has the corresponding links.

        :param scheduler: object exposing ``matches``.
        :param data: raw page content (bytes), decoded with ``CHARSET``.
        """
        # NOTE: process-wide setting, needed by strptime to read French dates.
        locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        date = None
        for table in soup.find_all('table', class_=cls._SCHEDULE_TABLE_CLASS):
            for tr in table.find_all('tr'):
                th = tr.find('th')
                if th is not None:
                    # header row: day shared by the match rows that follow
                    date = datetime.strptime(th.text.strip(), cls._DAY_HEADER_FORMAT)
                elif 'data-matchid' in tr.attrs and date is not None:
                    start_date = cls._kickoff(date, tr)
                    td_score = tr.find('td', class_='lm3')
                    home = td_score.find('span', class_='lm3_eq1').contents[0].strip(' \n\t*')
                    away = td_score.find('span', class_='lm3_eq2').contents[-1].strip(' \n\t*')
                    url = urljoin(cls._BASE_URL, td_score.find('a').attrs['href'])
                    for match in scheduler.matches:
                        if match.home.names['matchendirect'] == home \
                                and match.away.names['matchendirect'] == away:
                            match.new_url = url
                            match.new_start_date = start_date
                            match.task_done = True
                            break

        a_previous = soup.find('a', class_='objselect_prevnext objselect_prec')
        if a_previous is not None:
            scheduler.previous_url = urljoin(cls._BASE_URL, a_previous.attrs['href'])
        a_next = soup.find('a', class_='objselect_prevnext objselect_suiv')
        if a_next is not None:
            scheduler.next_url = urljoin(cls._BASE_URL, a_next.attrs['href'])

    @classmethod
    def create_schedule(cls, league, data):
        """Yield schedule page URLs from the page selector of a league page.

        Yields the URL of the selected option and of every option after it,
        resolved against ``league.url``.

        :param league: league exposing ``url``.
        :param data: raw page content (bytes), decoded with ``CHARSET``.
        """
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        div_top = soup.find('div', id='filtre_haut')
        select_url = div_top.find('select')
        if select_url is None:
            return
        reached_selected = False
        for option_url in select_url.find_all('option'):
            reached_selected = reached_selected or 'selected' in option_url.attrs
            if reached_selected:
                yield urljoin(league.url, option_url.attrs['value'])

    @classmethod
    def _current_mday_round_leg(cls, league, date):
        """Return ``(mday, round, leg)`` of the latest round started by *date*.

        ``league.round_dates`` maps ``'YYYY-MM-DD'`` start dates to dicts with
        the keys ``mday``, ``round`` and ``leg``. Without an applicable entry
        the result is ``(0, None, 0)``.
        """
        _mday, _round, _leg = 0, None, 0
        if league.round_dates is not None:
            # Walk the entries chronologically so the result does not depend
            # on the order in which the mapping was built.
            starts = sorted(
                ((datetime.strptime(key, '%Y-%m-%d'), value)
                 for key, value in league.round_dates.items()),
                key=lambda pair: pair[0]
            )
            for start, value in starts:
                if date < start:
                    break
                _mday = value['mday']
                _round = value['round']
                _leg = value['leg']
        return _mday, _round, _leg

    @classmethod
    def _new_team(cls, league, med_name):
        """Build a new ``Team`` of *league* named *med_name* on the site."""
        team = Team(idt=0)
        team.league = league
        team.name = (med_name + ' F') if league.gender == 'F' else med_name
        team.short_name = team.name[:3].upper()
        team.long_name = team.name
        team.names = {cls.__name__.lower(): med_name}
        team.id_sport = league.sport.id
        team.country = Country(idt=league.country.id)
        team.gender = league.gender
        team.images = {'png': 'default-team.png', '50': 'h50-default-team.svg',
                       '30': 'h30-default-team.svg', '80': 'h80-default-team.svg'}
        return team

    @classmethod
    def create_schedule_from_url(cls, league, data):
        """Yield a new ``Match`` for every match row of a schedule page.

        Matches get ``mday``/``round``/``leg`` from ``league.round_dates`` and
        an ``idof10`` counter that restarts each time one of these three
        values changes on a day header row.

        :param league: league the matches and their teams belong to.
        :param data: raw page content (bytes), decoded with ``CHARSET``.
        """
        # NOTE: process-wide setting, needed by strptime to read French dates.
        locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')
        soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
        date = None
        current = (0, None, 0)  # (mday, round, leg)
        idof10 = 0
        for table in soup.find_all('table', class_=cls._SCHEDULE_TABLE_CLASS):
            for tr in table.find_all('tr'):
                th = tr.find('th')
                if th is not None:
                    date = datetime.strptime(th.text.strip(), cls._DAY_HEADER_FORMAT)
                    found = cls._current_mday_round_leg(league, date)
                    if found != current:
                        current = found
                        idof10 = 0

                elif 'data-matchid' in tr.attrs and date is not None:
                    match = Match(idt=0)
                    match.idof10 = idof10
                    match.league = league
                    match.mday, match.round, match.leg = current

                    match.start_date = cls._kickoff(date, tr)

                    td_score = tr.find('td', class_='lm3')
                    match.url = urljoin(cls._BASE_URL, td_score.find('a').attrs['href'])

                    home_name = td_score.find('span', class_='lm3_eq1').contents[0].strip(' \n\t*')
                    match.home = cls._new_team(league, home_name)

                    away_name = td_score.find('span', class_='lm3_eq2').contents[-1].strip(' \n\t*')
                    match.away = cls._new_team(league, away_name)

                    idof10 += 1
                    yield match