from datetime import datetime, timedelta
from urllib.parse import urljoin
import json
import re

from bs4 import BeautifulSoup
import bs4.element
import feedparser

from lib.news import News, NewsImage
from providers.base import BaseProvider
from lib.match import Comm, Match
from lib.country import Country
from lib.league import Group
from lib.team import Team


class Eurosport(BaseProvider):

    DOMAINS = {
        'www.eurosport.fr', 'www.rugbyrama.fr', 'video.eurosport.fr', 'video.rugbyrama.fr', 'web-api.eurosport.com'
    }
    CHARSET = 'utf-8'
    IMAGE = 'big-eurosport.png'

    @classmethod
    def get_match_info(cls, match, data):
        html = data.decode(cls.CHARSET)

        # JSON parser: the web API returns the match data directly
        if match.json_parser:
            json_body = json.loads(html)

            player_home = json_body['match']['players'][0]
            player_away = json_body['match']['players'][1]
            if match.home.name != '{} {}'.format(player_home['firstname'], player_home['lastname']):
                raise NameError('home name does not match')
            if match.away.name != '{} {}'.format(player_away['firstname'], player_away['lastname']):
                raise NameError('away name does not match')

            match_datetime = '{} {}'.format(json_body['match']['date']['date'], json_body['match']['date']['time'])
            match.start_date = datetime.strptime(match_datetime, '%Y-%m-%d %H:%M')
            if 'score' in json_body['match']:
                # Sets are keyed by player id; map them back to home/away.
                score_sets = {
                    json_body['match']['score'][0]['playerid']: json_body['match']['score'][0]['sets'],
                    json_body['match']['score'][1]['playerid']: json_body['match']['score'][1]['sets']
                }
                match.score_sets = {
                    'home': score_sets[player_home['id']],
                    'away': score_sets[player_away['id']]
                }
            if 'name' in json_body['match']['status']:
                match.minute = json_body['match']['status']['name']

        # HTML parser: scrape all data from the live page
        else:
            soup = BeautifulSoup(html, 'html.parser')
            div_match = soup.find(id='livehero')
            if div_match is None:
                raise NameError('div livehero not found')

            # Check team names
            div_teams = div_match.find_all('div', class_='heromatch__team-name')
            if len(div_teams) != 2 or not div_teams[0].a.text or not div_teams[1].a.text:
                raise NameError('divs team not found')
            if div_teams[0].a.text.strip() != match.home.names['eurosport']:
                raise NameError('home name does not match')
            if div_teams[1].a.text.strip() != match.away.names['eurosport']:
                raise NameError('away name does not match')

            # Check start_date
            div_date = div_match.find('div', class_='heromatch__date')
            if div_date is None:
                raise NameError('div date not found')
            div_time = div_match.find('div', class_='heromatch__time')
            if div_time is None:
                raise NameError('div time not found')
            date_ = div_date.text.strip()
            time_ = div_time.text.strip()
            match.start_date = datetime.strptime('{} {}:00'.format(date_, time_), '%d/%m/%y %H:%M:%S')

            # Get score
            div_scores = div_match.find_all('div', class_='heromatch__score')
            if len(div_scores) < 2:
                raise NameError('divs score not found')
            if div_scores[0].text.strip().isnumeric():
                match.score_home = int(div_scores[0].text.strip())
            if div_scores[1].text.strip().isnumeric():
                match.score_away = int(div_scores[1].text.strip())

            # Get minute, falling back to the match status when it is missing
            div_minute = div_match.find('div', class_='heromatch__minute')
            if div_minute is None or not div_minute.text:
                div_minute = div_match.find('div', class_='heromatch__status')
                if div_minute is None:
                    raise NameError('div minute not found')
            match.minute = div_minute.text.strip().lower()

            # Get live comments
            div_comms = soup.find(class_='live_comments_v8_5_bis')
            if div_comms is not None:
                for article_comm in div_comms.find_all('article'):
                    comm = Comm(type_='', minute='', text='')
                    left_col = article_comm.find(class_='left-col')
                    if left_col is not None:
                        left_span = left_col.find('span')
                        if left_span is not None:
                            if len(left_span.contents) > 0 and isinstance(left_span.contents[0], str):
                                comm.minute = left_span.contents[0].strip()
                    right_col = article_comm.find(class_='right-col')
                    if right_col is not None:
                        right_p = right_col.find('p')
                        if right_p is not None:
                            comm.text = right_p.text.strip()
                    if comm.text:
                        # All-caps comments are section headers: render them bold.
                        if comm.text == comm.text.upper():
                            comm.text = '<strong>{}</strong>'.format(comm.text)
                    else:
                        continue
                    match.comms.append(comm)
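
    # A sketch of the JSON payload the branch above expects when
    # match.json_parser is set. The key names come from the parsing code;
    # the example values and any structure beyond those keys are assumptions:
    #   {"match": {"players": [{"id": 1, "firstname": "Rafael", "lastname": "Nadal"}, ...],
    #              "date": {"date": "2019-06-09", "time": "15:00"},
    #              "score": [{"playerid": 1, "sets": [6, 6, 6]}, ...],
    #              "status": {"name": "Terminé"}}}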

    @classmethod
    def get_match_comms(cls, match, data):
        html = data.decode(cls.CHARSET)
        json_body = json.loads(html)
        if json_body is not None:
            for comment in json_body['livecomments']:
                if all(key in comment for key in ('marker', 'text')):
                    match.comms.append(
                        Comm(minute=comment['marker'], type_='', text=comment['text'])
                    )
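
    # Minimal sketch of the live-comments payload ('livecomments', 'marker'
    # and 'text' come from the code above; everything else is an assumption):
    #   {"livecomments": [{"marker": "45'", "text": "Mi-temps."}]}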

    @classmethod
    def get_team_staff(cls, data):
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        players = list()
        staff = dict()
        # Section titles close the section that precedes them: when a title is
        # reached, the names accumulated so far belong to the previous section
        # (the players listed before 'Défenseur(s)' are the goalkeepers, etc.).
        for li in soup.find_all('li'):
            if 'class' in li.attrs and 'team_global_title' in li.attrs['class']:
                if li.text == 'Défenseur(s)':
                    staff['goalkeepers'] = players
                elif li.text == 'Milieu(x)':
                    staff['defenders'] = players
                elif li.text == 'Attaquant(s)':
                    staff['midfielders'] = players
                elif li.text == 'Entraîneur':
                    staff['attackers'] = players
                    break
                players = list()
            else:
                # Skip list items that carry no player link.
                a_player = li.find('a')
                if a_player is not None:
                    players.append(a_player.text.replace('\xa0', ' '))
        return staff
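
    # Shape of the dict returned by get_team_staff (key names from the code
    # above; values are lists of player names as plain strings):
    #   {'goalkeepers': [...], 'defenders': [...], 'midfielders': [...], 'attackers': [...]}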

    @classmethod
    def get_league_ranking(cls, league, data):
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        div_standing = soup.find('div', class_='standing_v8_5')
        groups = list()
        for div in div_standing.find_all('div', class_='tab-content'):
            if 'data-ajax-url' in div.attrs:
                # One tab per group: resolve the tab label and its AJAX ranking URL.
                link = div.attrs['data-ajax-url']
                group_class = div.attrs['data-navtab-content-id'].split('_')[1]
                group = soup.find('a', class_=group_class).find('span', class_='navtab-label').text.strip()
                groups.append(Group(name=group, url=urljoin(league.url, link), league=league))
            else:
                # Single-table league: the ranking lives on the league page itself.
                groups.append(Group(name='0', url=league.url, league=league))
        return groups
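
    # get_league_ranking returns one Group per ranking tab; a league without
    # tabs gets a single placeholder Group named '0' pointing at the league
    # page itself.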

    @classmethod
    def get_group_ranking(cls, group, data):
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        table = soup.find('table')

        for tr in table.find_all('tr', class_='standing-table__row'):
            eur_name = tr.find('span', class_='text').text.strip()
            for tm in group.league.teams:
                if 'eurosport' in tm.names and tm.names['eurosport'] == eur_name:
                    team = tm
                    break
            else:
                # Row does not belong to a known team of this league.
                continue

            # Cells are addressed from the right; the last eight are assumed
            # to be: played, wins, ties, losses, goals for, goals against,
            # goal difference, points.
            tds = tr.find_all('td')
            team.group = group
            team.rank = int(tds[0].text.strip())
            team.played = int(tds[-8].text.strip())
            team.wins = int(tds[-7].text.strip())
            team.ties = int(tds[-6].text.strip())
            team.loss = int(tds[-5].text.strip())
            team.g_for = int(tds[-4].text.strip())
            team.g_against = int(tds[-3].text.strip())
            team.g_diff = int(tds[-2].text.strip())
            team.points = int(tds[-1].text.strip())
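
    # Sketch of a ranking row the loop above expects (the class names come
    # from the code; the surrounding markup and the values are assumptions):
    #   <tr class="standing-table__row">
    #     <td>1</td>...<span class="text">Paris SG</span>...
    #     <td>38</td><td>27</td><td>10</td><td>1</td><td>105</td><td>35</td><td>70</td><td>91</td>
    #   </tr>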

    @classmethod
    def get_newss_from_source(cls, news_source, data):
        xml = data.decode()
        tree = feedparser.parse(xml)
        for item in tree.entries:
            news = News()
            news.source = 'eurosport'
            news.sport = news_source.sport
            news.title = item.title
            news.url = item.link
            # Strip any markup from the RSS description (non-greedy, so text
            # between tags survives).
            news.description = re.sub(r'<.*?>', '', item.description)

            summary = BeautifulSoup(item.summary, 'html.parser')
            image_object = summary.find('img')
            image_url = image_object.attrs['src'] if image_object is not None and 'src' in image_object.attrs else None
            news.image = NewsImage(url=image_url, title=news.title, basename=cls.IMAGE, id_news=news.id)

            if hasattr(item, 'tags'):
                news.tags = [tag.term for tag in item.tags]

            # Set current date as pub_date to prevent disorder between id and pub_date
            news.pub_date = datetime.now()

            yield news
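
    # Usage sketch ('news_source' and 'feed_bytes' are hypothetical stand-ins
    # for a configured source and the fetched RSS bytes):
    #   for news in Eurosport.get_newss_from_source(news_source, feed_bytes):
    #       store(news)  # 'store' is not part of this module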

    @classmethod
    def get_news_content(cls, news, data):
        html = data.decode(encoding=cls.CHARSET, errors='ignore')
        soup = BeautifulSoup(html, 'html.parser')

        if news.url.endswith('/video.shtml'):
            # Video page: the MP4 source is embedded in the page markup.
            res = re.search(r'https://vod-eurosport.akamaized.net/[^"]*', html)
            if res is not None:
                news.video_src = res.group(0)
                news.content = '<video controls src="{}" type="video/mp4"></video>'.format(news.video_src)
            else:
                news.content = ''

            div_paraphs = soup.find('div', class_='teaser_container')
            if div_paraphs is not None:
                news.content += ''.join(
                    [str(c) for c in div_paraphs.contents if isinstance(c, bs4.element.Tag) and c.name != 'script']
                )

        else:
            h2_teaser = soup.find('h2', class_='storyfull__teaser')
            if h2_teaser is not None:
                news.teaser = str(h2_teaser.text).strip()

            div_paraphs = soup.find('div', class_='storyfull__paragraphs')
            if div_paraphs is not None:
                news.content = ''.join(
                    [str(c) for c in div_paraphs.contents if isinstance(c, bs4.element.Tag) and c.name != 'script']
                )

            div_author = soup.find('div', class_='storyfull__publisher-author-name')
            if div_author is not None:
                a_author = div_author.find('a')
                if a_author is not None:
                    news.author = a_author.text.strip()
                else:
                    news.author = str(div_author.contents[0]).strip()
                # Drop the French byline prefix ('Par ' = 'By ').
                if news.author.startswith('Par '):
                    news.author = news.author.replace('Par ', '', 1)

    @classmethod
    def get_schedule_url(cls, match):
        return match.url

    @classmethod
    def get_schedule(cls, scheduler, data):
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        divs_name = soup.find_all('span', class_='tennismatch--hidemobile')
        if len(divs_name) == 2 and divs_name[0].text and divs_name[1].text:
            home = divs_name[0].text.strip()
            away = divs_name[1].text.strip()
            for match in scheduler.matches:
                if match.home.names['eurosport'] == home and match.away.names['eurosport'] == away:
                    # Rebuild the kick-off datetime from the separate date and time widgets.
                    date = datetime.strptime(soup.find('div', class_='livehero__date').contents[0], '%d/%m/%y')
                    hours, minutes = soup.find('div', class_='tennismatch__time-value').text.strip().split(':')
                    match.new_start_date = date + timedelta(hours=int(hours), minutes=int(minutes))
                    match.task_done = True

    @classmethod
    def create_schedule(cls, league, data):
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        current_year = datetime.now().year

        droplet_id = 0
        if league.sport.id == 2:
            # Tennis pages expose the droplet id required by the web API endpoints.
            ajax_container = soup.find('div', class_='ajax-container')
            droplet_match = re.search(r'&dropletid=(\d+)&', ajax_container.attrs['data-ajax-url'])
            if droplet_match is not None:
                droplet_id = int(droplet_match.group(1))
            else:
                raise Exception('no droplet_id found')

        rounds = list()
        div_rounds = soup.find('div', class_='rounds-dropdown__rounds')
        for div_round in div_rounds.find_all('div', class_='rounds-dropdown__round'):
            rounds.append(div_round.text.strip())

        div_matches = soup.find('div', class_='bracket-matches-wrapper')
        for div_matches_round in div_matches.find_all('div', class_='bracket-matches'):
            # The round number is encoded in a 'bracket-round--<n>' CSS class.
            for class_ in div_matches_round.attrs['class']:
                if class_.startswith('bracket-round--'):
                    nb_round = class_.replace('bracket-round--', '')
                    if nb_round.isnumeric():
                        nb_round = int(nb_round)
                        if nb_round <= len(rounds):
                            current_round = rounds[nb_round - 1]
            idof10 = 0

            for a_match in div_matches_round.find_all('a', class_='match-sets'):
                match = Match(idt=0)
                match.idof10 = idof10
                match.url = urljoin(league.url, a_match.attrs['href'])
                # Only build API URLs when a real droplet id was found above.
                if droplet_id:
                    # The match id is the trailing 'mtc<id>' segment of the URL.
                    match_id = int(a_match.attrs['href'].split('/')[-2].split('mtc')[-1])
                    score_url = 'https://web-api.eurosport.com/json/getmatchheaderweb.json'
                    comms_url = 'https://web-api.eurosport.com/json/getlivecomments.json'
                    match.url_score = '{}?d={}&ids={}'.format(score_url, droplet_id, match_id)
                    match.url_comms = '{}?d={}&ids={}'.format(comms_url, droplet_id, match_id)

                match.league = league
                match.leg = 0
                match.round = current_round
                match.mday = 0

                # The bracket only shows day and month; assume the current year.
                div_time = a_match.find('div', class_='match-sets__start-time')
                match_date = '{}/{}'.format(div_time.text.strip(), current_year)
                match.start_date = datetime.strptime(match_date, '%d/%m/%Y')

                divs_name = a_match.find_all('div', class_='player__name')
                divs_logo = a_match.find_all('div', class_='player__logo')
                if len(divs_name) == 2 and divs_name[0].text and divs_name[1].text:

                    match.home = Team(idt=0)
                    match.home.league = league
                    match.home.country = Country(idt=0)
                    img_country_home = divs_logo[0].find('img')
                    if img_country_home is None:
                        continue
                    match.home.country.name = img_country_home.attrs['title'].strip()
                    match.home.name = divs_name[0].text.strip()
                    match.home.long_name = match.home.name
                    # Abbreviate long leading words, e.g. 'Rafael Nadal' -> 'R. Nadal'
                    # (the last word and words of three letters or fewer are kept).
                    words = match.home.name.split(' ')
                    for idx in range(len(words)):
                        if idx < len(words) - 1 and len(words[idx]) > 3:
                            words[idx] = words[idx][0] + '.'
                    match.home.short_name = ' '.join(words)
                    match.home.names = {'eurosport': match.home.name}

                    match.away = Team(idt=0)
                    match.away.league = league
                    match.away.country = Country(idt=0)
                    img_country_away = divs_logo[1].find('img')
                    if img_country_away is None:
                        continue
                    match.away.country.name = img_country_away.attrs['title'].strip()
                    match.away.name = divs_name[1].text.strip()
                    match.away.long_name = match.away.name
                    words = match.away.name.split(' ')
                    for idx in range(len(words)):
                        if idx < len(words) - 1 and len(words[idx]) > 3:
                            words[idx] = words[idx][0] + '.'
                    match.away.short_name = ' '.join(words)
                    match.away.names = {'eurosport': match.away.name}

                idof10 += 1
                yield match
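
# Usage sketch (hypothetical objects; 'fetch' is not part of this module):
#   league = ...  # a lib.league league with url, sport and teams populated
#   for match in Eurosport.create_schedule(league, fetch(league.url)):
#       print(match.round, match.home.short_name, 'vs', match.away.short_name)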