commit a832ce2ea2947e5d5f04102642142da059786a9e Author: samuel Date: Sat Oct 3 23:17:53 2020 +0200 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..63c6e9e --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +hidden +venv +__pycache__ +.idea +*.log +*_local.py +sam_aiohttp diff --git a/aconvert_images.py b/aconvert_images.py new file mode 100644 index 0000000..0659521 --- /dev/null +++ b/aconvert_images.py @@ -0,0 +1,140 @@ +from urllib.request import urlopen +from os import listdir + +from selenium.webdriver.support.ui import Select, WebDriverWait +from selenium.webdriver.support import expected_conditions +from selenium.webdriver.common.by import By + +from lib.tvchannel import TvChannel +from core.webutils import WebUtils +from lib.country import Country +from lib.browser import Browser +from lib.league import League +from lib.team import Team +import setting + + +class ConvertForm: + def __init__(self, browser, choice): + self.form = browser.find_element_by_xpath("//form[@id='conversionform']") + + self.form.find_element_by_xpath("//button[@class='btn glow trigger2 dropdown-toggle']").click() + self.ul = self.form.find_element_by_xpath("//ul[@class='file-menu dropdown-menu']") + self.ul.find_elements_by_tag_name('a')[1].click() + self.file = self.form.find_element_by_xpath("//input[@id='file']") + + Select(self.form.find_element_by_xpath("//select[@id='targetformat']")).select_by_value('svg') + if choice == 'tvchannels': + Select(self.form.find_element_by_xpath("//select[@id='imagesize']")).select_by_value('option4') + browser.execute_script("document.getElementsByName('preserveaspect')[0].click()") + else: + Select(self.form.find_element_by_xpath("//select[@id='imagesize']")).select_by_value('option2') + + self.size = self.form.find_element_by_xpath("//input[@id='customsize']") + self.submit = self.form.find_element_by_xpath("//input[@id='submitbtn']") + + +def png2svg(browser, cform, png_url, svg_path, width, height): + 
cform.file.clear() + cform.file.send_keys(png_url) + cform.size.clear() + if width is None: + cform.size.send_keys(str(height)) + else: + cform.size.send_keys('{}x{}'.format(width, height)) + cform.submit.click() + + result_tr = WebDriverWait(browser, 30).until( + expected_conditions.presence_of_element_located((By.XPATH, "//tbody[@id='resultbody']//tr")) + ) + result_tds = result_tr.find_elements_by_tag_name('td') + request = urlopen(url=result_tds[1].find_element_by_tag_name('a').get_attribute('href')) + with open(svg_path, 'w') as svg_file: + svg_file.write(request.read().decode()) + result_tds[3].find_element_by_xpath("//a[@title='Delete']").click() + + +class AconvertImages(WebUtils): + def __init__(self): + super().__init__(module_='aconvert_images') + self.convert_images(choice=self.args.choice, head=self.args.head) + + def convert_images(self, choice, head): + self.logger.info('[*] Starting job {}'.format(self.module)) + user_agent = self.mysqldb.get_random_ua() + browser = Browser(user_agent=user_agent, headless=not head, tor=True) + + try: + browser.get('https://check.torproject.org/?lang=fr') + if browser.title.strip() == 'Félicitations. 
Ce navigateur est configuré pour utiliser Tor.': + browser.execute_script('window.stop();') + browser.get('https://www.aconvert.com/image/') + convert_form = ConvertForm(browser, choice) + + if choice == 'leagues': + svg_folder = '{}/league'.format(setting.IMAGES_FOLDER) + png_list = listdir('/tmp') + for league in League.get_leagues(self.mysqldb): + png_name = '{}.png'.format(league.id) + if png_name in png_list: + self.logger.info('[+] League {} : {}'.format(league.id, league.name)) + png_url = 'https://www.bestofbets.net/images/league/{}.png'.format(league.id) + for size in (35, 50, 80): + svg_path = '{}/h{}-{}.svg'.format(svg_folder, size, league.id) + png2svg(browser, convert_form, png_url, svg_path, size, size) + + elif choice == 'teams': + svg_folder = '{}/team'.format(setting.IMAGES_FOLDER) + png_list = listdir('/tmp') + for team in Team.get_teams(self.mysqldb): + png_name = '{}.png'.format(team.id) + if png_name in png_list: + self.logger.info('[+] Team {} : {}'.format(team.id, team.name)) + png_url = 'https://www.bestofbets.net/images/team/{}.png'.format(team.id) + for size in (30, 50, 80): + svg_path = '{}/h{}-{}.svg'.format(svg_folder, size, team.id) + png2svg(browser, convert_form, png_url, svg_path, size, size) + + elif choice == 'countries': + svg_folder = '{}/country'.format(setting.IMAGES_FOLDER) + png_list = listdir('/tmp') + for country in Country.get_countries(self.mysqldb): + png_name = '{}.png'.format(country.id) + if png_name in png_list: + self.logger.info('[+] Country {} : {}'.format(country.id, country.name)) + png_url = 'https://www.bestofbets.net/images/country/{}.png'.format(country.id) + for size in (30, 50, 80): + svg_path = '{}/h{}-{}.svg'.format(svg_folder, size, country.id) + png2svg(browser, convert_form, png_url, svg_path, size, size) + + elif choice == 'logos': + svg_folder = '{}/logo'.format(setting.IMAGES_FOLDER) + png_list = listdir('/tmp') + png_name = 'bob-logo.png' + if png_name in png_list: + png_url = 
'https://www.bestofbets.net/images/logo/bob-logo.png' + for width, height in ((32, 40), (89, 110)): + svg_path = '{}/h{}-bob-logo.20svg'.format(svg_folder, height) + png2svg(browser, convert_form, png_url, svg_path, width, height) + + elif choice == 'tvchannels': + svg_folder = '{}/tvchannel'.format(setting.IMAGES_FOLDER) + png_list = listdir('/tmp') + for tvchannel in TvChannel.get_tvchannels(self.mysqldb): + png_name = '{}.png'.format(tvchannel.id) + if png_name in png_list: + self.logger.info('[+] TVChannel {} : {}'.format(tvchannel.id, tvchannel.name)) + png_url = 'https://www.bestofbets.net/images/tvchannel/{}.png'.format(tvchannel.id) + for height in (20, 30): + svg_path = '{}/h{}-{}.svg'.format(svg_folder, height, tvchannel.id) + png2svg(browser, convert_form, png_url, svg_path, None, height) + + except BaseException as e: + self.logger.error('[-] error : {} - {}'.format(type(e), str(e))) + finally: + browser.quit() + self.logger.info('[*] Job done {}'.format(self.module)) + + +if __name__ == '__main__': + AconvertImages() diff --git a/core/arg.py b/core/arg.py new file mode 100644 index 0000000..a82232c --- /dev/null +++ b/core/arg.py @@ -0,0 +1,24 @@ +from argparse import ArgumentParser + +import setting + + +class ArgParser(ArgumentParser): + + def __init__(self): + super().__init__() + self.add_argument('-il', '--id-league', type=int, help='League id') + self.add_argument('-im', '--id-match', type=int, help='Match id') + self.add_argument('-it', '--id-team', type=int, help='Team id') + self.add_argument('-iu', '--id-user', type=int, help='User id') + self.add_argument('-t', '--table', type=str, help='Table name') + self.add_argument('-c', '--choice', type=str, help='Choice for aconvert') + self.add_argument('-hd', '--head', action='store_true', help='Browser with head') + + def parse_args(self, args=None, namespace=None): + parsed = super().parse_args() + parsed.id_league = setting.ID_LEAGUE if parsed.id_league is None else parsed.id_league + 
parsed.id_match = setting.ID_MATCH if parsed.id_match is None else parsed.id_match + parsed.id_team = setting.ID_TEAM if parsed.id_team is None else parsed.id_team + parsed.id_user = setting.ID_USER if parsed.id_user is None else parsed.id_user + return parsed diff --git a/core/influxdb.py b/core/influxdb.py new file mode 100644 index 0000000..a17cf92 --- /dev/null +++ b/core/influxdb.py @@ -0,0 +1,26 @@ +import influxdb + +import setting + + +class InfluxDB: + + def __init__(self, host=setting.INFLUX_HOST, port=setting.INFLUX_PORT, base=setting.INFLUX_BASE): + self.host = host + self.port = port + self.base = base + self._client = influxdb.InfluxDBClient(host=self.host, port=self.port, database=self.base) + + def save_stats(self, module, start_time, nb_tasks, tasks_done, quantity, errors, total_time): + points = [{ + 'measurement': module, + 'time': start_time, + 'fields': { + 'nbt': nb_tasks, + 'tdn': tasks_done, + 'qty': quantity, + 'err': errors, + 'tti': total_time + } + }] + self._client.write_points(points, time_precision='s') diff --git a/core/log.py b/core/log.py new file mode 100644 index 0000000..017d3c9 --- /dev/null +++ b/core/log.py @@ -0,0 +1,15 @@ +import logging.handlers +import sys + +import setting + + +def get_logger(name, level=setting.LOG_LEVEL): + logger = logging.getLogger(name) + formatter = logging.Formatter('[%(asctime)s] %(levelname)s / %(name)s : %(message)s') + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.DEBUG) + handler.setFormatter(formatter) + logger.setLevel(getattr(logging, level)) + logger.addHandler(handler) + return logger diff --git a/core/mysqldb.py b/core/mysqldb.py new file mode 100644 index 0000000..7a3b34d --- /dev/null +++ b/core/mysqldb.py @@ -0,0 +1,185 @@ +from datetime import datetime, date, timedelta +import re + +from MySQLdb.constants import FIELD_TYPE +import _mysql + +import setting + + +def _datetime_or_none(value): + space_split = value.split(' ') + if len(space_split[0]) == 10: + 
dash_split = space_split[0].split('-') + if len(dash_split) == 3 and all([x.isnumeric() for x in dash_split]): + year = int(dash_split[0]) + month = int(dash_split[1]) + day = int(dash_split[2]) + if len(space_split) > 1: + colon_split = space_split[1].split(':') + if len(colon_split) == 3 and all([x.isnumeric() for x in colon_split[:1]]): + hour = int(colon_split[0]) + minute = int(colon_split[1]) + if colon_split[2].isnumeric(): + second = int(colon_split[2]) + microsecond = 0 + else: + point_split = colon_split[2].split('.') + if len(point_split) == 2 and all([x.isnumeric() for x in point_split]): + second = int(point_split[0]) + microsecond = int(point_split[1]) * 10 ** (6 - len(point_split[1])) + else: + return None + return datetime(year, month, day, hour, minute, second, microsecond) + else: + return None + else: + return date(year, month, day) + else: + return None + else: + return None + + +def _timedelta_or_none(value): + colon_split = value.split(':') + if len(colon_split) == 3 and all([x.isnumeric() for x in colon_split]): + hours = int(colon_split[0]) + minutes = int(colon_split[1]) + seconds = int(colon_split[2]) + return timedelta(hours=hours, minutes=minutes, seconds=seconds) + else: + return None + + +def _year_to_datetime(value): + if value.isnumeric(): + return datetime(year=int(value), month=1, day=1) + else: + return None + + +def _bytes_to_str(value): + return value.decode(encoding='utf8', errors='replace') + + +def _none(_): + return None + + +class MysqlDB: + + CONVERTER = { + FIELD_TYPE.BIT: int, + FIELD_TYPE.BLOB: _bytes_to_str, + FIELD_TYPE.CHAR: _bytes_to_str, + FIELD_TYPE.DATE: _datetime_or_none, + FIELD_TYPE.DATETIME: _datetime_or_none, + FIELD_TYPE.DECIMAL: float, + FIELD_TYPE.DOUBLE: float, + FIELD_TYPE.ENUM: _bytes_to_str, + FIELD_TYPE.FLOAT: float, + FIELD_TYPE.GEOMETRY: _none, + FIELD_TYPE.INT24: int, + FIELD_TYPE.INTERVAL: _none, + FIELD_TYPE.LONG: int, + FIELD_TYPE.LONG_BLOB: _bytes_to_str, + FIELD_TYPE.LONGLONG: int, + 
FIELD_TYPE.MEDIUM_BLOB: _bytes_to_str, + FIELD_TYPE.NEWDATE: _datetime_or_none, + FIELD_TYPE.NEWDECIMAL: float, + FIELD_TYPE.NULL: _none, + FIELD_TYPE.SET: _bytes_to_str, + FIELD_TYPE.SHORT: int, + FIELD_TYPE.STRING: _bytes_to_str, + FIELD_TYPE.TIME: _timedelta_or_none, + FIELD_TYPE.TIMESTAMP: _datetime_or_none, + FIELD_TYPE.TINY: int, + FIELD_TYPE.TINY_BLOB: _bytes_to_str, + FIELD_TYPE.VAR_STRING: _bytes_to_str, + FIELD_TYPE.VARCHAR: _bytes_to_str, + FIELD_TYPE.YEAR: _year_to_datetime + } + + CHARSET = 'utf8' + + def __init__(self, host=setting.MYSQL_HOST, port=setting.MYSQL_PORT, user=setting.MYSQL_USER, + pass_=setting.MYSQL_PASS, base=setting.MYSQL_BASE, charset=CHARSET, autocommit=False): + self._session = None + self._host = host + self._port = port + self._user = user + self._pass = pass_ + self._base = base + self._charset = charset + self._autocommit = autocommit + + def connect(self): + if self._session is None: + self._session = _mysql.connect( + host=self._host, port=self._port, user=self._user, passwd=self._pass, db=self._base, conv=self.CONVERTER + ) + self._session.set_character_set(self._charset) + self._session.autocommit(self._autocommit) + + @staticmethod + def _build_stmt(stmt, args): + if args is not None: + for key in args: + if isinstance(args[key], str): + args[key] = "'{}'".format(args[key].replace("'", "''").replace('\\', '\\\\')) + elif isinstance(args[key], datetime): + args[key] = "'{}'".format(args[key].strftime('%Y-%m-%d %H:%M:%S')) + elif isinstance(args[key], date): + args[key] = "'{}'".format(args[key].strftime('%Y-%m-%d')) + elif isinstance(args[key], float): + args[key] = str(args[key]) + elif isinstance(args[key], int): + args[key] = str(args[key]) + elif args[key] is None: + args[key] = 'NULL' + else: + raise NameError('Argument type not allowed here: {} - {}'.format(type(args[key]), args[key])) + stmt = re.sub(r':([a-zA-Z0-9_]+)', r'%(\1)s', stmt) % args + return stmt + + def query(self, stmt, args=None): + self.connect() + 
self._session.query(self._build_stmt(stmt, args)) + + result = self._session.use_result() + fields = result.describe() + rows = list() + while True: + row = result.fetch_row() + if row: + rows.append({fields[x][0]: row[0][x] for x in range(len(row[0]))}) + else: + break + return rows + + def exec(self, stmt, args=None): + self.connect() + self._session.query(self._build_stmt(stmt, args)) + return self._session.insert_id() + + def rollback(self): + if self._session is not None: + self._session.rollback() + + def commit(self): + if self._session is not None and setting.MYSQL_SAVE: + self._session.commit() + + def close(self): + if self._session is not None: + self._session.close() + + def get_random_ua(self): + for row in self.query(""" SELECT useragents FROM user_agents ORDER BY RAND() LIMIT 1 """): + return row['useragents'] + + def get_tables(self, table_name=None): + for row in self.query(""" SHOW TABLES """): + if table_name is None or row['Tables_in_bob'] == table_name: + yield row['Tables_in_bob'] diff --git a/core/redisdb.py b/core/redisdb.py new file mode 100644 index 0000000..c50c8d0 --- /dev/null +++ b/core/redisdb.py @@ -0,0 +1,29 @@ +import redis + +import setting + + +class RedisDB: + + DEFAULT_EXPIRE = 300 + + def __init__(self, host=setting.REDIS_HOST, port=setting.REDIS_PORT, base=setting.REDIS_BASE): + self.host = host + self.port = port + self.base = base + self._client = redis.StrictRedis(host=self.host, port=self.port, db=self.base, retry_on_timeout=True) + + def save_start(self, module, expire=300): + lock_key = 'LOCK:{}'.format(module) + self._client.ping() + if self._client.get(lock_key) is not None: + return False + else: + self._client.set(lock_key, 1) + self._client.expire(lock_key, expire) + return True + + def save_end(self, module): + lock_key = 'LOCK:{}'.format(module) + self._client.ping() + self._client.delete(lock_key) diff --git a/core/webutils.py b/core/webutils.py new file mode 100644 index 0000000..3a410a7 --- /dev/null +++ 
b/core/webutils.py @@ -0,0 +1,97 @@ +from time import time, sleep + +import asyncio + +from sam_aiohttp import ClientSession +from core.influxdb import InfluxDB +from core.mysqldb import MysqlDB +from core.redisdb import RedisDB +from core.log import get_logger +from core.arg import ArgParser +import setting + + +class WebUtils: + + request_headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3', + 'Accept-Encoding': 'gzip,deflate', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'Pragma': 'no-cache', + 'Upgrade-Insecure-Requests': '1' + } + + def __init__(self, module_=None): + self.module = module_ + self.args = ArgParser().parse_args() + self.logger = get_logger(name=self.module) + self.mysqldb = MysqlDB() + self.redisdb = RedisDB() + self.influxdb = InfluxDB() + self.loop = asyncio.get_event_loop() + self.nb_tasks = 0 + self.tasks_done = 0 + self.quantity = 0 + self.errors = 0 + self.start_time = 0 + self.end_time = 0 + self.locked = False + self._session = None + self._semaphore = None + + async def _fetch(self, obj): + async with self._session.get(url=obj.url, proxy=setting.PROXY) as response: + return obj, await response.read() + + async def _bound_fetch(self, obj): + async with self._semaphore: + return await self._fetch(obj) + + async def _run(self, objects): + # Create and launch tasks + results = list() + async with ClientSession(headers=self.request_headers) as self._session: + tasks = [asyncio.ensure_future(self._bound_fetch(obj)) for obj in objects] + + # Get results of terminated tasks + for obj, data in await asyncio.gather(*tasks): + results.append((obj, data)) + return results + + def start(self): + # Start and get id_cron + self.start_time = int(time()) + self.locked = not self.redisdb.save_start(self.module) + if self.locked: + self.logger.error('[X] Cron already in progress') + return None + + # Init web variables + 
self.request_headers['User-Agent'] = self.mysqldb.get_random_ua() + self._semaphore = asyncio.Semaphore(setting.SEMAPHORE) + self.logger.info('[*] Starting job {} with {} tasks'.format(self.module, self.nb_tasks)) + + def run(self, objects, callback): + if not self.locked: + while self.loop.is_running(): + sleep(0.5) + future = asyncio.ensure_future(self._run(objects)) + self.loop.run_until_complete(future) + for obj, data in future.result(): + callback(obj, data) + + def end(self): + if not self.locked: + self.end_time = int(time()) + total_time = self.end_time - self.start_time + self.logger.info('[*] {} objects updated'.format(self.quantity)) + self.logger.info('[*] {}/{} tasks done in {} seconds'.format(self.tasks_done, self.nb_tasks, total_time)) + self.redisdb.save_end(module=self.module) + self.influxdb.save_stats( + module=self.module, start_time=self.start_time, nb_tasks=self.nb_tasks, tasks_done=self.tasks_done, + quantity=self.quantity, errors=self.errors, total_time=total_time + ) + self.mysqldb.commit() + self.mysqldb.close() diff --git a/create_schedule.py b/create_schedule.py new file mode 100644 index 0000000..321cf50 --- /dev/null +++ b/create_schedule.py @@ -0,0 +1,106 @@ +from traceback import format_exc +from copy import copy + +from providers.controller import ProviderController +from core.webutils import WebUtils +from lib.league import League +from lib.team import Team + + +class CreateSchedule(WebUtils): + def __init__(self): + super().__init__(module_='create_schedule') + leagues = list(League.get_leagues(db=self.mysqldb, origin=self.module, id_league=self.args.id_league)) + self.nb_tasks = len(leagues) + self.start() + self.run(leagues, self.create_schedule) + self.end() + + def create_schedule(self, league, data): + try: + provider = ProviderController.get_provider(league.url) + + if provider.__name__ == 'Eurosport': + old_round = None + for match in provider.create_schedule(league, data): + 
match.home.country.build_from_name(db=self.mysqldb) + match.home.images = { + str(size): + '../country/h{}-{}.svg'.format(size, match.home.country.id) + if match.home.country.id is not None + else 'h{}-default-team.svg'.format(size) + for size in (30, 50, 80) + } + match.home.store(db=self.mysqldb) + + match.away.country.build_from_name(db=self.mysqldb) + match.away.images = { + str(size): + '../country/h{}-{}.svg'.format(size, match.away.country.id) + if match.away.country.id is not None + else 'h{}-default-team.svg'.format(size) + for size in (30, 50, 80) + } + match.away.store(db=self.mysqldb) + + if match.round != old_round: + match.league.store_round(new_round=match.round, db=self.mysqldb) + old_round = match.round + + match.store(db=self.mysqldb) + if match.id > 0: + self.logger.info('[+] match {}: {} vs {}: OK'.format(match.id, match.home.id, match.away.id)) + self.quantity += 1 + + elif provider.__name__ == 'Matchendirect': + leagues = list() + for url in provider.create_schedule(league, data): + league_copy = copy(league) + league_copy.url = url + leagues.append(league_copy) + self.run(leagues, self.create_schedule_from_url) + + except BaseException as e: + league.error = format_exc() + self.logger.error('[-] league {}: {} - {}\n{}'.format(league.id, type(e), e, league.error)) + self.errors += 1 + else: + league.error = None + self.logger.info('[+] league {}: OK'.format(league.id)) + self.tasks_done += 1 + finally: + league.store_error(self.mysqldb) + + def create_schedule_from_url(self, league, data): + try: + provider = ProviderController.get_provider(league.url) + for match in provider.create_schedule_from_url(league, data): + _home = Team.from_source(names=match.home.names, league=league, db=self.mysqldb) + if _home is not None: + match.home = _home + else: + match.home.store(self.mysqldb) + + _away = Team.from_source(names=match.away.names, league=league, db=self.mysqldb) + if _away is not None: + match.away = _away + else: + 
match.away.store(self.mysqldb) + + match.store(self.mysqldb) + if match.id > 0: + self.logger.info('[+] match {}: {} vs {}: OK'.format(match.id, match.home.id, match.away.id)) + self.quantity += 1 + + except BaseException as e: + league.error = format_exc() + self.logger.error('[-] league {}={}: {} - {}\n{}'.format(league.id, league.url, type(e), e, league.error)) + else: + league.error = None + self.logger.info('[+] league {}={}: OK'.format(league.id, league.url)) + finally: + league.store_error(self.mysqldb) + + +if __name__ == '__main__': + CreateSchedule() diff --git a/depopulate_news.py b/depopulate_news.py new file mode 100644 index 0000000..fde7c94 --- /dev/null +++ b/depopulate_news.py @@ -0,0 +1,35 @@ +from datetime import date, timedelta +import os.path + +from lib.tools import url_sanitize +from core.webutils import WebUtils +from lib.news import News +import setting + + +class DepopulateNews(WebUtils): + + def __init__(self): + super().__init__(module_='remove_news') + deadline = date.today() - timedelta(days=90) + old_newss = list(News.get_older_news(date_limit=deadline, db=self.mysqldb)) + self.nb_tasks = len(old_newss) + self.start() + self.remove_old_newss(newss=old_newss) + self.end() + + def remove_old_newss(self, newss): + for news in newss: + self.logger.info('[+] remove news {}'.format(news.id)) + news.remove(db=self.mysqldb) + self.quantity += 1 + if url_sanitize(news.image.title) in news.image.basename: + img_path = '{}/news/{}'.format(setting.IMAGES_FOLDER, news.image.basename) + if os.path.exists(img_path): + os.remove(img_path) + self.tasks_done += 1 + News.update_news_counters(db=self.mysqldb) + + +if __name__ == '__main__': + DepopulateNews() diff --git a/doc/aiohttp.diff b/doc/aiohttp.diff new file mode 100644 index 0000000..a92747b --- /dev/null +++ b/doc/aiohttp.diff @@ -0,0 +1,37 @@ +Seulement dans venv/lib/python3.6/site-packages/aiohttp: _http_parser.cpython-34m.so +Seulement dans sam_aiohttp/: _http_parser.cpython-34m.so_rename 
+diff -u venv/lib/python3.6/site-packages/aiohttp/http_parser.py sam_aiohttp/http_parser.py +--- venv/lib/python3.6/site-packages/aiohttp/http_parser.py 2018-10-01 10:40:27.310180902 +0200 ++++ sam_aiohttp/http_parser.py 2018-09-19 15:07:48.240622071 +0200 +@@ -641,10 +641,10 @@ + + HttpRequestParser = HttpRequestParserPy + HttpResponseParser = HttpResponseParserPy +-try: +- from ._http_parser import HttpRequestParserC, HttpResponseParserC +- if not NO_EXTENSIONS: # pragma: no cover +- HttpRequestParser = HttpRequestParserC +- HttpResponseParser = HttpResponseParserC +-except ImportError: # pragma: no cover +- pass ++# try: ++# from ._http_parser import HttpRequestParserC, HttpResponseParserC ++# if not NO_EXTENSIONS: # pragma: no cover ++# HttpRequestParser = HttpRequestParserC ++# HttpResponseParser = HttpResponseParserC ++# except ImportError: # pragma: no cover ++# pass +Seulement dans sam_aiohttp/: locks.py +Les sous-répertoires venv/lib/python3.6/site-packages/aiohttp/__pycache__ et sam_aiohttp/__pycache__ sont identiques +diff -u venv/lib/python3.6/site-packages/aiohttp/streams.py sam_aiohttp/streams.py +--- venv/lib/python3.6/site-packages/aiohttp/streams.py 2018-10-01 10:40:27.286180973 +0200 ++++ sam_aiohttp/streams.py 2018-09-19 16:30:20.772414010 +0200 +@@ -235,6 +235,8 @@ + yield from waiter + else: + yield from waiter ++ except: ++ pass + finally: + self._waiter = None + diff --git a/doc/functions.sql b/doc/functions.sql new file mode 100644 index 0000000..dcab10e --- /dev/null +++ b/doc/functions.sql @@ -0,0 +1,33 @@ +DELIMITER ;; + +DROP FUNCTION IF EXISTS count_matching_tags ;; +CREATE FUNCTION count_matching_tags (haystack TEXT, tags JSON) RETURNS INT DETERMINISTIC + BEGIN + DECLARE matches INT; + DECLARE idx INT; + SET matches = 0; + SET idx = 0; + WHILE idx < JSON_LENGTH(tags) DO + SET matches = IF (INSTR(haystack, JSON_UNQUOTE(JSON_EXTRACT(tags, CONCAT('$[', idx, ']')))), matches + 1, matches); + SET idx = idx + 1; + END WHILE; + RETURN 
matches; + END ;; + + +DROP FUNCTION IF EXISTS get_matching_league ;; +CREATE FUNCTION get_matching_league (haystack TEXT, id_sport INT) RETURNS INT DETERMINISTIC + BEGIN + DECLARE lid INT; + SELECT id + INTO lid + FROM leagues + WHERE ( + leagues.id_sport = id_sport AND + JSON_TYPE(leagues.tags) = 'ARRAY' AND + count_matching_tags(haystack, leagues.tags) > 0 + ) + ORDER BY degree ASC + LIMIT 1; + RETURN lid; + END ;; \ No newline at end of file diff --git a/doc/requirements.txt b/doc/requirements.txt new file mode 100644 index 0000000..1e63994 --- /dev/null +++ b/doc/requirements.txt @@ -0,0 +1,27 @@ +aiohttp==3.5.4 +async-timeout==3.0.1 +attrs==19.1.0 +beautifulsoup4==4.7.1 +certifi==2019.11.28 +chardet==3.0.4 +cssmin==0.2.0 +feedparser==5.2.1 +hiredis==1.0.1 +idna==2.8 +idna-ssl==1.1.0 +influxdb==5.2.3 +jsmin==2.2.2 +multidict==4.5.2 +mysql-connector==2.2.9 +mysqlclient==1.3.14 +python-dateutil==2.8.1 +pytz==2019.1 +redis==3.3.11 +requests==2.22.0 +selenium==3.141.0 +six==1.14.0 +soupsieve==1.9.1 +SQLAlchemy==1.3.3 +typing-extensions==3.7.2 +urllib3==1.25.2 +yarl==1.3.0 diff --git a/lib/browser.py b/lib/browser.py new file mode 100644 index 0000000..680c118 --- /dev/null +++ b/lib/browser.py @@ -0,0 +1,42 @@ +from selenium.common.exceptions import TimeoutException +from selenium.webdriver import Firefox, FirefoxProfile +from selenium.webdriver.common.proxy import Proxy, ProxyType +from selenium.webdriver.firefox.options import Options + +from setting import * + + +class Browser(Firefox): + + def __init__(self, user_agent, headless=True, tor=True): + options = Options() + options.add_argument('--user-agent="{}"'.format(user_agent)) + + if headless: + options.add_argument('--headless') + options.add_argument('--window-size=1920,1080') + + profile = None + if tor: + proxy_url = PROXY.replace('http://', '', 1) + proxy = Proxy({ + 'proxyType': ProxyType.MANUAL, + 'httpProxy': proxy_url, + 'ftpProxy': proxy_url, + 'sslProxy': proxy_url, + 'noProxy': '' + }) + profile 
= FirefoxProfile() + profile.set_proxy(proxy) + + super().__init__( + executable_path=GECKODRIVER_PATH, firefox_binary=FIREFOX_PATH, options=options, firefox_profile=profile + ) + self.implicitly_wait(5) + self.set_page_load_timeout(20) + + def get(self, url): + try: + super().get(url) + except TimeoutException: + self.execute_script('window.stop()') diff --git a/lib/country.py b/lib/country.py new file mode 100644 index 0000000..724e942 --- /dev/null +++ b/lib/country.py @@ -0,0 +1,28 @@ +import json + + +class Country: + + def __init__(self, idt, name=None, short_name=None, names=None): + self.id = idt + self.name = name + self.short_name = short_name + self.names = names + + def build_from_name(self, db): + for row in db.query('SELECT id FROM countries WHERE name = :name', {'name': self.name}): + self.id = row['id'] + + @staticmethod + def get_countries(db): + for row in db.query("SELECT * FROM countries"): + yield Country( + idt=row['id'], name=row['name'], short_name=row['short_name'], names=json.loads(row['names']) + ) + + +if __name__ == '__main__': + from core.mysqldb import MysqlDB + d = MysqlDB() + for c in Country.get_countries(d): + print(c.__dict__) \ No newline at end of file diff --git a/lib/league.py b/lib/league.py new file mode 100644 index 0000000..f90bbd3 --- /dev/null +++ b/lib/league.py @@ -0,0 +1,193 @@ +import json +import sys + +from lib.country import Country +from lib.team import Team +from lib.tools import n2v + +if 'lib.match' not in sys.modules: + from lib.match import Match + + +class Sport: + def __init__(self, idt): + self.id = idt + self.name = None + self.display_sets = None + + +class Group: + def __init__(self, name, url, league): + self.name = name + self.url = url + self.league = league + + +class League: + def __init__(self, idt): + self.id = idt + self.name = None + self.bets = None + self.url = None + self.gender = None + self.images = dict() + self.error = '' + self.points = dict() + self.teams = list() + self.matches = 
list() + self.tags = list() + self.round_dates = None + self.sport = None + self.country = None + + def get_teams(self, db): + stmt = """ + SELECT league_teams.id_team, teams.name, teams.names, teams.url + FROM league_teams + INNER JOIN teams ON teams.id = league_teams.id_team + WHERE league_teams.id_league = :id_league + """ + args = {'id_league': self.id} + teams = list() + for row in db.query(stmt, args): + team = Team(row['id_team']) + team.name = row['name'] + team.names = json.loads(row['names']) + team.url = row['url'] + team.league = self + teams.append(team) + return teams + + def get_matches(self, db): + stmt = """ + SELECT matches.id, matches.start_date, matches.id_home, matches.id_away, matches.url, matches.mday, + home.name AS home_name, home.names AS home_names, + away.name AS away_name, away.names AS away_names + FROM matches + INNER JOIN teams AS home ON home.id = matches.id_home + INNER JOIN teams AS away ON away.id = matches.id_away + WHERE matches.id_league = :id_league + """ + for row in db.query(stmt, {'id_league': self.id}): + match = Match(idt=row['id']) + match.start_date = row['start_date'] + match.url = row['url'] + match.mday = row['mday'] + match.home = Team(idt=row['id_home']) + match.home.name = row['home_name'] + match.home.names = json.loads(row['home_names']) + match.away = Team(idt=row['id_away']) + match.away.name = row['away_name'] + match.away.names = json.loads(row['away_names']) + yield match + + def update_live_ranking(self, group, db): + stmt = """ + SELECT id, rank_live + FROM league_teams + WHERE id_league = :id_league AND `group` = :group + ORDER BY pts_live DESC, diff_live DESC, gf_live DESC + """ + args = {'id_league': self.id, 'group': group} + new_rank = 0 + for row in db.query(stmt, args): + new_rank += 1 + if new_rank != row['rank_live']: + stmt = """ + UPDATE league_teams SET rank_live = :rank WHERE id = :id + """ + args = {'rank': new_rank, 'id': row['id']} + db.exec(stmt, args) + + def update_final_ranking(self, 
group, db): + stmt = """ + SELECT id, rank + FROM league_teams + WHERE id_league = :id_league AND `group` = :group + ORDER BY pts DESC, diff DESC, gf DESC + """ + args = {'id_league': self.id, 'group': group} + new_rank = 0 + for row in db.query(stmt, args): + new_rank += 1 + if new_rank != row['rank']: + stmt = """ + UPDATE league_teams SET rank = :rank WHERE id = :id + """ + args = {'rank': new_rank, 'id': row['id']} + db.exec(stmt, args) + + def store_images(self, db): + stmt = """ + UPDATE leagues + SET images = :images + WHERE id = :id + """ + args = {'images': json.dumps(self.images, separators=(',', ':')), 'id': self.id} + db.exec(stmt, args) + + def store_error(self, db): + stmt = """ + UPDATE leagues SET error = :error WHERE id = :id + """ + args = {'error': n2v(self.error), 'id': self.id} + db.exec(stmt, args) + + def store_round(self, new_round, db): + for row in db.query('SELECT rounds FROM leagues WHERE id = :id', {'id': self.id}): + if new_round not in row['rounds'].split(','): + rounds = '{},{}'.format(row['rounds'], new_round) + db.exec('UPDATE leagues SET rounds = :rounds WHERE id = :id', {'rounds': rounds, 'id': self.id}) + + def store_groups(self, groups, db): + db.exec( + 'UPDATE leagues SET groups = :groups WHERE id = :id', + {'groups': ','.join([group.name for group in groups]), 'id': self.id} + ) + + @staticmethod + def get_leagues(db, origin=None, id_league=None, with_tags=False): + where_conditions = list() + args = dict() + + if origin == 'update_rankings': + where_conditions.append('leagues.url_ranking IS NOT NULL') + elif origin == 'update_schedule': + where_conditions.append('leagues.url_schedule IS NOT NULL') + elif origin == 'update_tvschedule': + where_conditions.append('leagues.url_tvschedule IS NOT NULL') + elif origin == 'create_schedule': + where_conditions.append('leagues.url_schedule IS NOT NULL') + where_conditions.append('leagues.auto_schedule = 1') + + if id_league is not None: + where_conditions.append('leagues.id = 
class Scheduler:
    """Plain state holder built from a URL.

    Besides the two constructor arguments it starts with an empty ``matches``
    list and no previous/next URL.
    """

    def __init__(self, url, recursive_id=0):
        self.url, self.recursive_id = url, recursive_id
        self.matches = []
        self.previous_url = self.next_url = None
class Stat:
    """Pair of values, one for the home side and one for the away side."""

    def __init__(self, home=0, away=0):
        # Assigned home first, then away, so ``__dict__`` keeps that order.
        self.home, self.away = home, away
def get_new_status(self):
    """Turn the scraped ``self.minute`` text into a status and a short label.

    ``self.status`` and ``self.minute`` are updated in place; the extra-time
    and shoot-out branches also set ``self.extra_time`` and the "finished"
    branch may set ``self.end_date``.  A missing minute (``None`` or empty)
    is treated as "not started" for every sport.
    """
    raw = self.minute or ''

    # tennis case
    if self.sport.id == 2:
        lowered = raw.lower()
        if not raw:
            self.status = self.COMING
            self.minute = ''
        elif 'terminé' in lowered or 'ab.' in lowered:
            self.status = self.OVER
            self.minute = 'FINI'
        elif 'commencé' in lowered:
            self.status = self.FIRST_TIME
            self.minute = 'LIVE'
        else:
            # Unknown text: blank the label but keep the current status.
            self.minute = ''

    # match has not started
    elif not raw or 'envoi' in raw:
        self.status = self.COMING
        self.minute = ''

    # match in half time
    elif 'mi-temps' in raw:
        self.status = self.HALF_TIME
        self.minute = 'MT'

    # match is finished
    elif 'terminé' in raw:
        # Stay in WAITING_SCORERS until 30 minutes have passed since end_date.
        # NOTE(review): end_date is refreshed on every call inside the window,
        # so OVER is only reached if the caller restores the stored end_date
        # between calls - confirm against the caller.
        if self.end_date is None or datetime.now() - self.end_date < timedelta(minutes=30):
            self.status = self.WAITING_SCORERS
            self.end_date = datetime.now()
        else:
            self.status = self.OVER
        self.minute = 'FINI'

    # match is in extra time
    elif 'prolongation' in raw:
        self.status = self.ET_FIRST_TIME
        self.extra_time = 'extratime'
        self.minute = 'PROL'

    # match is in shootout
    elif 'tirs au but' in raw:
        self.status = self.SHOOTOUT
        self.extra_time = 'shootout'
        self.minute = 'TAB'

    # match is postponed
    elif any(word in raw for word in ('reporté', 'suspendu')):
        self.status = self.POSTPONED
        self.minute = 'REP'

    # match is cancelled
    elif 'annulé' in raw:
        self.status = self.CANCELLED
        self.minute = 'ANN'

    # match is in progress
    else:
        if self.status == self.COMING:
            self.status = self.FIRST_TIME
        elif self.status == self.HALF_TIME or datetime.now() - self.start_date > timedelta(hours=1):
            self.status = self.SECOND_TIME
        minute = raw.rstrip("'")
        if minute.isnumeric():
            # Minutes beyond the regular half are shown as stoppage time.
            if self.status == self.FIRST_TIME and int(minute) > 45:
                self.minute = "45'+{}".format(int(minute) - 45)
            elif self.status == self.SECOND_TIME and int(minute) > 90:
                self.minute = "90'+{}".format(int(minute) - 90)
        else:
            self.minute = ''
def update_users_league(self, db):
    """Credit league-bet points to every user who guessed this match's outcome.

    Returns ``None`` without doing anything when the league has bets disabled
    or when no coefficient is available (``coeffs`` empty, missing ``'home'``
    or ``'home'`` equal to 0).  Otherwise each bet yielded by
    ``get_users_league_bets`` is compared with the score: ``'1'`` wins on a
    home win, ``'X'`` on a draw, ``'2'`` on an away win.  A correct bet earns
    the matching coefficient, doubled when the user's own team plays in the
    match.  The users' league ranking is refreshed once at the end.

    :param db: database handle passed through to the helpers.
    """
    # check if bets are active; .get() avoids a KeyError on coeffs that were
    # never computed (the dict starts empty)
    if not self.league.bets or not self.coeffs or self.coeffs.get('home', 0) == 0:
        return None

    for id_, id_team, bet_mday, bets in self.get_users_league_bets(db):
        user = User(idt=id_)
        bet = bets[self.idof10]
        # Betting on a match of one's own team counts double.
        multiplier = 2 if id_team in (self.home.id, self.away.id) else 1
        points = 0
        if bet == '1' and self.score_home > self.score_away:
            points = multiplier * self.coeffs['home']
        elif bet == 'X' and self.score_home == self.score_away:
            points = multiplier * self.coeffs['tie']
        elif bet == '2' and self.score_home < self.score_away:
            points = multiplier * self.coeffs['away']
        if points > 0:
            user.set_league_points(bet_mday, points, db)
    self.update_users_league_ranking(db)
args): + stmt = """ + SELECT users_month.id, users_month.league_rank, users_month.id_league + FROM users_month + INNER JOIN bets ON bets.id_league = users_month.id_league AND bets.id_month = users_month.id_month + WHERE bets.league_id = :id_league AND bets.league_mday = :mday AND bets.open = 1 + AND users_month.id_month = :id_month + ORDER BY users_month.league_points_total DESC + """ + args = {'id_league': self.league.id, 'mday': self.mday, 'id_month': self.id_month} + ranking = dict() + for row in db.query(stmt, args): + if row['id_league'] in ranking: + ranking[row['id_league']] += 1 + else: + ranking[row['id_league']] = 1 + new_rank = ranking[row['id_league']] + + if res['COUNT(*)'] == 0: + db.exec( + 'UPDATE users_month SET league_rank = :rank, league_rank_old = :rank_old WHERE id = :id', + {'rank': new_rank, 'rank_old': row['league_rank'], 'id': row['id']} + ) + elif new_rank != row['league_rank']: + db.exec( + 'UPDATE users_month SET league_rank = :rank WHERE id = :id', + {'rank': new_rank, 'id': row['id']} + ) + + def update_users_europe(self, db): + match_scorers = {'home': list(), 'away': list()} + for side in ('home', 'away'): + for event in self.events: + if event.type == 'goal' and event.side == side: + exp_scorer = event.player.split(' ') + last_word = url_sanitize(exp_scorer[-1]) + if last_word == '-csc-': + last_word = 'csc' + elif last_word == '-p-': + last_word = url_sanitize(exp_scorer[-2]) + match_scorers[side].append(last_word) + for id_, europe_round, bet_scores, bet_scorers in self.get_users_europe_bets(db): + user = User(idt=id_) + if bet_scores[self.idof4]['home'] is None or bet_scores[self.idof4]['away'] is None: + continue + points = 0 + if bet_scores[self.idof4]['home'] == self.score_home and bet_scores[self.idof4]['away'] == self.score_away: + points += 5 + elif unity(bet_scores[self.idof4]['home'] - bet_scores[self.idof4]['away']) == \ + unity(self.score_home - self.score_away): + points += 2 + if bet_scores[self.idof4]['home'] + 
def update_teams_ranking(self, db):
    """Push this match's result into both teams' league statistics.

    Does nothing when ``self.mday`` is 0.  Otherwise the per-match figures
    (played, points, win/tie/loss flag, goals for/against/difference) are set
    on ``self.home`` and ``self.away``.  A postponed match only resets the
    teams' live stats; any other match updates live stats and live ranking
    and, once its status is OVER or WAITING_SCORERS, the final stats and
    final ranking as well.  Rankings are refreshed per group, once when both
    teams share a group.

    :param db: database handle forwarded to the team and league helpers.
    """
    # if no mday then no ranking to update
    if self.mday == 0:
        return None

    home, away = self.home, self.away
    home.played = 1
    away.played = 1

    # Decide which side (if any) won, then hand out the league's points.
    if self.score_home > self.score_away:
        winner, loser = home, away
    elif self.score_away > self.score_home:
        winner, loser = away, home
    else:
        winner = loser = None

    scale = self.league.points
    if winner is None:
        home.points = scale['tie']
        away.points = scale['tie']
        home.ties = 1
        away.ties = 1
    else:
        winner.points = scale['winner']
        loser.points = scale['loser']
        winner.wins = 1
        loser.loss = 1

    home.g_for = self.score_home
    home.g_against = self.score_away
    home.g_diff = self.score_home - self.score_away
    away.g_for = self.score_away
    away.g_against = self.score_home
    away.g_diff = self.score_away - self.score_home

    # a postponed match only rolls the live figures back
    if self.status == self.POSTPONED:
        for team in (home, away):
            team.reset_live_stats(db)
        return None

    # in all other cases update live stats
    for team in (home, away):
        team.update_live_stats(db)
    self.league.update_live_ranking(home.group, db)
    if away.group != home.group:
        self.league.update_live_ranking(away.group, db)

    # if match is finished then update final stats
    if self.status in (self.OVER, self.WAITING_SCORERS):
        for team in (home, away):
            team.update_final_stats(db)
        self.league.update_final_ranking(home.group, db)
        if away.group != home.group:
            self.league.update_final_ranking(away.group, db)
def store_coeffs(self, db):
    """Write ``self.coeffs`` to this match's row as compact JSON.

    :param db: object exposing ``exec(stmt, args)``.
    """
    payload = {
        'coeffs': json.dumps(self.coeffs, separators=(',', ':')),
        'id': self.id
    }
    db.exec('UPDATE matches SET coeffs = :coeffs WHERE id = :id', payload)
@classmethod
def get_matches(cls, db, origin=None, interval_hour=2, interval_day=14, id_match=None, id_league=None):
    """Yield ``Match`` objects loaded from the database, optionally filtered.

    ``origin`` selects a preset filter:

    * ``'update_scores'``: status neither OVER nor CANCELLED, starting within
      ``interval_hour`` hours from now (or earlier);
    * ``'update_schedule'``: status COMING or POSTPONED, starting within
      ``interval_day`` days, in leagues with ``auto_schedule = 0``;
    * ``'update_tvschedule'``: status COMING, starting within
      ``interval_day`` days.

    ``id_match`` and ``id_league`` add equality filters.  Every value that
    is interpolated into the SQL text is passed through ``int()`` first, so
    a non-numeric value raises ``ValueError`` instead of reaching the query
    (fractional intervals are truncated).

    Each yielded match carries its league, both teams (with their group in
    that league) and its sport; ``coeffs``, ``events`` and ``squad`` are the
    decoded JSON values as stored.

    :param db: object exposing ``query(stmt)``.
    """
    where_conditions = list()
    if origin == 'update_scores':
        where_conditions.append('matches.status NOT IN ({}, {})'.format(cls.OVER, cls.CANCELLED))
        where_conditions.append('matches.start_date <= NOW() + INTERVAL {} HOUR'.format(int(interval_hour)))
    elif origin == 'update_schedule':
        where_conditions.append('matches.status IN ({}, {})'.format(cls.COMING, cls.POSTPONED))
        where_conditions.append('matches.start_date <= NOW() + INTERVAL {} DAY'.format(int(interval_day)))
        where_conditions.append('leagues.auto_schedule = 0')
    elif origin == 'update_tvschedule':
        where_conditions.append('matches.status = {}'.format(cls.COMING))
        where_conditions.append('matches.start_date <= NOW() + INTERVAL {} DAY'.format(int(interval_day)))

    if id_match is not None:
        where_conditions.append('matches.id = {}'.format(int(id_match)))
    if id_league is not None:
        where_conditions.append('matches.id_league = {}'.format(int(id_league)))

    where_clause = ''
    if where_conditions:
        where_clause = 'WHERE {}'.format(' AND '.join(where_conditions))

    stmt = """
        SELECT
            matches.id, matches.idof10, matches.id_league, matches.id_home, matches.id_away, matches.mday, matches.url,
            matches.status, matches.events, matches.squad, matches.coeffs, matches.start_date, matches.end_date,
            matches.url_score, matches.url_comms,
            home.name AS home_name, home.names AS home_names,
            away.name AS away_name, away.names AS away_names,
            leagues.name AS league_name, leagues.id_sport, leagues.bets, leagues.points,
            leagues.url_schedule AS league_url,
            home_lt.group AS home_group,
            away_lt.group AS away_group,
            sports.name AS sport_name, sports.display_sets
        FROM matches
        INNER JOIN teams AS home ON home.id = matches.id_home
        INNER JOIN teams AS away ON away.id = matches.id_away
        INNER JOIN leagues ON leagues.id = matches.id_league
        INNER JOIN league_teams AS home_lt ON home_lt.id_league = leagues.id AND home_lt.id_team = home.id
        INNER JOIN league_teams AS away_lt ON away_lt.id_league = leagues.id AND away_lt.id_team = away.id
        INNER JOIN sports ON sports.id = leagues.id_sport
        {}
    """.format(where_clause)
    for row in db.query(stmt):
        match = Match(idt=row['id'])
        match.idof10 = row['idof10']
        match.start_date = row['start_date']
        match.end_date = row['end_date']
        match.mday = row['mday']
        match.status = row['status']
        match.url = row['url']
        match.url_score = row['url_score']
        match.url_comms = row['url_comms']
        match.coeffs = json.loads(row['coeffs'])
        match.events = json.loads(row['events'])
        match.squad = json.loads(row['squad'])

        match.league = League(idt=row['id_league'])
        match.league.name = row['league_name']
        match.league.bets = bool(row['bets'])
        match.league.url = row['league_url']
        match.league.points = json.loads(row['points'])

        match.home = Team(idt=row['id_home'])
        match.home.name = row['home_name']
        match.home.names = json.loads(row['home_names'])
        match.home.group = row['home_group']
        match.home.league = match.league

        match.away = Team(idt=row['id_away'])
        match.away.name = row['away_name']
        match.away.names = json.loads(row['away_names'])
        match.away.group = row['away_group']
        match.away.league = match.league

        match.sport = Sport(idt=row['id_sport'])
        match.sport.name = row['sport_name']
        match.sport.display_sets = bool(row['display_sets'])

        yield match
class NewsImage:
    """Image attached to a news item.

    When ``url`` is given, the file name is built from the sanitized title
    plus the extension found in the URL; otherwise the supplied ``basename``
    is used as is.  ``abspath`` is that file name under the ``news``
    sub-folder of ``setting.IMAGES_FOLDER``.
    """

    def __init__(self, url, title, basename, id_news):
        self.url = url
        self.title = title
        self.id_news = id_news
        if url is not None:
            # Cut any query string so "photo.jpg?w=640" gives "jpg", the same
            # way PlayerImage.set_path derives its extension.
            ext = self.url.split('.')[-1].split('?')[0]
            self.basename = '{}.{}'.format(url_sanitize(self.title), ext)
        else:
            self.basename = basename
        self.abspath = '{}/news/{}'.format(setting.IMAGES_FOLDER.rstrip('/'), self.basename)

    def store(self, db):
        """Save ``basename`` in the ``image`` column of news row ``id_news``."""
        db.exec(
            "UPDATE news SET image = :image WHERE id = :id",
            {'image': self.basename, 'id': self.id_news}
        )
def __init__(self):
    """Create an empty news item: ``id`` 0, an empty tag list, everything else ``None``."""
    self.id = 0
    self.pub_date = None
    self.title = None
    self.description = None
    self.url = None
    self.image = None
    self.source = None
    self.tags = list()
    self.sport = None
    # ``content`` used to be initialised twice; once is enough.
    self.content = None
    self.teaser = None
    self.author = None
    self.video_src = None
    self.data = None
    self.error = None
    self.league = None
@staticmethod
def update_news_counters(db):
    """Recount ``nb_news`` for every league and every team from the ``news`` table.

    :param db: object exposing ``exec(stmt)``.
    """
    db.exec("""
        UPDATE leagues
        SET leagues.nb_news = (SELECT COUNT(*) FROM news WHERE news.id_league = leagues.id)
    """)
    # The subquery must reference the updated table ``teams``; the former
    # ``team.id`` named a table that is not part of the statement.
    db.exec("""
        UPDATE teams
        SET teams.nb_news = (SELECT COUNT(*) FROM news WHERE news.id_team = teams.id)
    """)
class PlayerImage:
    """Picture of a player, stored under a name derived from name and birth date."""

    def __init__(self, url, full_name, birth_date):
        self.url = url
        self.full_name = full_name
        self.birth_date = birth_date
        self.path = None
        self.last_save = 0
        self.last_modified = 0
        self.set_path()

    def set_lm(self, lm):
        """Record ``lm`` as ``last_modified`` when it is an all-digit string."""
        if not lm.isnumeric():
            return
        self.last_modified = int(lm)

    def set_path(self):
        """Compute ``path``: MD5 of the full name, MD5 of the ISO birth date, then the URL's extension."""
        name_digest = hashlib.md5(self.full_name.encode()).hexdigest()
        date_digest = hashlib.md5(self.birth_date.strftime('%Y-%m-%d').encode()).hexdigest()
        # Text after the last dot of the URL, without any query string.
        extension = self.url.rsplit('.', 1)[-1].split('?', 1)[0]
        self.path = name_digest + date_digest + '.' + extension
def set_price(self, price):
    """Store ``price``, replaced by ``PRICE_MIN`` when it is missing or lower."""
    floor = self.PRICE_MIN
    self.price = floor if price is None else max(floor, price)
def store_error(self, db):
    """Save this player's ``error`` on the rows matching full name and birth date.

    The error is JSON-encoded; ``None`` is passed through unchanged.

    :param db: object exposing ``exec(stmt, args)``.
    """
    encoded = None if self.error is None else json.dumps(self.error)
    db.exec(
        """UPDATE players SET error = :error WHERE full_name = :full_name AND birth_date = :birth_date""",
        {'error': encoded, 'full_name': self.full_name, 'birth_date': self.birth_date}
    )
def reset_live_stats(self, db):
    """Realign this team's live standings with its settled standings.

    Every ``*_live`` column of the team's ``league_teams`` row (selected by
    team id and league id) is overwritten with its non-live counterpart.

    :param db: database handle exposing ``exec(stmt, args)``.
    """
    row_key = {'id_team': self.id, 'id_league': self.league.id}
    db.exec(
        """
        UPDATE league_teams SET played_live = played, pts_live = pts, wins_live = wins, ties_live = ties,
        loss_live = loss, gf_live = gf, ga_live = ga, diff_live = diff
        WHERE id_team = :id_team AND id_league = :id_league
        """,
        row_key,
    )
def set_stats(self, db):
    """Overwrite the team's standings row with the values held on this object.

    Settled and live columns receive the same numbers, and ``rank``,
    ``rank_live`` and ``rank_old`` are all set to ``self.rank``. The group
    column gets ``self.group.name``, or an empty string when no group is set.

    :param db: database handle exposing ``exec(stmt, args)``.
    """
    if self.group is None:
        group_name = ''
    else:
        group_name = self.group.name

    values = dict(
        group=group_name,
        played=self.played,
        pts=self.points,
        wins=self.wins,
        ties=self.ties,
        loss=self.loss,
        gf=self.g_for,
        ga=self.g_against,
        diff=self.g_diff,
        id_team=self.id,
        id_league=self.league.id,
        rank=self.rank,
    )
    query = """
        UPDATE league_teams SET `group` = :group, played = :played, played_live = :played, pts = :pts, pts_live = :pts,
        wins = :wins, wins_live = :wins, ties = :ties, ties_live = :ties, loss = :loss, loss_live = :loss, gf = :gf,
        gf_live = :gf, ga = :ga, ga_live = :ga, diff = :diff, diff_live = :diff, rank = :rank, rank_live = :rank,
        rank_old = :rank
        WHERE id_team = :id_team AND id_league = :id_league
    """
    db.exec(query, values)
@staticmethod
def get_teams(db, origin=None, id_team=None, url=None):
    """Yield ``Team`` objects read from the ``teams`` table, optionally filtered.

    :param db: database handle exposing ``query(stmt, args)``.
    :param origin: when ``'update_staff'`` or ``'update_players'``, only teams
        whose ``url`` is not NULL are selected.
    :param id_team: restrict the selection to this team id (coerced with ``int``).
    :param url: restrict the selection to teams whose ``url`` matches this SQL
        ``LIKE`` pattern.
    :return: generator of ``Team`` with name, id_sport, country, images, url
        and staff populated from the row.
    """
    where_conditions = []
    args = {}
    if origin in ('update_staff', 'update_players'):
        where_conditions.append('url IS NOT NULL')
    if id_team is not None:
        # The id travels as a bound parameter. The condition text is constant:
        # the former ``'id = :id'.format(int(id_team))`` had no placeholder to fill.
        args['id'] = int(id_team)
        where_conditions.append('id = :id')
    if url is not None:
        where_conditions.append('url LIKE :url')
        args['url'] = url

    where_clause = 'WHERE {}'.format(' AND '.join(where_conditions)) if where_conditions else ''

    stmt = """
        SELECT id, name, id_sport, id_country, images, url, staff
        FROM teams
        {}
    """.format(where_clause)
    for row in db.query(stmt, args):
        team = Team(row['id'])
        team.name = row['name']
        team.id_sport = row['id_sport']
        team.country = Country(idt=row['id_country'])
        team.images = json.loads(row['images'])
        team.url = row['url']
        team.staff = json.loads(row['staff'])
        yield team
def n2v(x):
    """Map ``None`` to the empty string; return any other value unchanged."""
    return '' if x is None else x


def void(x):
    """Identity function: return ``x`` as given."""
    return x


def unity(x):
    """Return the sign of ``x`` as ``1``, ``-1`` or ``0``."""
    if x > 0:
        return 1
    if x < 0:
        return -1
    return 0


def real_round(x, n=1):
    """Round ``x`` to ``n`` decimals, rounding halves away from zero.

    This is a closed form of the former counting loop, which added one step
    of ``10 ** -n`` at a time until ``x < res + step/2 - step/20``. The
    cut-over therefore sits at 0.45 of a step rather than 0.5 (presumably to
    absorb binary float error such as 0.15 being stored as 0.1499...), and
    that threshold is kept here.

    Differences from the loop:

    * constant time instead of ``x * 10 ** n`` iterations;
    * negative values are rounded symmetrically (the loop returned 0 for
      every negative input);
    * NaN/inf raise instead of looping forever.

    :param x: number to round.
    :param n: number of decimals to keep (default 1).
    :return: the rounded value; the integer ``0`` when it rounds to zero,
        as before.
    """
    scale = 10 ** n
    # +0.55 = +0.5 (half-up) + 0.05 (the step/20 tolerance of the original).
    steps = int(abs(x) * scale + 0.55)
    if steps == 0:
        return 0
    rounded = round(steps / scale, n)
    return -rounded if x < 0 else rounded


# Accented characters folded to a plain ASCII letter by url_sanitize.
_URL_ACCENT_FOLDS = {
    'e': ('è', 'é', 'ê', 'ë', 'É'),
    'a': ('à', 'á', 'â', 'ä', 'ã'),
    'o': ('ò', 'ó', 'ô', 'ö', 'ø'),
    'i': ('ì', 'í', 'î', 'ï', 'Î'),
    'u': ('ù', 'ú', 'û', 'ü'),
    'n': ('ñ',),
    'c': ('ç', 'ć'),
    's': ('š',),
    'y': ('ý',),
    'z': ('ž',),
}

# Full character -> replacement table; anything absent becomes '-'.
_URL_CHAR_MAP = {
    accented: plain
    for plain, group in _URL_ACCENT_FOLDS.items()
    for accented in group
}
_URL_CHAR_MAP.update({c: c for c in 'abcdefghijklmnopqrstuvwxyz0123456789'})
_URL_CHAR_MAP.update({c: c.lower() for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'})


def url_sanitize(s):
    """Turn ``s`` into a lower-case, ASCII-only slug.

    ASCII letters are lower-cased, digits are kept, the accented characters
    listed in ``_URL_ACCENT_FOLDS`` are folded to their base letter, and every
    other character (spaces, punctuation, unlisted accents) becomes ``'-'``.
    Consecutive dashes are not collapsed.

    :param s: text to convert.
    :return: the slug, with exactly one output character per input character.
    """
    return ''.join(_URL_CHAR_MAP.get(c, '-') for c in s)
class TvChannel:
    """A TV channel belonging to a country, backed by the ``tv_channels`` table."""

    def __init__(self, id_country, name, idt=0):
        """
        :param id_country: id of the country the channel belongs to.
        :param name: channel name.
        :param idt: database id of the channel; 0 when not known yet.
        """
        self.id = idt
        self.id_country = id_country
        self.name = name
        # Decoded content of the ``names`` column; None until loaded.
        self.names = None

    def store(self, db):
        """Insert the channel (country id and name only); duplicates are ignored.

        :param db: database handle exposing ``exec(stmt, args)``.
        """
        db.exec(
            'INSERT IGNORE INTO tv_channels (id_country, name) VALUES (:id_country, :name)',
            {'id_country': self.id_country, 'name': self.name}
        )

    @staticmethod
    def get_sources():
        """Yield one ``TvChannelSource`` per entry of ``TvChannelSource.SOURCES``."""
        for id_country, url in TvChannelSource.SOURCES.items():
            yield TvChannelSource(id_country, url)

    @staticmethod
    def get_tvchannels(db):
        """Yield every stored channel with its ``names`` column decoded from JSON.

        ``store()`` does not write ``names``, so a freshly inserted row may hold
        NULL there (depends on the column default -- TODO confirm against the
        schema). Such rows used to make ``json.loads`` raise ``TypeError``;
        they now yield a channel whose ``names`` stays ``None``.

        :param db: database handle exposing ``query(stmt)``.
        """
        for row in db.query('SELECT id, id_country, name, names FROM tv_channels'):
            tv_channel = TvChannel(id_country=row['id_country'], name=row['name'], idt=row['id'])
            raw_names = row['names']
            if raw_names:
                tv_channel.names = json.loads(raw_names)
            yield tv_channel
def set_league_bets(self, id_month, db):
    """Save the user's league bets for match days 1 to 4 of a month.

    ``self.league_bets`` is indexed by match day (1..4); the strings found
    under each day are concatenated into one value per ``league_bets_N``
    column of the ``users_month`` row selected by user id and month id.

    :param id_month: id of the month whose row is updated.
    :param db: database handle exposing ``exec(stmt, args)``.
    """
    query = """
        UPDATE users_month
        SET league_bets_1 = :bets_1, league_bets_2 = :bets_2, league_bets_3 = :bets_3, league_bets_4 = :bets_4
        WHERE id_user = :id_user AND id_month = :id_month
    """
    params = {}
    for mday in (1, 2, 3, 4):
        params['bets_{}'.format(mday)] = ''.join(self.league_bets[mday])
    params['id_user'] = self.id
    params['id_month'] = id_month
    db.exec(query, params)
league_bets_4, username, role, notifications, + bets.id_league, bets.id_month, bets.mday, bets.league_id, bets.league_mday, + matches.id AS id_match, matches.idof10, matches.start_date, matches.id_home, matches.id_away, + home.rank AS home_rank, home.coeff AS home_coeff, away.rank AS away_rank, away.coeff AS away_coeff + FROM users_month + INNER JOIN users ON users.id = users_month.id_user + INNER JOIN bets ON bets.id_league = users_month.id_league AND bets.id_month = users_month.id_month + INNER JOIN matches ON matches.id_league = bets.league_id AND matches.mday = bets.league_mday + INNER JOIN league_teams AS home ON home.id_team = matches.id_home AND home.id_league = matches.id_league + INNER JOIN league_teams AS away ON away.id_team = matches.id_away AND away.id_league = matches.id_league + WHERE bets.open = 1 AND matches.status = 0 AND matches.start_date < NOW() + INTERVAL {} DAY {} + ORDER BY users_month.id_league ASC, users_month.id_user ASC, matches.start_date ASC + """.format(int(interval_day), 'AND users.role = 2' if admin else '') + id_user = None + user = None + for row in db.query(stmt): + if row['id_user'] != id_user: + if user is not None: + yield user + user = User(idt=row['id_user'], name=row['username']) + user.role = row['role'] + user.notifications = json.loads(row['notifications']) + user.league_bets = {mday: [x for x in row['league_bets_{}'.format(mday)]] for mday in range(1, 5)} + user.league_bets_matches = {mday: list() for mday in range(1, 5)} + match = Match(idt=row['id_match']) + match.idof10 = row['idof10'] + match.home = Team(idt=row['id_home']) + match.away = Team(idt=row['id_away']) + match.home.rank = row['home_rank'] + match.away.rank = row['away_rank'] + match.home.coeff = row['home_coeff'] + match.away.coeff = row['away_coeff'] + match.start_date = row['start_date'] + match.id_month = row['id_month'] + user.league_bets_matches[row['mday']].append(match) + id_user = row['id_user'] + if user is not None: + yield user + + 
@staticmethod + def get_users_europe_bets(db, interval_day=1, admin=False): + stmt = """ + SELECT id_user, europe_scores_quarter, europe_scorers_quarter, europe_scores_semi, europe_scorers_semi, + europe_scores_final, europe_scorers_final, username, role, notifications, + bets.id_league, bets.id_month, bets.europe_round, bets.europe_matches, + matches.id AS id_match, matches.start_date, matches.id_home, matches.id_away, + home.staff AS home_staff, away.staff AS away_staff + FROM users_month + INNER JOIN users ON users.id = users_month.id_user + INNER JOIN bets ON bets.europe_round = users_month.europe_round AND bets.id_month = users_month.id_month + INNER JOIN matches ON matches.id IN ( + SUBSTRING_INDEX(bets.europe_matches, ',', 1), + SUBSTRING_INDEX(SUBSTRING_INDEX(bets.europe_matches, ',', 2), ',', -1), + SUBSTRING_INDEX(SUBSTRING_INDEX(bets.europe_matches, ',', 3), ',', -1), + SUBSTRING_INDEX(bets.europe_matches, ',', -1) + ) + INNER JOIN teams AS home ON home.id = matches.id_home + INNER JOIN teams AS away ON away.id = matches.id_away + WHERE bets.open = 1 AND users_month.europe > 0 AND users_month.europe_round != '' + AND matches.start_date > NOW() AND matches.start_date < NOW() + INTERVAL {} DAY {} + ORDER BY users_month.europe ASC, users_month.id_user ASC, matches.start_date ASC + """.format(int(interval_day), 'AND users.role = 2' if admin else '') + id_user = None + user = None + for row in db.query(stmt): + if row['id_user'] != id_user: + if user is not None: + yield user + user = User(idt=row['id_user'], name=row['username']) + user.role = row['role'] + user.notifications = json.loads(row['notifications']) + user.europe_round = row['europe_round'] + user.europe_scores = json.loads(row['europe_scores_{}'.format(row['europe_round'])]) + user.europe_scorers = json.loads(row['europe_scorers_{}'.format(row['europe_round'])]) + user.europe_bets_matches = list() + match = Match(idt=row['id_match']) + match.idof4 = 
# A binary operator inside calc(): it follows an operand (a number with an
# optional unit, or a closing parenthesis). '-' additionally has to be followed
# by a number, an opening parenthesis or a function call, so that hyphens
# inside identifiers (``safe-area-inset-top``, ``--my-gap``) and leading minus
# signs are left alone.
_CALC_BINARY_OPERATOR = re.compile(
    r'(\d[a-zA-Z%]*|\))'
    r'\s*'
    r'([+*/]|-(?=\s*(?:[\d.(]|[a-zA-Z-]+\()))'
    r'\s*'
)


def calc_spread(match):
    """Re-insert the spaces around the operators of a minified CSS ``calc()``.

    Intended as the replacement callback of ``re.sub`` over ``calc(...)``
    expressions. CSS requires whitespace around ``+`` and ``-`` inside
    ``calc()``, so an expression squeezed to ``calc(100%-10px)`` is invalid.

    The previous version spaced only the first operator kind it found (in the
    order ``+ - / *``) and replaced every occurrence of that character. So
    ``calc(1px+2px-3px)`` kept an unspaced ``-``, and a lone ``-`` rule also
    split hyphenated identifiers. Every binary operator is now normalised to
    exactly one space on each side.

    :param match: regex match whose whole text (``group(0)``) is the
        ``calc(...)`` expression.
    :return: the expression with its binary operators surrounded by spaces.
    """
    return _CALC_BINARY_OPERATOR.sub(r'\1 \2 ', match.group(0))
class BaseProvider:
    """Common interface of the scraping providers.

    Every hook below is a classmethod whose default body is ``pass``: calling
    it on a provider that does not override it does nothing and returns
    ``None``. Provider classes such as ``Eurosport`` and ``Fftt`` subclass this
    and override the hooks they support. ``data`` is the raw payload to parse
    (the overrides visible in this code base call ``data.decode(...)`` on it,
    so it is presumably ``bytes`` -- confirm against the callers).
    """

    @classmethod
    def get_match_info(cls, match, data):
        """No-op default for parsing match details from ``data`` into ``match``."""
        pass

    @classmethod
    def get_team_staff(cls, data):
        """No-op default for extracting a team's staff from ``data``."""
        pass

    @classmethod
    def get_team_players(cls, data, team, countries):
        """No-op default for extracting the players of ``team`` from ``data``."""
        pass

    @classmethod
    def get_league_ranking(cls, league, data):
        """No-op default for parsing the ranking page of ``league``."""
        pass

    @classmethod
    def get_group_ranking(cls, group, data):
        """No-op default for parsing the ranking of a single ``group``."""
        pass

    @classmethod
    def get_newss_from_source(cls, news_source, data):
        """No-op default for listing the news items found in a source feed."""
        pass

    @classmethod
    def get_news_content(cls, news, data):
        """No-op default for filling ``news`` from its article page."""
        pass

    @classmethod
    def get_news_image(cls, news_image, data):
        """No-op default for handling the image payload of a news item."""
        pass

    @classmethod
    def get_schedule_url(cls, match):
        """No-op default for resolving the schedule URL of ``match``."""
        pass

    @classmethod
    def get_schedule(cls, scheduler, data):
        """No-op default for parsing schedule data for ``scheduler``."""
        pass

    @classmethod
    def get_tvchannels(cls, source, data):
        """No-op default for extracting TV channels from a channel source."""
        pass

    @classmethod
    def get_tvschedule(cls, league, tv_channels, data):
        """No-op default for extracting the TV schedule of ``league``."""
        pass

    @classmethod
    def create_schedule(cls, league, data):
        """No-op default for building the matches of ``league`` from ``data``."""
        pass

    @classmethod
    def create_schedule_from_url(cls, league, data):
        """No-op default; same parameters as ``create_schedule``."""
        pass
class ProviderController:
    """Registry that maps a URL to the provider class able to parse it."""

    # Candidates are tried in this order; each class exposes a DOMAINS
    # collection of the host names it handles.
    PROVIDERS = [
        ProgrammeTelevision,
        FootballDirect,
        Matchendirect,
        TransferMarkt,
        FootMercato,
        Football365,
        Eurosport,
        Football,
        Lequipe,
        Footao,
        Fftt
    ]

    @classmethod
    def get_provider(cls, url):
        """Return the first provider whose ``DOMAINS`` contains the URL's host.

        The raw network location is tried first, exactly as before. The bare
        host name (lower-cased, without port or credentials) is tried as well,
        so ``https://WWW.Eurosport.fr:443/...`` resolves like
        ``https://www.eurosport.fr/...`` instead of finding no provider.

        :param url: absolute URL of the resource to parse.
        :return: the matching provider class, or ``None`` when none claims the host.
        """
        parts = urlsplit(url)
        hosts = [parts.netloc]
        if parts.hostname and parts.hostname != parts.netloc:
            hosts.append(parts.hostname)

        for provider in cls.PROVIDERS:
            if any(host in provider.DOMAINS for host in hosts):
                return provider
        return None
json_body['match']['score'][0]['sets'], + json_body['match']['score'][1]['playerid']: json_body['match']['score'][1]['sets'] + } + match.score_sets = { + 'home': score_sets[player_home['id']], + 'away': score_sets[player_away['id']] + } + if 'name' in json_body['match']['status']: + match.minute = json_body['match']['status']['name'] + + # Get all data in html + else: + soup = BeautifulSoup(html, 'html.parser') + div_match = soup.find(id='livehero') + if div_match is None: + raise NameError('div livehero not found') + + # Check team names + div_teams = div_match.find_all('div', class_='heromatch__team-name') + if len(div_teams) != 2 or not div_teams[0].a.text or not div_teams[1].a.text: + raise NameError('divs team not found') + if div_teams[0].a.text.strip() != match.home.names['eurosport']: + raise NameError('home name does not match') + if div_teams[1].a.text.strip() != match.away.names['eurosport']: + raise NameError('away name does not match') + + # Check start_date + div_date = div_match.find('div', class_='heromatch__date') + if div_date is None: + raise NameError('div date not found') + div_time = div_match.find('div', class_='heromatch__time') + if div_time is None: + raise NameError('div time not found') + date_ = div_date.text.strip() + time_ = div_time.text.strip() + match.start_date = datetime.strptime('{} {}:00'.format(date_, time_), '%d/%m/%y %H:%M:%S') + + # Get score + div_scores = div_match.find_all('div', class_='heromatch__score') + if len(div_scores) < 2: + raise NameError('divs score not found') + if div_scores[0].text.strip().isnumeric(): + match.score_home = int(div_scores[0].text.strip()) + if div_scores[1].text.strip().isnumeric(): + match.score_away = int(div_scores[1].text.strip()) + + # Get minute + div_minute = div_match.find('div', class_='heromatch__minute') + if div_minute is None or not div_minute.text: + div_minute = div_match.find('div', class_='heromatch__status') + if div_minute is None: + raise NameError('div minute not 
@classmethod
def get_team_staff(cls, data):
    """Extract the squad, grouped by role, from a team page.

    The page is walked as a flat sequence of ``<li>`` elements. Player items
    are accumulated until a section title (an item carrying the
    ``team_global_title`` class) is met; that title closes the group
    collected *before* it. This is why each title text is mapped to the
    preceding role (``'Défenseur(s)'`` stores the goalkeepers, and so on).
    Parsing stops at the ``'Entraîneur'`` title.

    Items that contain no ``<a>`` element are skipped; they used to raise
    ``AttributeError``.

    :param data: encoded HTML of the team page (decoded with ``cls.CHARSET``).
    :return: dict with the keys ``goalkeepers``, ``defenders``, ``midfielders``
        and ``attackers`` for the sections found, each a list of player names.
    """
    closes_role = {
        'Défenseur(s)': 'goalkeepers',
        'Milieu(x)': 'defenders',
        'Attaquant(s)': 'midfielders',
        'Entraîneur': 'attackers',
    }
    soup = BeautifulSoup(data.decode(cls.CHARSET), 'html.parser')
    staff = {}
    players = []
    for li in soup.find_all('li'):
        if 'team_global_title' in li.attrs.get('class', ()):
            role = closes_role.get(li.text)
            if role is not None:
                staff[role] = players
            if li.text == 'Entraîneur':
                break
            # Any title, recognised or not, starts a new group.
            players = []
            continue

        anchor = li.find('a')
        if anchor is None:
            # Not a player entry (no link to a player page).
            continue
        players.append(anchor.text.replace('\xa0', ' '))
    return staff
@classmethod
def get_newss_from_source(cls, news_source, data):
    """Yield one ``News`` per entry of a Eurosport RSS feed.

    :param news_source: source descriptor; its ``sport`` is copied onto each news.
    :param data: encoded feed document.
    :return: generator of ``News`` with source, sport, title, url, description,
        image, tags (when the entry has any) and pub_date set.
    """
    xml = data.decode()
    tree = feedparser.parse(xml)
    for item in tree.entries:
        news = News()
        news.source = 'eurosport'
        news.sport = news_source.sport
        news.title = item.title
        news.url = item.link
        # NOTE(review): the description is parsed as JSON once everything
        # between the first '<' and the last '>' is removed -- confirm the
        # feed really delivers a JSON-encoded string here.
        news.description = json.loads(re.sub(r'<.*>', '', item.description))

        summary = BeautifulSoup(item.summary, 'html.parser')
        image_object = summary.find('img')
        # Was: ``image_object and 'src' in image_object.attrs is not None``,
        # a chained comparison that only worked by accident.
        image_url = image_object.attrs.get('src') if image_object is not None else None
        news.image = NewsImage(url=image_url, title=news.title, basename=cls.IMAGE, id_news=news.id)

        if hasattr(item, 'tags'):
            news.tags = [tag.term for tag in item.tags]

        # Set current date as pub_date to prevent disorder between id and pub_date
        news.pub_date = datetime.now()

        yield news
scheduler.matches: + if match.home.names['eurosport'] == home and match.away.names['eurosport'] == away: + date = datetime.strptime(soup.find('div', class_='livehero__date').contents[0], '%d/%m/%y') + hours, minutes = soup.find('div', class_='tennismatch__time-value').text.strip().split(':') + match.new_start_date = date + timedelta(hours=int(hours), minutes=int(minutes)) + match.task_done = True + + @classmethod + def create_schedule(cls, league, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + current_year = datetime.now().year + + droplet_id = 0 + if league.sport.id == 2: + ajax_container = soup.find('div', class_='ajax-container') + droplet_match = re.search(r'&dropletid=(\d+)&', ajax_container.attrs['data-ajax-url']) + if droplet_match is not None: + droplet_id = int(droplet_match.group(1)) + else: + raise Exception('no droplet_id found') + + rounds = list() + div_rounds = soup.find('div', class_='rounds-dropdown__rounds') + for div_round in div_rounds.find_all('div', class_='rounds-dropdown__round'): + rounds.append(div_round.text.strip()) + + div_matches = soup.find('div', class_='bracket-matches-wrapper') + for div_matches_round in div_matches.find_all('div', class_='bracket-matches'): + for class_ in div_matches_round.attrs['class']: + if class_.startswith('bracket-round--'): + nb_round = class_.replace('bracket-round--', '') + if nb_round.isnumeric(): + nb_round = int(nb_round) + if nb_round <= len(rounds): + current_round = rounds[nb_round - 1] + idof10 = 0 + + for a_match in div_matches_round.find_all('a', class_='match-sets'): + match = Match(idt=0) + match.idof10 = idof10 + match.url = urljoin(league.url, a_match.attrs['href']) + if droplet_id is not None: + match_id = int(a_match.attrs['href'].split('/')[-2].split('mtc')[-1]) + score_url = 'https://web-api.eurosport.com/json/getmatchheaderweb.json' + comms_url = 'https://web-api.eurosport.com/json/getlivecomments.json' + match.url_score = 
'{}?d={}&ids={}'.format(score_url, droplet_id, match_id) + match.url_comms = '{}?d={}&ids={}'.format(comms_url, droplet_id, match_id) + + match.league = league + match.leg = 0 + match.round = current_round + match.mday = 0 + + div_time = a_match.find('div', class_='match-sets__start-time') + match_date = '{}/{}'.format(div_time.text.strip(), current_year) + match.start_date = datetime.strptime(match_date, '%d/%m/%Y') + + divs_name = a_match.find_all('div', class_='player__name') + divs_logo = a_match.find_all('div', class_='player__logo') + if len(divs_name) == 2 and divs_name[0].text and divs_name[1].text: + + match.home = Team(idt=0) + match.home.league = league + match.home.country = Country(idt=0) + img_country_home = divs_logo[0].find('img') + if img_country_home is None: + continue + match.home.country.name = img_country_home.attrs['title'].strip() + match.home.name = divs_name[0].text.strip() + match.home.long_name = match.home.name + words = match.home.name.split(' ') + for idx in range(len(words)): + if idx < len(words) - 1 and len(words[idx]) > 3: + words[idx] = words[idx][0] + '.' + match.home.short_name = ' '.join(words) + match.home.names = {'eurosport': match.home.name} + + match.away = Team(idt=0) + match.away.league = league + match.away.country = Country(idt=0) + img_country_away = divs_logo[1].find('img') + if img_country_away is None: + continue + match.away.country.name = img_country_away.attrs['title'].strip() + match.away.name = divs_name[1].text.strip() + match.away.long_name = match.away.name + words = match.away.name.split(' ') + for idx in range(len(words)): + if idx < len(words) - 1 and len(words[idx]) > 3: + words[idx] = words[idx][0] + '.' 
+ match.away.short_name = ' '.join(words) + match.away.names = {'eurosport': match.away.name} + + idof10 += 1 + yield match diff --git a/providers/fftt.py b/providers/fftt.py new file mode 100644 index 0000000..fb2415f --- /dev/null +++ b/providers/fftt.py @@ -0,0 +1,50 @@ +from datetime import datetime + +from bs4 import BeautifulSoup +import bs4.element +import feedparser + +from providers.base import BaseProvider +from lib.news import News, NewsImage + + +class Fftt(BaseProvider): + + DOMAINS = {'www.fftt.com'} + CHARSET = 'utf-8' + IMAGE = 'big-fftt.png' + + @classmethod + def get_newss_from_source(cls, news_source, data): + xml = data.decode() + tree = feedparser.parse(xml) + for item in tree.entries: + news = News() + news.source = 'fftt' + news.sport = news_source.sport + news.title = item.title + news.url = item.link + news.description = item.description + + image_url = item.enclosures[0].href if len(item.enclosures) > 0 else None + news.image = NewsImage(url=image_url, title=news.title, basename=cls.IMAGE, id_news=news.id) + + if hasattr(item, 'tags'): + news.tags = [tag.term for tag in item.tags] + + # Set current date as pub_date to prevent disorder between id and pub_date + news.pub_date = datetime.now() + + yield news + + @classmethod + def get_news_content(cls, news, data): + html = data.decode(encoding=cls.CHARSET, errors='ignore') + soup = BeautifulSoup(html, 'html.parser') + + div_paraphs = soup.find('div', class_='news-description') + if div_paraphs is not None: + div_paraphs = soup.find('div', class_='news-description') + news.content = ''.join( + [str(c) for c in div_paraphs.contents if isinstance(c, bs4.element.Tag) and c.name != 'script'] + ) diff --git a/providers/footao.py b/providers/footao.py new file mode 100644 index 0000000..a4ca858 --- /dev/null +++ b/providers/footao.py @@ -0,0 +1,26 @@ +from bs4 import BeautifulSoup + +from providers.base import BaseProvider + + +class Footao(BaseProvider): + + DOMAINS = {'www.footao.tv'} + CHARSET = 
'utf-8' + + @classmethod + def get_tvschedule(cls, league, tv_channels, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + for div in soup.find_all('div'): + a = div.find('a', class_='rc') + if a is not None and ' · ' in a.text: + home, away = a.text.split(' · ') + for match in league.matches: + if match.home.names.get('footao') == home and match.away.names.get('footao') == away: + for img in div.find_all('img'): + tvc_name = img['alt'].replace('programme foot', '').split('tv direct')[0].strip() + for tv_channel in tv_channels: + if tv_channel.names.get('footao') == tvc_name: + match.tv_channels.append(tv_channel) + break diff --git a/providers/football.py b/providers/football.py new file mode 100644 index 0000000..53dda39 --- /dev/null +++ b/providers/football.py @@ -0,0 +1,34 @@ +from bs4 import BeautifulSoup + +from providers.base import BaseProvider + + +class Football(BaseProvider): + + DOMAINS = {'www.football.fr'} + CHARSET = 'utf-8' + + @classmethod + def get_team_staff(cls, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + table = soup.find('table', class_='effectif') + players = list() + staff = dict() + for tr in table.find_all('tr'): + th = tr.find('th') + if th is not None: + if th.text.strip() == 'Défenseur': + staff['goalkeepers'] = players + players = list() + elif th.text.strip() == 'Milieu': + staff['defenders'] = players + players = list() + elif th.text.strip() == 'Attaquant': + staff['midfielders'] = players + players = list() + else: + a = tr.find('td', class_='player left').find('a') + players.append(a.text.strip()) + staff['attackers'] = players + return staff diff --git a/providers/football365.py b/providers/football365.py new file mode 100644 index 0000000..9f712d8 --- /dev/null +++ b/providers/football365.py @@ -0,0 +1,38 @@ +from bs4 import BeautifulSoup + +from providers.base import BaseProvider + + +class Football365(BaseProvider): + + DOMAINS = 
{'www.football365.fr'} + CHARSET = 'utf-8' + + @classmethod + def get_team_staff(cls, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + table = soup.find('table', class_='table table-striped') + players = list() + staff = dict() + for td in table.find_all('td'): + if 'players-effectif' in td.attrs['class']: + h3 = td.find('h3') + if h3.text.strip() == 'Défenseurs': + staff['goalkeepers'] = players + players = list() + elif h3.text.strip() == 'Milieux': + staff['defenders'] = players + players = list() + elif h3.text.strip() == 'Attaquants': + staff['midfielders'] = players + players = list() + elif 'nom_joueur' in td.attrs['class']: + a = td.find('a') + if len(a.contents) > 1: + player = '{} {}'.format(a.contents[0].strip(), a.contents[1].text.strip()) + else: + player = a.contents[0].strip() + players.append(player) + staff['attackers'] = players + return staff diff --git a/providers/footballdirect.py b/providers/footballdirect.py new file mode 100644 index 0000000..3e766eb --- /dev/null +++ b/providers/footballdirect.py @@ -0,0 +1,32 @@ +from bs4 import BeautifulSoup + +from providers.base import BaseProvider + + +class FootballDirect(BaseProvider): + + DOMAINS = {'www.football-direct.com'} + CHARSET = 'iso-8859-15' + + @classmethod + def get_team_staff(cls, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + sections = soup.find('div', id='tabpanel2').find_all('section') + players = list() + staff = dict() + for section in sections: + h3 = section.find('h3') + if h3.text.strip() == 'Défenseurs': + staff['goalkeepers'] = players + players = list() + elif h3.text.strip() == 'Milieu de terrain': + staff['defenders'] = players + players = list() + elif h3.text.strip() == 'Attaquants': + staff['midfielders'] = players + players = list() + for div in section.find_all('div', class_='block6-1'): + players.append(div.find('div', class_='title4_1').find('strong').text.strip()) + staff['attackers'] = 
players + return staff diff --git a/providers/footmercato.py b/providers/footmercato.py new file mode 100644 index 0000000..71caa72 --- /dev/null +++ b/providers/footmercato.py @@ -0,0 +1,59 @@ +from datetime import datetime + +from bs4 import BeautifulSoup +import bs4.element +import feedparser + +from providers.base import BaseProvider +from lib.news import News, NewsImage + + +class FootMercato(BaseProvider): + + DOMAINS = {'www.footmercato.net'} + CHARSET = 'utf-8' + IMAGE = 'big-foot-mercato.png' + + @classmethod + def get_newss_from_source(cls, news_source, data): + xml = data.decode() + tree = feedparser.parse(xml) + for item in tree.entries: + news = News() + news.source = 'foot-mercato' + news.sport = news_source.sport + news.title = item.title + news.url = item.link + news.description = item.description + + image_url = item.enclosures[0].href if len(item.enclosures) > 0 else None + news.image = NewsImage(url=image_url, title=news.title, basename=cls.IMAGE, id_news=news.id) + + if hasattr(item, 'tags'): + news.tags = [tag.term for tag in item.tags] + + # Set current date as pub_date to prevent disorder between id and pub_date + news.pub_date = datetime.now() + + yield news + + @classmethod + def get_news_content(cls, news, data): + html = data.decode(encoding=cls.CHARSET, errors='ignore') + soup = BeautifulSoup(html, 'html.parser') + + h2_teaser = soup.find('h2', class_='line h3-like') + if h2_teaser is not None: + news.teaser = str(h2_teaser.text).strip() + + div_paraphs = soup.find('div', class_='article-text') + if div_paraphs is not None: + news.content = ''.join( + [str(c) for c in div_paraphs.contents if isinstance(c, bs4.element.Tag) and c.name != 'script'] + ) + + div_author = soup.find('div', class_='article-author') + if div_author is not None: + span_author = div_author.find('span', 'name') + if span_author is not None: + news.author = str(span_author.contents[0]).strip() diff --git a/providers/lequipe.py b/providers/lequipe.py new file mode 
100644 index 0000000..b30950e --- /dev/null +++ b/providers/lequipe.py @@ -0,0 +1,59 @@ +from datetime import datetime +from html import unescape + +from bs4 import BeautifulSoup +import bs4.element +import feedparser + +from providers.base import BaseProvider +from lib.news import News, NewsImage + + +class Lequipe(BaseProvider): + + DOMAINS = {'www.lequipe.fr'} + CHARSET = 'utf-8' + IMAGE = 'big-lequipe.png' + + @classmethod + def get_newss_from_source(cls, news_source, data): + xml = data.decode() + tree = feedparser.parse(xml) + for item in tree.entries: + news = News() + news.source = 'lequipe' + news.sport = news_source.sport + news.title = item.title + news.url = item.link + news.description = item.description + + image_url = item.enclosures[0].href if len(item.enclosures) > 0 else None + news.image = NewsImage(url=image_url, title=news.title, basename=cls.IMAGE, id_news=news.id) + + if hasattr(item, 'tags'): + news.tags = [tag.term for tag in item.tags] + + # Set current date as pub_date to prevent disorder between id and pub_date + news.pub_date = datetime.now() + + yield news + + @classmethod + def get_news_content(cls, news, data): + html = unescape(data.decode(encoding=cls.CHARSET, errors='ignore')) + soup = BeautifulSoup(html, 'html.parser') + + div_teaser = soup.find('h2', class_='Article__chapo') + if div_teaser is not None: + news.teaser = str(div_teaser.text).strip() + + div_paraphs = soup.find('div', class_='article__body') + if div_paraphs is not None: + news.content = ''.join([ + str(c) for c in div_paraphs.contents + if isinstance(c, bs4.element.Tag) and c.name != 'script' + ]) + + div_author = soup.find('span', class_='Author__name') + if div_author is not None: + news.author = div_author.text.strip() diff --git a/providers/matchendirect.py b/providers/matchendirect.py new file mode 100644 index 0000000..6a8ad0d --- /dev/null +++ b/providers/matchendirect.py @@ -0,0 +1,514 @@ +from datetime import datetime, timedelta +from urllib.parse import 
urljoin +import locale + +# noinspection PyProtectedMember +from bs4 import BeautifulSoup, NavigableString + +from lib.match import Match, Event, Squad, Stat, Comm +from providers.base import BaseProvider +from lib.country import Country +from lib.league import Group +from lib.team import Team + + +class Matchendirect(BaseProvider): + + DOMAINS = {'www.matchendirect.fr'} + CHARSET = 'utf-8' + ROLES = [ + [ + [None, 'DLG', 'DLG', 'MDG', 'MG', 'MOG', 'ALG'], + [None, 'DCG', None, 'MDG', 'MCG', 'MOG', 'AG'], + ['G', 'DC', None, 'MDC', 'MC', 'MOC', 'AC'], + [None, 'DCD', None, 'MDD', 'MCD', 'MOD', 'AD'], + [None, 'DLD', 'DLD', 'MDD', 'MD', 'MOD', 'ALD'] + ], + [ + ['ALD', 'MOD', 'MD', 'MDD', 'DLD', 'DLD', None], + ['AD', 'MOD', 'MCD', 'MDD', None, 'DCD', None], + ['AC', 'MOC', 'MC', 'MDC', None, 'DC', 'G'], + ['AG', 'MOG', 'MCG', 'MDG', None, 'DCG', None], + ['ALG', 'MOG', 'MG', 'MDG', 'DLG', 'DLG', None] + ] + ] + MONTH_NUMBERS = { + 'janvier': '01', + 'février': '02', + 'mars': '03', + 'avril': '04', + 'mai': '05', + 'juin': '06', + 'juillet': '07', + 'août': '08', + 'septembre': '09', + 'octobre': '10', + 'novembre': '11', + 'décembre': '12' + } + EVENT_TYPES = { + 'ico_evenement1': ('goal', None), + 'ico_evenement2': ('goal', 'P'), + 'ico_evenement3': ('red-card', None), + 'ico_evenement4': ('yellow-card', None), + 'ico_evenement5': ('yellow-red-card', None), + 'ico_evenement7': ('goal', 'CSC'), + 'ico_evenement81': ('switch-out', None), + 'ico_evenement82': ('switch-out', None), + 'ico_evenement91': ('switch-in', None), + 'ico_evenement92': ('switch-in', None) + } + STAT_NAMES = { + 'Possession': 'possession', + 'Buts': 'goals', + 'Tirs': 'attempts', + 'Corners': 'corners', + 'Hors-jeu': 'offsides', + 'Fautes': 'fouls', + 'Carton jaune': 'yellow_cards', + 'Carton rouge': 'red_cards' + } + COMM_TYPES = { + 'ico_com_occasion': 'chance', + 'ico_com_but': 'goal', + 'ico_com_carton-jaune': 'yellow-card', + 'ico_com_remplacement': 'switch', + 'ico_com_sifflet': 
'whistle', + 'ico_com_carton-rouge': 'red-card' + } + + @classmethod + def get_match_info(cls, match, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + div_match = soup.find(id='ajax-match-detail-1') + if div_match is None: + raise NameError('div ajax-match-detail-1 not found') + + # check team names + div_teams = div_match.find_all('div', class_='team') + if len(div_teams) < 2: + raise NameError('divs team not found') + if div_teams[0].a.text.strip() == match.away.names['matchendirect'] \ + and div_teams[1].a.text.strip() == match.home.names['matchendirect']: + raise NameError('team names are inverted') + if div_teams[0].a.text.strip() != match.home.names['matchendirect']: + raise NameError('home name does not match') + if div_teams[1].a.text.strip() != match.away.names['matchendirect']: + raise NameError('away name does not match') + + # check start_date + div_info = div_match.find('div', class_='info1') + if div_info is None: + raise NameError('div info1 is not found') + content_date = div_info.contents[0] + exp_date = content_date.text.split(' ') + day = exp_date[1] + month = cls.MONTH_NUMBERS[exp_date[2]] + year = exp_date[3] + content_time = div_info.contents[1] + exp_time = content_time.strip(' à').split('h') + if len(exp_time) == 2: + hour = exp_time[0] + minute = exp_time[1] + else: + hour = 0 + minute = 0 + match.start_date = datetime.strptime( + '{}-{}-{} {}:{}:00'.format(year, month, day, hour, minute), '%Y-%m-%d %H:%M:%S' + ) + + # get shootout + table_shootout = soup.find('table', id='match_evenement_score') + if table_shootout is not None: + for tr in table_shootout.find_all('tr'): + tds = tr.find_all('td') + if len(tds) == 3: + if tds[0].text.strip() == 'Score après prolongation': + match.extra_time = 'extratime' + if tds[0].text.strip() == 'Tirs au but': + exp_shootout = tds[1].text.split(' - ') + match.extra_time = 'shootout' + match.shootout_home = int(exp_shootout[0]) + match.shootout_away = 
int(exp_shootout[1]) + + # get score + span_scores = div_match.find_all('span', class_='score') + if len(span_scores) < 2: + raise NameError('spans score not found') + if span_scores[0].text.strip().isnumeric(): + match.score_home = int(span_scores[0].text.strip()) + if span_scores[1].text.strip().isnumeric(): + match.score_away = int(span_scores[1].text.strip()) + + # get minute + div_status = div_match.find('div', class_='status') + if div_status is None: + raise NameError('div status not found') + content_minute = div_status.contents[-1] + if isinstance(content_minute, NavigableString): + match.minute = content_minute.strip().lower() + else: + match.minute = content_minute.text.strip().lower() + + # get events + table_events = soup.find('table', id='match_evenement') + if table_events is not None: + nb_goals = {'home': 0, 'away': 0} + for span_event in table_events.find_all('span'): + if span_event['class'][2] in cls.EVENT_TYPES: + type_, particularity = cls.EVENT_TYPES[span_event['class'][2]] + event = Event(type_=type_) + td_event = span_event.parent + if td_event['class'][0] == 'c1': + event.side = 'home' + else: + event.side = 'away' + event.player = td_event.find('a').text.strip() + if particularity is not None: + event.player += ' ({})'.format(particularity) + event.minute = td_event.parent.find('td', class_='c2').text + if event.type == 'goal': + nb_goals[event.side] += 1 + if nb_goals[event.side] > getattr(match, 'score_' + event.side): + continue + match.events.append(event) + + # get squad + div_squad = soup.find('div', class_='MEDpanelcomposition') + if div_squad is not None: + td_squads = div_squad.find_all('td') + if len(td_squads) > 1: + for span_squad in td_squads[0].find_all('span'): + squad_name = span_squad.previous_sibling.previous_sibling.text.strip() if \ + span_squad.previous_sibling.previous_sibling is not None else span_squad.previous_sibling + if 'ico_compo_titulaire' in span_squad.attrs['class']: + role = 'STR' + else: + role = 'SUB' + 
squad = Squad(role=role, name=squad_name, side='home') + for event in match.events: + if event.player.replace(' (P)', '') == squad.name: + squad.events.append(event.type) + match.squad.append(squad) + for span_squad in td_squads[1].find_all('span'): + squad_name = span_squad.next_sibling.next_sibling.text.strip() if \ + span_squad.next_sibling.next_sibling is not None else span_squad.next_sibling + if 'ico_compo_titulaire' in span_squad.attrs['class']: + role = 'STR' + else: + role = 'SUB' + squad = Squad(role=role, name=squad_name, side='away') + for event in match.events: + if event.player.replace(' (P)', '') == squad.name: + squad.events.append(event.type) + match.squad.append(squad) + + # get squad roles + table_squad = soup.find('table', id='schema_compo') + sides = [None, None] + if table_squad is not None: + tables = table_squad.find_all('table') + for id_table in range(len(tables)): + table = tables[id_table] + tds = table.find_all('td') + for id_td in range(len(tds)): + td = tds[id_td].find('b') + if td is not None and td.text: + for id_squad in range(len(match.squad)): + squad = match.squad[id_squad] + if squad.role in ('SUB', 'STR') and all([name in squad.name for name in td.text.split()]): + if squad.side not in sides: + sides[id_table // 5] = squad.side + sides[1 - id_table // 5] = 'home' if squad.side == 'away' else 'away' + if squad.side == sides[id_table // 5]: + squad.lastname = td.text.strip() + squad.role = cls.ROLES[id_table // 5][id_table % 5][id_td] + break + + # re-order squad lines + for side in ('home', 'away'): + if len([squad for squad in match.squad if squad.role in ('DCG', 'DC', 'DCD') and squad.side == side]) > 2: + for ids in range(len(match.squad)): + squad = match.squad[ids] + squad.role = 'DG' if squad.side == side and squad.role == 'DCG' else squad.role + squad.role = 'DD' if squad.side == side and squad.role == 'DCD' else squad.role + + if len([squad for squad in match.squad if squad.role.startswith('D') and squad.side == side]) 
< 4: + for ids in range(len(match.squad)): + squad = match.squad[ids] + squad.role = 'DLG' if squad.side == side and squad.role == 'MG' else squad.role + squad.role = 'DLD' if squad.side == side and squad.role == 'MD' else squad.role + + if len([squad for squad in match.squad if squad.role in ('MDG', 'MDC', 'MDD') and squad.side == side]) > 2: + for ids in range(len(match.squad)): + squad = match.squad[ids] + squad.role = 'MG' if squad.side == side and squad.role == 'MDG' else squad.role + squad.role = 'MD' if squad.side == side and squad.role == 'MDD' else squad.role + + if len([squad for squad in match.squad if squad.role in ('MCG', 'MC', 'MCD') and squad.side == side]) > 2: + if len([sq for sq in match.squad if sq.role in ('MDG', 'MDC', 'MDD') and sq.side == side]) == 0: + for ids in range(len(match.squad)): + squad = match.squad[ids] + squad.role = 'MDC' if squad.side == side and squad.role == 'MC' else squad.role + else: + for ids in range(len(match.squad)): + squad = match.squad[ids] + squad.role = 'MG' if squad.side == side and squad.role == 'MCG' else squad.role + squad.role = 'MD' if squad.side == side and squad.role == 'MCD' else squad.role + + if len([sq for sq in match.squad if sq.role in ('MG', 'MCG', 'MC', 'MCD', 'MD') and sq.side == side]) > 3: + if len([sq for sq in match.squad if sq.role in ('MDG', 'MDC', 'MDD') and sq.side == side]) == 0: + for ids in range(len(match.squad)): + squad = match.squad[ids] + squad.role = 'MDC' if squad.side == side and squad.role == 'MC' else squad.role + squad.role = 'MDG' if squad.side == side and squad.role == 'MCG' else squad.role + squad.role = 'MDD' if squad.side == side and squad.role == 'MCD' else squad.role + else: + for ids in range(len(match.squad)): + squad = match.squad[ids] + squad.role = 'MOC' if squad.side == side and squad.role == 'MC' else squad.role + squad.role = 'MOG' if squad.side == side and squad.role == 'MG' else squad.role + squad.role = 'MOD' if squad.side == side and squad.role == 'MD' else 
squad.role + + if len([squad for squad in match.squad if squad.role in ('AG', 'AC', 'AD') and squad.side == side]) > 2: + for id_squad in range(len(match.squad)): + squad = match.squad[id_squad] + squad.role = 'ALG' if squad.side == side and squad.role == 'AG' else squad.role + squad.role = 'ALD' if squad.side == side and squad.role == 'AD' else squad.role + + # get stats + div_stats = soup.find('div', class_='MEDpanelstats') + if div_stats is not None: + match.stats = {name: Stat() for name in cls.STAT_NAMES.values()} + for tr in div_stats.find_all('tr'): + tds = tr.find_all('td') + stat_name = tds[2].text.strip() + stat = Stat(home=int(tds[0].text.strip()), away=int(tds[4].text.strip())) + if stat_name in cls.STAT_NAMES: + match.stats[cls.STAT_NAMES[stat_name]] = stat + elif stat_name == 'Tirs cadrés': + match.stats['attempts'].home += stat.home + match.stats['attempts'].away += stat.away + match.stats['in_attempts'] = Stat(home=stat.home, away=stat.away) + elif stat_name == 'Tirs non cadrés': + match.stats['attempts'].home += stat.home + match.stats['attempts'].away += stat.away + elif stat_name == 'Tirs arrêtés': + match.stats['block_attempts'] = stat + match.stats['attempts'].home += stat.home + match.stats['attempts'].away += stat.away + elif stat_name == 'Tirs sur le poteau': + match.stats['pole_attempts'] = stat + match.stats['attempts'].home += stat.home + match.stats['attempts'].away += stat.away + + # get live comments + table_comms = soup.find('table', id='commentaire') + if table_comms is not None: + for tr in table_comms.find_all('tr'): + tds = tr.find_all('td') + if len(tds) == 3: + span_icon = tds[1].find('span') + minute = tds[0].text.strip().replace('+', "'+") + if minute and not minute.endswith("'") and '+' not in minute: + minute += "'" + comm_type = '' + if span_icon is not None: + icon_class = span_icon.attrs['class'][1] + if icon_class in cls.COMM_TYPES: + comm_type = cls.COMM_TYPES[icon_class] + else: + continue + 
match.comms.append(Comm(minute=minute, type_=comm_type, text=tds[2].text.strip())) + + @classmethod + def get_league_ranking(cls, league, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + table = soup.find('table', id='tableau_classement') + groups = list() + group = Group(name='0', url=None, league=league) + for tr in table.find_all('tr'): + tds = tr.find_all('td') + + # row titles + if len(tds) == 0: + continue + + # row group + if len(tds) == 1: + group_name = tds[0].text.strip() + for grp in groups: + if grp.name == group_name: + group = grp + break + else: + group = Group(name=group_name, url=None, league=league) + groups.append(group) + continue + + # get team + med_name = tds[0].find('a').contents[-1].strip() + for tm in league.teams: + if 'matchendirect' in tm.names and tm.names['matchendirect'] == med_name: + team = tm + break + else: + continue + + # get rank + th = tr.find('th') + span = th.find('span') + if span is not None: + team.rank = int(span.text.strip()) + else: + team.rank = int(th.text.strip()) + + # get stats + team.group = group + team.points = int(tds[1].text.strip()) + team.played = int(tds[2].text.strip()) + team.wins = int(tds[3].text.strip()) + team.ties = int(tds[4].text.strip()) + team.loss = int(tds[5].text.strip()) + team.g_for = int(tds[6].text.strip()) + team.g_against = int(tds[7].text.strip()) + team.g_diff = int(tds[8].text.strip()) + return groups + + @classmethod + def get_schedule_url(cls, match): + # In matchendirect.fr dates are shifted in 2019 + shift_date = match.start_date + timedelta(days=7) + return '{}/{}'.format(match.league.url.rstrip('/'), shift_date.strftime('%Y-%W')) + + @classmethod + def get_schedule(cls, scheduler, data): + locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8') + html = data.decode() + soup = BeautifulSoup(html, 'html.parser') + date = None + for table in soup.find_all('table', class_='table table-striped table-hover'): + for tr in table.find_all('tr'): + th = 
tr.find('th') + if th is not None: + date = datetime.strptime(th.text.strip(), '%A %d %B %Y') + elif 'data-matchid' in tr.attrs and date is not None: + td_hour = tr.find('td', class_='lm1') + hours, minutes = td_hour.text.strip().split(':') + if hours.isnumeric() and minutes.isnumeric(): + start_date = date + timedelta(hours=int(hours), minutes=int(minutes)) + else: + start_date = date + td_score = tr.find('td', class_='lm3') + home = td_score.find('span', class_='lm3_eq1').contents[0].strip(' \n\t*') + away = td_score.find('span', class_='lm3_eq2').contents[-1].strip(' \n\t*') + url = urljoin('http://www.matchendirect.fr/', td_score.find('a').attrs['href']) + for match in scheduler.matches: + if match.home.names['matchendirect'] == home and match.away.names['matchendirect'] == away: + match.new_url = url + match.new_start_date = start_date + match.task_done = True + break + + a_previous = soup.find('a', class_='objselect_prevnext objselect_prec') + if a_previous is not None: + scheduler.previous_url = urljoin('http://www.matchendirect.fr/', a_previous.attrs['href']) + a_next = soup.find('a', class_='objselect_prevnext objselect_suiv') + if a_next is not None: + scheduler.next_url = urljoin('http://www.matchendirect.fr/', a_next.attrs['href']) + + @classmethod + def create_schedule(cls, league, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + div_top = soup.find('div', id='filtre_haut') + select_url = div_top.find('select') + if select_url is not None: + selected = False + for option_url in select_url.find_all('option'): + if selected or 'selected' in option_url.attrs: + yield urljoin(league.url, option_url.attrs['value']) + selected = True + elif 'selected' in option_url.attrs: + selected = True + + @classmethod + def _current_mday_round_leg(cls, league, date): + _mday = 0 + _round = None + _leg = 0 + if league.round_dates is not None: + for key, value in league.round_dates.items(): + if date >= datetime.strptime(key, 
'%Y-%m-%d'): + _mday = value['mday'] + _round = value['round'] + _leg = value['leg'] + return _mday, _round, _leg + + @classmethod + def create_schedule_from_url(cls, league, data): + locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8') + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + date = None + current_mday = 0 + current_round = None + current_leg = 0 + idof10 = 0 + for table in soup.find_all('table', class_='table table-striped table-hover'): + for tr in table.find_all('tr'): + th = tr.find('th') + if th is not None: + date = datetime.strptime(th.text.strip(), '%A %d %B %Y') + _mday, _round, _leg = cls._current_mday_round_leg(league, date) + if _mday != current_mday or _round != current_round or _leg != current_leg: + current_mday, current_round, current_leg = _mday, _round, _leg + idof10 = 0 + + elif 'data-matchid' in tr.attrs and date is not None: + match = Match(idt=0) + match.idof10 = idof10 + match.league = league + match.mday = current_mday + match.round = current_round + match.leg = current_leg + + td_hour = tr.find('td', class_='lm1') + hours, minutes = td_hour.text.strip().split(':') + if hours.isnumeric() and minutes.isnumeric(): + match.start_date = date + timedelta(hours=int(hours), minutes=int(minutes)) + else: + match.start_date = date + + td_score = tr.find('td', class_='lm3') + match.url = urljoin('http://www.matchendirect.fr/', td_score.find('a').attrs['href']) + + home_name = td_score.find('span', class_='lm3_eq1').contents[0].strip(' \n\t*') + match.home = Team(idt=0) + match.home.league = league + match.home.name = home_name + ' F' if league.gender == 'F' else home_name + match.home.short_name = match.home.name[:3].upper() + match.home.long_name = match.home.name + match.home.names = {cls.__name__.lower(): home_name} + match.home.id_sport = league.sport.id + match.home.country = Country(idt=league.country.id) + match.home.gender = league.gender + match.home.images = {'png': 'default-team.png', '50': 
'h50-default-team.svg', + '30': 'h30-default-team.svg', '80': 'h80-default-team.svg'} + + away_name = td_score.find('span', class_='lm3_eq2').contents[-1].strip(' \n\t*') + match.away = Team(idt=0) + match.away.league = league + match.away.name = away_name + ' F' if league.gender == 'F' else away_name + match.away.short_name = match.away.name[:3].upper() + match.away.long_name = match.away.name + match.away.names = {cls.__name__.lower(): away_name} + match.away.id_sport = league.sport.id + match.away.country = Country(idt=league.country.id) + match.away.gender = league.gender + match.away.images = {'png': 'default-team.png', '50': 'h50-default-team.svg', + '30': 'h30-default-team.svg', '80': 'h80-default-team.svg'} + idof10 += 1 + yield match diff --git a/providers/programmetelevision.py b/providers/programmetelevision.py new file mode 100644 index 0000000..e81957b --- /dev/null +++ b/providers/programmetelevision.py @@ -0,0 +1,19 @@ +from providers.base import BaseProvider + +from bs4 import BeautifulSoup + +from lib.tvchannel import TvChannel + + +class ProgrammeTelevision(BaseProvider): + + DOMAINS = {'www.programme-television.org'} + CHARSET = 'utf-8' + + @classmethod + def get_tvchannels(cls, source, data): + html = data.decode(cls.CHARSET) + soup = BeautifulSoup(html, 'html.parser') + for li in soup.find_all('li', class_='col-md-2'): + channel_name = li.find('span', class_='titre').text + source.tv_channels.append(TvChannel(id_country=source.id_country, name=channel_name)) diff --git a/providers/transfermarkt.py b/providers/transfermarkt.py new file mode 100644 index 0000000..054e3e4 --- /dev/null +++ b/providers/transfermarkt.py @@ -0,0 +1,156 @@ +from urllib.parse import urlsplit +from bs4 import BeautifulSoup +from datetime import datetime +import locale + +from providers.base import BaseProvider +from lib.player import Player + + +class TransferMarkt(BaseProvider): + + DOMAINS = {'www.transfermarkt.fr'} + CHARSET = 'UTF-8' + + ROLES = { + 'Gardien': 
    @classmethod
    def get_team_players(cls, data, team, countries):
        """Yield one ``Player`` per squad row parsed from a team page.

        The page is decoded with ``cls.CHARSET`` and rows are read from the
        ``<div id="yw1">`` container; only ``<tr>`` rows with exactly ten
        direct ``<td>`` cells are treated as players.  Nothing is yielded
        when the container is missing.

        A parsing problem never aborts a row: each one is appended as a
        message to ``player.error``, which is replaced by ``None`` when the
        row produced no message at all.

        :param data: raw page body (bytes)
        :param team: team handed to every ``Player`` that is built
        :param countries: iterable of countries exposing
            ``names['transfermarkt']``; it is iterated once per flag image,
            so it must be re-iterable (e.g. a list)
        :return: generator of ``Player`` objects
        """
        # Process-wide switch to the French locale: the '%b' month parsing
        # of the birth date below is locale dependent.
        locale.setlocale(locale.LC_ALL, 'fr_FR.utf-8')
        html = data.decode(cls.CHARSET)
        soup = BeautifulSoup(html, 'html.parser')
        table = soup.find('div', id='yw1')
        if table is not None:
            for tr in table.find_all('tr'):
                # Direct children only: the name cell embeds its own table.
                tds = tr.find_all('td', recursive=False)
                if len(tds) == 10:

                    player = Player(team=team, error=list())

                    # Cell 0: shirt number ('-' means no number).
                    number = tds[0].text.strip()
                    if number.isnumeric():
                        player.number = int(number)
                    elif number != '-':
                        player.error.append("bad format number '{}'".format(number))

                    # The role label is read from the title attribute of the same cell.
                    role = tds[0].attrs.get('title')
                    if role in cls.ROLES:
                        player.role = cls.ROLES[role]
                    else:
                        player.error.append("bad format role '{}'".format(role))

                    # Cell 2: keep the text before '('; 'avr.' is rewritten to
                    # 'avril' before parsing — presumably to match the locale's
                    # abbreviated month name; TODO confirm.
                    birth_date = tds[2].text.split('(')[0].strip().replace('avr.', 'avril')
                    try:
                        player.set_age(datetime.strptime(birth_date, '%d %b %Y'))
                    except ValueError:
                        player.error.append("bad format birth_date '{}'".format(birth_date))

                    # Cell 3: one or two flag images, matched on their 'alt' text.
                    # NOTE(review): the error messages report the 'title'
                    # attribute although the lookup uses 'alt' — confirm intent.
                    imgs = tds[3].find_all('img')
                    if len(imgs) > 0:
                        for country in countries:
                            if country.names['transfermarkt'] == imgs[0].attrs.get('alt'):
                                player.country1 = country
                                break
                        else:
                            player.error.append("unknown country1 '{}'".format(imgs[0].attrs.get('title')))
                        if len(imgs) > 1:
                            for country in countries:
                                if country.names['transfermarkt'] == imgs[1].attrs.get('alt'):
                                    player.country2 = country
                                    break
                            else:
                                player.error.append("unknown country2 '{}'".format(imgs[1].attrs.get('title')))
                    else:
                        player.error.append("no country found")

                    # Cell 4: text before 'm' with the comma removed, stored as
                    # an int (an empty value is silently accepted).
                    size = tds[4].text.split('m')[0].strip().replace(',', '')
                    if size.isnumeric():
                        player.size = int(size)
                    elif size:
                        player.error.append("bad format size '{}'".format(size))

                    # Cell 5: preferred foot ('-' means unknown).
                    foot = tds[5].text.strip()
                    if foot in cls.FEET:
                        player.foot = cls.FEET[foot]
                    elif foot != '-':
                        player.error.append("bad format foot '{}'".format(foot))

                    # Cell 8: contract end as dd.mm.yyyy ('-' means none).
                    # NOTE(review): the 'avr.' replacement cannot matter for a
                    # value that parses with a purely numeric format.
                    contract_date = tds[8].text.strip().replace('avr.', 'avril')
                    if contract_date != '-':
                        try:
                            player.contract_end = datetime.strptime(contract_date, '%d.%m.%Y')
                        except ValueError:
                            player.error.append("bad format contract_end '{}'".format(contract_date))

                    # Cell 9: market value; the suffix selects the multiplier
                    # (1e6 for 'mio. €', 1e3 for 'K €'), decimal comma accepted.
                    price = tds[9].text.strip()
                    if price.endswith('mio. €'):
                        try:
                            player.set_price(int(float(price.split(' ')[0].replace(',', '.')) * 1e6))
                        except ValueError:
                            player.error.append("price '{}' bad format".format(price))
                    elif price.endswith('K €'):
                        try:
                            player.set_price(int(float(price.split(' ')[0].replace(',', '.')) * 1e3))
                        except ValueError:
                            player.error.append("price '{}' bad format".format(price))
                    elif price != '-':
                        player.error.append("bad format price '{}'".format(price))

                    # Cell 1: nested rows — first row holds the full name and
                    # the portrait, second row holds the position.
                    name_trs = tds[1].find_all('tr')
                    if len(name_trs) > 0:
                        span_name = name_trs[0].find('span', class_='hide-for-small')
                        if span_name is not None:
                            player.set_names(span_name.text.strip())
                        else:
                            player.error.append("span containing full name not found")

                        image = name_trs[0].find('img', class_='bilderrahmen-fixed')
                        if image is not None:
                            image_url = image.attrs.get('src')
                            player.set_image(image_url)
                            # The query string minus its 'lm=' prefix is stored via set_lm().
                            player.image.set_lm(urlsplit(image_url).query.replace('lm=', ''))
                        else:
                            player.error.append('no image found')

                        if len(name_trs) > 1:
                            player.position = name_trs[1].text.strip()
                        else:
                            player.error.append("tr containing position not found")
                    else:
                        player.error.append("tr containing full name not found")

                    # An empty message list is normalised to None before yielding.
                    if not player.error:
                        player.error = None
                    yield player
footao.tv +- football.fr +- football365.fr +- football-direct.com +- footmercato.net +- lequipe.fr +- matchendirect.fr +- programme-television.org +- transfermarkt.fr + +All this data is collected for nonprofit purposes for `bestofbets.net`, a website made for free sports +predictions between friends. This project is no longer maintained, as I have started a new website to replace it : `1bet.fr` +(made with `Python Django Framework` and `PostgreSQL`). +I decline any responsibility for how you use this project. + + +## 2- DEPLOYMENT + +Deployment is quite basic: go to your project directory and execute these commands : + + python3.6 -m venv venv + source venv/bin/activate + pip install --upgrade pip + pip install -r doc/requirements.txt + deactivate + mkdir sam_aiohttp + cp -r venv/lib/python3.6/site-packages/aiohttp/* sam_aiohttp/ + patch -p0 < doc/aiohttp.diff + +MySQL, InfluxDB and Redis databases are needed for this program, as well as some environment variables, +all clearly listed in `setting.py`.
from time import sleep
from urllib.parse import urljoin

from core.mysqldb import MysqlDB
from lib.browser import Browser
from lib.league import League

# One-off script: for every league that has a 'flashresultats' URL, open its
# fixtures page in a browser and store the per-match URL on each stored match
# that has the same match day and the same home/away team names.

# Page path -> id of the <div> holding the match table on that page.
paths = {
    # 'resultats': 'fs-results',
    'calendrier': 'fs-fixtures'
}

db = None
browser = None
try:
    # The connection must be bound to `db`: every call below and the cleanup
    # in `finally` use that name.  It used to be bound to `mysqldb`, leaving
    # `db` as None so the very next line raised and nothing was committed.
    db = MysqlDB()
    browser = Browser(user_agent=db.get_random_ua(), headless=False)

    for league in League.get_leagues(db):
        if 'flashresultats' in league.urls:
            print('~~~ League {} - {} ~~~'.format(league.id, league.name))
            matches = list(league.get_matches(db))

            for path, dom_id in paths.items():
                url = '{}/{}'.format(league.urls['flashresultats'].rstrip('/'), path)
                print('~~~ URL {} ~~~'.format(url))
                browser.get(url)
                # Ask the page for more games five times, pausing after each call.
                for _ in range(5):
                    browser.execute_script('loadMoreGames();')
                    sleep(5)

                mday = None
                for tr in browser.find_elements_by_xpath('//div[@id="{}"]//tr'.format(dom_id)):
                    tds = tr.find_elements_by_tag_name('td')
                    if len(tds) == 6:
                        # Six cells: a match row.
                        home = tds[2].find_element_by_tag_name('span').text.strip()
                        away = tds[3].find_element_by_tag_name('span').text.strip()
                        id_ = tr.get_attribute('id').split('_')[2]
                        match_url = urljoin(league.urls['flashresultats'], '/match/{}'.format(id_))
                        for match in matches:
                            if match.mday == mday:
                                if match.home.names['flashresultats'] == home:
                                    if match.away.names['flashresultats'] == away:
                                        match.urls['flashresultats'] = match_url
                                        match.store_urls(db)
                                        print('[+] match {} - {}'.format(match.id, match.urls['flashresultats']))
                                        break
                        else:
                            print('[-] match {} not found ({} - {})'.format(id_, home, away))
                    elif len(tds) == 1:
                        # Single cell: a match-day header; its second word is the number.
                        mday = int(tds[0].text.split(' ')[1])
                        print('~~~ MDAY {} ~~~'.format(mday))

except BaseException as err:
    # Best-effort script: report the failure, then fall through to cleanup.
    print('ERROR {}: {}'.format(type(err), err))

finally:
    if browser is not None:
        browser.quit()
    if db is not None:
        db.commit()
        db.close()
def store_team_staff_from_flash(team, data, db):
    """Parse a squad page and store the staff names grouped by role.

    Rows whose first CSS class is ``player-type-title`` open a group; the
    group name is translated through ``staff_keys``.  Rows whose first class
    is ``player`` or ``coach`` add the text of their name link to the group
    that is currently open.

    Rows without a ``class`` attribute, name rows without a link, and rows
    that belong to no recognised group are skipped instead of raising or
    being filed under the previous group.

    :param team: team whose ``staff`` dict is rebuilt and stored
    :param data: raw page body (bytes)
    :param db: database handle passed to ``team.store_staff``
    """
    html = data.decode()
    soup = BeautifulSoup(html, 'html.parser')
    team.staff = dict()
    current_key = None
    for tr in soup.find_all('tr'):
        # A <tr> may carry no class at all; treat it as an unrelated row.
        row_classes = tr.attrs.get('class') or ['']
        row_type = row_classes[0]
        if row_type == 'player-type-title':
            title_cell = tr.find('td')
            key = title_cell.text.strip() if title_cell is not None else ''
            # Unknown titles close the current group so that the rows below
            # them are not attributed to the previous role.
            current_key = staff_keys.get(key)
            if current_key is not None:
                team.staff[current_key] = list()
        elif row_type in ('player', 'coach'):
            if current_key is None:
                continue
            name_cell = tr.find('td', class_='player-name')
            link = name_cell.find('a') if name_cell is not None else None
            if link is None:
                continue
            team.staff[current_key].append(link.text.strip())
    print('[+] Team #{} - {} : {}'.format(
        team.id, team.name, ' '.join(['{} {}'.format(len(value), key) for key, value in team.staff.items()]))
    )
    team.store_staff(db=db)
mysqldb.get_random_ua() + semaphore = asyncio.Semaphore(setting.SEMAPHORE) + + # Create and launch tasks + async with ClientSession(headers={'User-Agent': user_agent}) as session: + tasks = [ + asyncio.ensure_future(fetch(session, semaphore, team)) + for team in Team.get_teams(db=mysqldb, url='flashresultats') + ] + responses = await asyncio.gather(*tasks) + + # Get staff from html + for team, data in responses: + store_team_staff_from_flash(team=team, data=data, db=mysqldb) + + # Save results + end = int(time()) + print('[X] job done in {} seconds'.format(end-start)) + mysqldb.commit() + mysqldb.close() + + +def main(): + loop = asyncio.get_event_loop() + future = asyncio.ensure_future(run()) + loop.run_until_complete(future) + + +if __name__ == '__main__': + main() diff --git a/scripts/png2svg.py b/scripts/png2svg.py new file mode 100644 index 0000000..8f4e607 --- /dev/null +++ b/scripts/png2svg.py @@ -0,0 +1,68 @@ +from base64 import b64encode +from os import listdir + +from setting import IMAGES_FOLDER + +header = """""" +start_tag = """ + +""" +end_tag = """""" + + +for filename in listdir('{}/team/'.format(IMAGES_FOLDER)): + if filename[-4:] == '.png': + png_name = '{}/team/{}'.format(IMAGES_FOLDER, filename) + with open(png_name, 'rb') as png_file: + b64res = b64encode(png_file.read()).decode() + image_tag = """ + + """ + + for size in (30, 50, 80): + svg_name = '{}/team/h{}-'.format(IMAGES_FOLDER, size) + filename.replace('.png', '.svg') + svg_res = header + start_tag.format(size=size) + image_tag.format(size=size, b64res=b64res) + end_tag + with open(svg_name, 'w') as svg_file: + svg_file.write(svg_res) + + +for filename in listdir('{}/league/'.format(IMAGES_FOLDER)): + if filename[-4:] == '.png': + png_name = '{}/league/{}'.format(IMAGES_FOLDER, filename) + with open(png_name, 'rb') as png_file: + b64res = b64encode(png_file.read()).decode() + image_tag = """ + + """ + + for size in (35, 50, 80): + svg_name = '{}/league/h{}-'.format(IMAGES_FOLDER, size) + 
import os

# --- MySQL -----------------------------------------------------------------
MYSQL_HOST = os.getenv('MYSQL_HOST')
MYSQL_USER = os.getenv('MYSQL_USER')
MYSQL_PASS = os.getenv('MYSQL_PASS')
MYSQL_BASE = os.getenv('MYSQL_BASE')
MYSQL_PORT = int(os.getenv('MYSQL_PORT', 0))
MYSQL_SAVE = True

# --- Redis -----------------------------------------------------------------
REDIS_HOST = os.getenv('REDIS_HOST')
REDIS_PORT = int(os.getenv('REDIS_PORT', 0))
REDIS_BASE = int(os.getenv('REDIS_BASE', 0))

# --- InfluxDB ----------------------------------------------------------------
INFLUX_HOST = os.getenv('INFLUX_HOST')
INFLUX_PORT = int(os.getenv('INFLUX_PORT', 0))
# Defaults to 0 like the other numeric settings: without a default,
# int(None) raised TypeError at import time whenever the variable was unset.
# NOTE(review): confirm that an integer is really what the InfluxDB client
# expects here.
INFLUX_BASE = int(os.getenv('INFLUX_BASE', 0))

LOG_LEVEL = 'INFO'

PROXY = 'http://127.0.0.1:8118'
SEMAPHORE = 20
WORKERS = 3

IMAGES_FOLDER = os.getenv('IMAGES_FOLDER')
GECKODRIVER_PATH = '/usr/local/bin/geckodriver'
FIREFOX_PATH = '/usr/local/bin/firefox'

# Optional overrides, left unset by default.
ID_LEAGUE = None
ID_MATCH = None
ID_TEAM = None
ID_USER = None
ARGV = None

# A local, untracked module may override any value defined above.
try:
    from setting_local import *
except ImportError:
    pass
class UpdateAdminBets(WebUtils):
    """Job that fills in random league and europe bets for admin users."""

    def __init__(self):
        super().__init__(module_='update_admin_bets')
        league_users = list(User.get_users_league_bets(db=self.mysqldb, admin=True))
        europe_users = list(User.get_users_europe_bets(db=self.mysqldb, admin=True))
        self.nb_tasks = len(league_users) + len(europe_users)
        self.start()
        self.update_admin_league(users=league_users)
        self.update_admin_europe(users=europe_users)
        self.end()

    def update_admin_league(self, users):
        """Draw a weighted '1' / '2' / 'X' bet for every league match of each user.

        The draw is uniform over ``home + away + tie`` slots, where the tie
        weight is a third (integer division) of the two team coefficients.
        """
        self.logger.info('[*] Update users league bets')
        for user in users:
            try:
                self.logger.debug('user {} ({}): start'.format(user.id, user.name))
                id_month = 0
                for mday, matches in user.league_bets_matches.items():
                    for match in matches:
                        id_month = match.id_month
                        home_weight = match.home.coeff
                        away_weight = match.away.coeff
                        tie_weight = (home_weight + away_weight) // 3
                        draw = random.choice(range(home_weight + away_weight + tie_weight))
                        # Slots are laid out as [home | away | tie].
                        if draw >= home_weight + away_weight:
                            bet = 'X'
                        elif draw >= home_weight:
                            bet = '2'
                        else:
                            bet = '1'
                        user.league_bets[mday][match.idof10] = bet
                        self.logger.debug('match {} - bet {}'.format(match.id, bet))
                user.set_league_bets(id_month=id_month, db=self.mysqldb)

            except BaseException as e:
                self.logger.error('[-] user {} ({}): {} - {}\n{}'.format(user.id, user.name, type(e), e, format_exc()))
                self.errors += 1
            else:
                self.logger.info('[+] user {} ({}): OK'.format(user.id, user.name))
                self.tasks_done += 1
                self.quantity += 1

    def update_admin_europe(self, users):
        """Draw a random score and matching scorers for every europe match of each user."""
        self.logger.info('[*] Update users europe bets')
        for user in users:
            try:
                self.logger.debug('user {} ({}): start'.format(user.id, user.name))
                id_month = 0
                for match in user.europe_bets_matches:
                    if match is None:
                        continue
                    id_month = match.id_month
                    score_home = random.choice((0, 1, 1, 2, 2, 3))
                    score_away = random.choice((0, 0, 1, 1, 2, 3))
                    scores = user.europe_scores[match.idof4]
                    scores['home'] = score_home
                    scores['away'] = score_away
                    # One scorer per goal, sampled with replacement among the attackers.
                    scorers = user.europe_scorers[match.idof4]
                    scorers['home'] = random.choices(match.home.staff['attackers'], k=score_home)
                    scorers['away'] = random.choices(match.away.staff['attackers'], k=score_away)
                    self.logger.debug('match {} - bet {}:{}'.format(match.id, score_home, score_away))
                user.set_europe_bets(id_month=id_month, db=self.mysqldb)

            except BaseException as e:
                self.logger.error('[-] user {} ({}): {} - {}\n{}'.format(user.id, user.name, type(e), e, format_exc()))
                self.errors += 1
            else:
                self.logger.info('[+] user {} ({}): OK'.format(user.id, user.name))
                self.tasks_done += 1
                self.quantity += 1
int(os.path.getmtime(path))) + + for size in self.TEAM_SIZES: + defpath = '{}/team/h{}-default-team.svg'.format(setting.IMAGES_FOLDER, size) + images[size] = 'h{}-default-team.svg?v={}'.format(size, int(os.path.getmtime(defpath))) + path = '{}/team/h{}-{}.svg'.format(setting.IMAGES_FOLDER, size, team.id) + if os.path.exists(path): + images[size] = 'h{}-{}.svg?v={}'.format(size, team.id, int(os.path.getmtime(path))) + elif team.id_sport == 2: + path = '{}/country/h{}-{}.svg'.format(setting.IMAGES_FOLDER, size, team.country.id) + if os.path.exists(path): + images[size] = '../country/h{}-{}.svg?v={}'.format( + size, team.country.id, int(os.path.getmtime(path)) + ) + if images != team.images: + team.images = images + team.store_images(db=self.mysqldb) + + except BaseException as e: + team.error = format_exc() + self.logger.error('[-] team {}: {} - {}\n{}'.format(team.id, type(e), e, team.error)) + self.errors += 1 + else: + team.error = None + self.logger.info('[+] team {}: OK'.format(team.id)) + self.quantity += 1 + self.tasks_done += 1 + finally: + team.store_error(db=self.mysqldb) + + def update_league_images(self, leagues): + self.logger.info('[*] Update league images') + for league in leagues: + try: + images = dict() + + defpath = '{}/league/default-league.png'.format(setting.IMAGES_FOLDER) + images['png'] = 'default-league.png?v={}'.format(int(os.path.getmtime(defpath))) + path = '{}/league/{}.png'.format(setting.IMAGES_FOLDER, league.id) + if os.path.exists(path): + images['png'] = '{}.png?v={}'.format(league.id, int(os.path.getmtime(path))) + + for size in self.LEAGUE_SIZES: + defpath = '{}/league/h{}-default-league.svg'.format(setting.IMAGES_FOLDER, size) + images[size] = 'h{}-default-league.svg?v={}'.format(size, int(os.path.getmtime(defpath))) + path = '{}/league/h{}-{}.svg'.format(setting.IMAGES_FOLDER, size, league.id) + if os.path.exists(path): + images[size] = 'h{}-{}.svg?v={}'.format(size, league.id, int(os.path.getmtime(path))) + + if images != 
league.images: + league.images = images + league.store_images(db=self.mysqldb) + + except BaseException as e: + league.error = format_exc() + self.logger.error('[-] league {}: {} - {}\n{}'.format(league.id, type(e), e, league.error)) + self.erros += 1 + else: + league.error = None + self.logger.info('[+] league {}: OK'.format(league.id)) + self.quantity += 1 + self.tasks_done += 1 + finally: + league.store_error(db=self.mysqldb) + + +if __name__ == '__main__': + UpdateImages() diff --git a/update_month.py b/update_month.py new file mode 100644 index 0000000..af9ba5b --- /dev/null +++ b/update_month.py @@ -0,0 +1,130 @@ +from collections import OrderedDict +from datetime import date + +from lib.tools import n2v, real_round +from core.webutils import WebUtils +from lib.user import User +from lib.month import Month + + +class UpdateMonth(WebUtils): + + def __init__(self): + super().__init__(module_='update_month') + current_month, next_month = self.get_current_months() + if current_month.end <= date.today(): + users = list(User.get_users_results(month=current_month, db=self.mysqldb)) + self.nb_tasks = len(users) + self.start() + self.save_users_results(month=current_month, users=users) + current_month.toggle_current(db=self.mysqldb) + current_month.close_bets(db=self.mysqldb) + next_month.toggle_current(db=self.mysqldb) + self.end() + else: + self.logger.info('[X] Cron unnecessary before {}'.format(current_month.end.strftime('%Y-%m-%d'))) + + def save_users_results(self, month, users): + year = Month.current_year(self.mysqldb) + for user in users: + self.logger.info('[+] user {}: {}'.format(user.id, user.name)) + self.quantity += 1 + record = { + 'id_league': 0, + 'id_team': 0, + 'league_rank': 0, + 'league_nbusers': 0, + 'league_points': 0, + 'europe': user.europe, + 'europe_round': n2v(user.europe_round) + } + if user.id_league > 0 and user.league_points > 0: + record['id_league'] = user.id_league + record['id_team'] = user.id_team + record['league_rank'] = 
user.league_rank + record['league_points'] = user.league_points + record['league_nbusers'] = len( + [pl for pl in users if pl.id_league == user.id_league and pl.league_points > 0] + ) + user.records.setdefault(str(year), dict()) + user.records[str(year)][str(month.id)] = record + user.store_records(db=self.mysqldb) + self.logger.debug('record: {}'.format(record)) + + if month.rank: + user.bob_details = OrderedDict() + user.bob_points = 0 + _id_month = month.id + _year = year + for _ in range(10): + league_points = 0 + europe_points = 0 + if str(_year) in user.records and str(_id_month) in user.records[str(_year)]: + nb_users = user.records[str(_year)][str(_id_month)]['league_nbusers'] + rank = user.records[str(_year)][str(_id_month)]['league_rank'] + league_points = real_round(float(100 * nb_users) / (2 ** (rank - 1))) + europe = user.records[str(_year)][str(_id_month)]['europe'] + europe_round = user.records[str(_year)][str(_id_month)]['europe_round'] + europe_points_dict = { + 91: {'winner': 1000, 'final': 500, 'semi': 250, 'quarter': 125}, + 92: {'winner': 600, 'final': 300, 'semi': 150, 'quarter': 75} + } + europe_points = europe_points_dict[europe][europe_round] if europe in europe_points_dict else 0 + + user.bob_details[str(_id_month)] = { + 'league': league_points, + 'europe': europe_points + } + user.bob_points += league_points + user.bob_points += europe_points + + if _id_month == 1: + _id_month = 12 + elif _id_month == 8: + _id_month = 5 + _year -= 1 + else: + _id_month -= 1 + self.logger.debug('bob_details: {}'.format(user.bob_details)) + self.logger.debug('bob_points: {}'.format(user.bob_points)) + self.tasks_done += 1 + + if month.rank: + self.logger.info('[*] BOB Ranking') + users.sort(key=lambda x: x.bob_points, reverse=True) + rank = 0 + for user in users: + rank += 1 + user.bob_rank = rank + self.logger.debug('{}. 
{} - {}'.format(user.bob_rank, user.name, user.bob_points)) + user.store_bob_rank(db=self.mysqldb) + + def get_current_months(self): + current_month = None + next_month = None + done = False + + months = list(Month.get_months(db=self.mysqldb)) + for month in months: + if done and month.bets: + next_month = month + break + elif month.current: + current_month = month + done = True + + if current_month is None: + raise NameError('No current month found !') + if next_month is None: + for month in months: + if month.bets: + next_month = month + break + if next_month is None: + raise NameError('No next month found !') + + return current_month, next_month + + +if __name__ == '__main__': + UpdateMonth() diff --git a/update_news.py b/update_news.py new file mode 100755 index 0000000..20fe3c6 --- /dev/null +++ b/update_news.py @@ -0,0 +1,77 @@ +from traceback import format_exc + +from providers.controller import ProviderController +from lib.news import NewsSource +from core.webutils import WebUtils +import setting + + +class UpdateNews(WebUtils): + + def __init__(self): + super().__init__(module_='update_news') + news_sources = list(NewsSource.get_sources(db=self.mysqldb)) + self.nb_tasks = len(news_sources) + self.start() + self.run(news_sources, self.update_news_from_source) + self.end() + + def update_news_from_source(self, news_source, data): + try: + provider = ProviderController.get_provider(news_source.url) + newss = list() + for news in provider.get_newss_from_source(news_source, data): + news.store(db=self.mysqldb) + if news.id > 0: # News is not already existing + newss.append(news) + if len(newss) > 2 * setting.SEMAPHORE: + break + else: + break # News already added so stop to prevent duplicate entries + + self.run(newss, self.update_news_content) + self.run([news.image for news in newss if news.image.url is not None], self.update_news_image) + + except BaseException as e: + news_source.error = format_exc() + self.logger.error('[-] news_source {}: {} - 
class UpdateNotifications(WebUtils):
    """Job that schedules bet reminders for users who asked for them.

    ``user.notifications`` maps a bet type (``'league'`` / ``'europe'``) to
    a ``{method: [hours, ...]}`` mapping; one ``Notification`` is saved per
    method and per hour, dated that many hours before the first match.
    """

    def __init__(self):
        super().__init__(module_='update_notifications')
        league_users = list(User.get_users_league_bets(db=self.mysqldb))
        europe_users = list(User.get_users_europe_bets(db=self.mysqldb))
        self.nb_tasks = len(league_users) + len(europe_users)
        self.start()
        self.update_league_notifications(league_users)
        self.update_europe_notifications(europe_users)
        self.end()

    def _save_notifications(self, user, type_, first_match, mday_or_round):
        """Save one notification per (method, hour) configured for ``type_``."""
        for method, hours in user.notifications[type_].items():
            for hour in hours:
                notif = Notification(
                    id_user=user.id,
                    id_match=first_match.id,
                    date=first_match.start_date - timedelta(hours=hour),
                    mday_or_round=str(mday_or_round),
                    type_=type_,
                    method=method
                )
                notif.save(db=self.mysqldb)

    def update_league_notifications(self, users):
        """Schedule league reminders, one set per match day that has matches."""
        self.logger.info('[*] Update users league notifications')
        for user in users:
            # Membership test: the previous identity test (`is`) compared a
            # string with the whole mapping and was therefore never true.
            if user.notifications is not None and 'league' in user.notifications:
                try:
                    for mday, matches in user.league_bets_matches.items():
                        if matches:
                            self._save_notifications(user, 'league', matches[0], mday)

                except BaseException as e:
                    self.logger.error('[-] user {}: {} - {}\n{}'.format(user.id, type(e), e, format_exc()))
                    self.errors += 1
                else:
                    self.logger.info('[+] user {}: OK'.format(user.id))
                    self.tasks_done += 1
                    self.quantity += 1

    def update_europe_notifications(self, users):
        """Schedule europe reminders relative to the user's first europe match."""
        self.logger.info('[*] Update users europe notifications')
        for user in users:
            # Same fix as above: `in`, not `is`.
            if user.notifications is not None and 'europe' in user.notifications:
                try:
                    matches = user.europe_bets_matches
                    if matches:
                        # NOTE(review): the first entry is used as is; confirm
                        # it can never be a None placeholder.
                        self._save_notifications(user, 'europe', matches[0], user.europe_round)

                except BaseException as e:
                    self.logger.error('[-] user {}: {} - {}\n{}'.format(user.id, type(e), e, format_exc()))
                    self.errors += 1
                else:
                    self.logger.info('[+] user {}: OK'.format(user.id))
                    self.tasks_done += 1
                    self.quantity += 1
class UpdatePlayers(WebUtils):
    """Job that refreshes the players of each team, then their portraits."""

    def __init__(self):
        super().__init__(module_='update_players')
        teams = list(Team.get_teams(origin=self.module, id_team=self.args.id_team, db=self.mysqldb))
        # Filled by update_players(), consumed by the second run() below.
        self.images = []
        self.countries = list(Country.get_countries(self.mysqldb))
        self.nb_tasks = len(teams)
        self.start()
        self.run(teams, self.update_players)
        self.run(self.images, self.update_image)
        self.end()

    def update_players(self, team, data):
        """Store every player parsed from a team page and queue outdated portraits.

        :param team: team whose page was fetched
        :param data: raw page body handed to the provider
        """
        try:
            provider = ProviderController.get_provider(team.url)
            for player in provider.get_team_players(data, team, self.countries):
                player.store(self.mysqldb)
                player.get_image_details(self.mysqldb)
                picture = player.image
                outdated = picture.last_save < picture.last_modified
                # The placeholder portrait is never downloaded.
                if outdated and 'default.jpg' not in picture.url:
                    self.images.append(picture)

        except BaseException as e:
            team.error = format_exc()
            self.logger.error('[-] team {}: {} - {}\n{}'.format(team.id, type(e), e, team.error))
            self.errors += 1
        else:
            team.error = None
            self.logger.info('[+] team {}: OK'.format(team.id))
            self.tasks_done += 1
            self.quantity += 1
        finally:
            team.store_error(self.mysqldb)

    def update_image(self, image, data):
        """Write a downloaded portrait to the player image folder and record it.

        :param image: image descriptor queued by ``update_players``
        :param data: raw image bytes
        """
        player = Player(full_name=image.full_name, birth_date=image.birth_date)
        player.image = image
        try:
            target = os.path.join(setting.IMAGES_FOLDER, 'player', str(image.path))
            with open(target, 'wb') as fp:
                fp.write(data)
            player.store_image(self.mysqldb)

        except BaseException as e:
            player.error = format_exc()
            self.logger.error('[-] player {}: {} - {}\n{}'.format(player.full_name, type(e), e, player.error))
            self.errors += 1
        else:
            player.error = None
            self.logger.info('[+] player {}: OK'.format(player.full_name))
            self.quantity += 1
        finally:
            player.store_error(self.mysqldb)
class UpdatePreprod(WebUtils):
    """Job that rebuilds tables of the ``preprod`` schema from ``bob``."""

    # Executed in this order for every table.
    _STATEMENTS = (
        'DROP TABLE IF EXISTS preprod.{table}',
        'CREATE TABLE preprod.{table} LIKE bob.{table}',
        'INSERT INTO preprod.{table} SELECT * FROM bob.{table}',
    )

    def __init__(self):
        super().__init__(module_='update_preprod')
        tables = list(self.mysqldb.get_tables(self.args.table))
        self.nb_tasks = len(tables)
        self.start()
        self.update_tables(tables)
        self.end()

    def update_tables(self, tables):
        """Drop, recreate and refill each table; a failure only skips that table.

        :param tables: names of the tables to copy
        """
        for table in tables:
            try:
                for statement in self._STATEMENTS:
                    self.mysqldb.exec(statement.format(table=table))
            except BaseException as e:
                self.logger.error('[-] table {}: {} - {}\n{}'.format(table, type(e), e, format_exc()))
                self.errors += 1
            else:
                self.logger.info('[+] table {}: OK'.format(table))
                self.tasks_done += 1
                self.quantity += 1
self.run(groups, self.update_group_ranking) + else: + for team in league.teams: + team.set_stats(self.mysqldb) + if groups: + league.store_groups(groups, self.mysqldb) + + except BaseException as e: + league.error = format_exc() + self.logger.error('[-] league {}: {} - {}\n{}'.format(league.id, type(e), e, league.error)) + self.errors += 1 + else: + league.error = None + self.logger.info('[+] league {}: OK'.format(league.id)) + self.tasks_done += 1 + self.quantity += 1 + finally: + league.store_error(self.mysqldb) + + def update_group_ranking(self, group, data): + try: + provider = ProviderController.get_provider(group.url) + provider.get_group_ranking(group, data) + for team in group.league.teams: + team.set_stats(self.mysqldb) + + except BaseException as e: + group.league.error = format_exc() + self.logger.error('[-] league {} group {}: {} - {}\n{}'.format( + group.league.id, group.name, type(e), e, group.league.error + )) + else: + group.league.error = None + self.logger.info('[+] league {} group {}: OK'.format(group.league.id, group.name)) + finally: + group.league.store_error(self.mysqldb) + + +if __name__ == '__main__': + UpdateRankings() diff --git a/update_schedule.py b/update_schedule.py new file mode 100755 index 0000000..2621de6 --- /dev/null +++ b/update_schedule.py @@ -0,0 +1,73 @@ +from traceback import format_exc + +from providers.controller import ProviderController +from lib.match import Match, Scheduler +from core.webutils import WebUtils + + +class UpdateSchedule(WebUtils): + + def __init__(self): + super().__init__(module_='update_schedule') + schedulers = self.get_schedulers() + self.nb_tasks = len(schedulers) + self.start() + self.run(schedulers, self.update_schedule) + self.end() + + def get_schedulers(self): + schedulers = list() + for match in Match.get_matches(origin=self.module, id_league=self.args.id_league, db=self.mysqldb): + provider = ProviderController.get_provider(match.url) + url = provider.get_schedule_url(match) + + for 
_scheduler in schedulers: # Check if scheduler is already existing + if _scheduler.url == url: + scheduler = _scheduler + break + else: # Else create new scheduler + scheduler = Scheduler(url=url) + schedulers.append(scheduler) + scheduler.matches.append(match) # Append match to scheduler + return schedulers + + def update_schedule(self, scheduler, data): + try: + provider = ProviderController.get_provider(scheduler.url) + provider.get_schedule(scheduler, data) + for match in scheduler.matches: + if match.task_done: + if match.new_url not in (None, match.url): + match.url = match.new_url + match.store_url(db=self.mysqldb) + if match.new_start_date not in (None, match.start_date): + match.start_date = match.new_start_date + match.store_start_date(db=self.mysqldb) + self.logger.info('[+] match {}: OK'.format(match.id)) + self.quantity += 1 + + except BaseException as e: + self.logger.error('[-] scheduler {}: {} - {}\n{}'.format(scheduler.url, type(e), e, format_exc())) + self.errors += 1 + else: + self.logger.info('[+] scheduler {}: OK'.format(scheduler.url)) + if scheduler.recursive_id == 0: + self.tasks_done += 1 + + matches_not_done = [match for match in scheduler.matches if not match.task_done] + if scheduler.recursive_id < 1 and len(matches_not_done) > 0: + schedulers = list() + if scheduler.previous_url is not None: + previous_scheduler = Scheduler(url=scheduler.previous_url, recursive_id=scheduler.recursive_id+1) + previous_scheduler.matches = matches_not_done + schedulers.append(previous_scheduler) + if scheduler.next_url is not None: + next_scheduler = Scheduler(url=scheduler.next_url, recursive_id=scheduler.recursive_id + 1) + next_scheduler.matches = matches_not_done + schedulers.append(next_scheduler) + if len(schedulers) > 0: + self.run(schedulers, self.update_schedule) + + +if __name__ == '__main__': + UpdateSchedule() diff --git a/update_scores.py b/update_scores.py new file mode 100755 index 0000000..6cd8c0f --- /dev/null +++ b/update_scores.py @@ 
from datetime import datetime
from traceback import format_exc
import copy

from providers.controller import ProviderController
from lib.match import Event, Match
from core.webutils import WebUtils


class UpdateScores(WebUtils):
    """Job that refreshes score, status, events and commentary of matches."""

    def __init__(self):
        """Load the matches, derive the commentary tasks and run both passes.

        A match with ``url_score`` is fetched from that URL with
        ``json_parser`` enabled. A match with ``url_comms`` also yields a
        shallow copy pointed at ``url_comms`` for the commentary pass.
        """
        super().__init__(module_='update_scores')
        matches = list(Match.get_matches(
            origin=self.module, id_match=self.args.id_match, id_league=self.args.id_league, db=self.mysqldb
        ))
        matches_comms = list()
        for match in matches:
            if match.url_score is not None:
                match.url = match.url_score
                match.json_parser = True
            if match.url_comms is not None:
                # Shallow copy: only url/json_parser differ from the score task.
                match_comms = copy.copy(match)
                match_comms.url = match_comms.url_comms
                match_comms.json_parser = True
                matches_comms.append(match_comms)

        self.nb_tasks = len(matches) + len(matches_comms)
        self.start()
        self.run(matches, self.update_match)
        self.run(matches_comms, self.update_match_comms)
        self.end()

    def update_match(self, match, data):
        """Refresh *match* from its provider and apply the status transitions.

        The previous status and the previous squad/event counts are captured
        first, because the follow-up actions (coeffs, rankings, end date,
        user points, ``last_event``) depend on what changed. The score is
        stored in every successful run; the error state is always stored.

        :param match: match whose ``url`` selects the provider.
        :param data: payload handed over by ``self.run`` for this match.
        """
        try:
            old_status = match.status
            old_nb_squad = len(match.squad) if match.squad is not None else 0
            old_nb_events = len(match.events) if match.events is not None else 0
            match.squad = list()
            match.events = list()

            # Get info from source
            provider = ProviderController.get_provider(match.url)
            provider.get_match_info(match, data)

            # Get new status
            match.get_new_status()

            # If match has just started update coeffs
            if match.status not in (match.COMING, match.POSTPONED) and match.league.bets and match.coeffs['home'] == 0:
                match.update_coeffs(self.mysqldb)

            # If match is in progress update teams ranking
            if old_status not in (match.OVER, match.WAITING_SCORERS) and match.status != match.COMING:
                match.update_teams_ranking(self.mysqldb)

            # If match has just finished update end_date
            if old_status not in (match.OVER, match.WAITING_SCORERS) and match.status == match.WAITING_SCORERS:
                match.store_end_date(self.mysqldb)

            # If match is totally over update users points
            if old_status != match.OVER and match.status == match.OVER:
                match.update_users_league(self.mysqldb)
                match.update_users_europe(self.mysqldb)

            # If match is over set winner
            if match.status in (match.OVER, match.WAITING_SCORERS):
                match.set_winner()

            # Set last_event if necessary
            if match.status == match.COMING and old_nb_squad == 0 and len(match.squad) > 0:
                match.last_event = Event(type_='squad')
            elif old_status == match.COMING and match.status == match.FIRST_TIME:
                match.last_event = Event(type_='start')
            elif old_status == match.FIRST_TIME and match.status == match.HALF_TIME:
                match.last_event = Event(type_='half_time')
            elif old_status == match.SECOND_TIME and match.status == match.OVER:
                match.last_event = Event(type_='over')
            elif len(match.events) > old_nb_events and match.events[-1].type == 'goal':
                match.last_event = match.events[-1]

            match.store_score(self.mysqldb)  # In any case store match details

        except BaseException as e:
            match.error = format_exc()
            self.logger.error('[-] match {}: {} - {}\n{}'.format(match.id, type(e), e, match.error))
            self.errors += 1
        else:
            match.error = None
            self.logger.info('[+] match {}: OK'.format(match.id))
            self.tasks_done += 1
            self.quantity += 1
        finally:
            # Start date has passed but the match is still COMING: force status 1
            # and clear the minute (skipped for sport id 2).
            # NOTE(review): 1 is presumably one of the Match status constants - confirm.
            if match.start_date < datetime.now() and match.status == match.COMING and match.sport.id != 2:
                match.status = 1
                match.minute = ''
                match.store_minute(self.mysqldb)
            match.store_error(self.mysqldb)

    def update_match_comms(self, match, data):
        """Refresh the commentary of *match* and store it when non-empty.

        The provider is asked once per task; the original code issued the same
        ``get_match_comms`` call twice in a row.

        :param match: commentary copy of a match (``url`` is its ``url_comms``).
        :param data: payload handed over by ``self.run`` for this match.
        """
        try:
            # Get info from source
            provider = ProviderController.get_provider(match.url)
            provider.get_match_comms(match, data)
            if match.comms:
                match.store_comms(self.mysqldb)

        except BaseException as e:
            match.error = format_exc()
            self.logger.error('[-] match {}: {} - {}\n{}'.format(match.id, type(e), e, match.error))
            self.errors += 1
        else:
            match.error = None
            self.logger.info('[+] match {}: OK'.format(match.id))
            self.tasks_done += 1
            self.quantity += 1
        finally:
            match.store_error(self.mysqldb)


if __name__ == '__main__':
    UpdateScores()


# --- update_staff.py (new file, mode 100755) ---
from traceback import format_exc

from providers.controller import ProviderController
from core.webutils import WebUtils
from lib.team import Team


class UpdateStaff(WebUtils):
    """Job that refreshes the staff of every selected team."""

    def __init__(self):
        """Load the teams and process each one with update_staff."""
        super().__init__(module_='update_staff')
        teams = list(Team.get_teams(origin=self.module, id_team=self.args.id_team, db=self.mysqldb))
        self.nb_tasks = len(teams)
        self.start()
        self.run(teams, self.update_staff)
        self.end()

    def update_staff(self, team, data):
        """Store the staff extracted from *data* when it differs from ``team.staff``.

        :param team: team whose ``url`` selects the provider.
        :param data: payload handed over by ``self.run`` for this team.
        """
        try:
            provider = ProviderController.get_provider(team.url)
            new_staff = provider.get_team_staff(data)
            if new_staff != team.staff:
                team.staff = new_staff
                team.store_staff(self.mysqldb)

        except BaseException as e:
            team.error = format_exc()
            self.logger.error('[-] team {}: {} - {}\n{}'.format(team.id, type(e), e, team.error))
            self.errors += 1
        else:
            team.error = None
            self.logger.info('[+] team {}: OK'.format(team.id))
            self.tasks_done += 1
            self.quantity += 1
        finally:
            team.store_error(self.mysqldb)


if __name__ == '__main__':
    UpdateStaff()


# --- update_tvchannels.py (new file, mode 100644) ---
from traceback import format_exc

from providers.controller import ProviderController
from core.webutils import WebUtils
from lib.tvchannel import TvChannel


class UpdateTvChannels(WebUtils):
    """Job that stores the TV channels listed by every channel source."""

    def __init__(self):
        """Load the sources and process each one with update_tvchannels."""
        super().__init__(module_='update_tvchannels')
        sources = list(TvChannel.get_sources())
        self.nb_tasks = len(sources)
        self.start()
        self.run(sources, self.update_tvchannels)
        self.end()

    def update_tvchannels(self, source, data):
        """Store every TV channel the provider extracts for *source*.

        :param source: channel source whose ``url`` selects the provider; the
            provider fills ``source.tv_channels``.
        :param data: payload handed over by ``self.run`` for this source.
        """
        try:
            provider = ProviderController.get_provider(source.url)
            provider.get_tvchannels(source, data)
            for tv_channel in source.tv_channels:
                tv_channel.store(db=self.mysqldb)

        except BaseException as e:
            source.error = format_exc()
            # The item is a channel source, not a team: the log label says so.
            self.logger.error('[-] source {}: {} - {}\n{}'.format(source.url, type(e), e, source.error))
            self.errors += 1
        else:
            source.error = None
            self.logger.info('[+] source {}: OK'.format(source.url))
            self.tasks_done += 1
            self.quantity += 1


if __name__ == '__main__':
    UpdateTvChannels()


# --- update_tvschedule.py (new file, mode 100644) ---
from traceback import format_exc

from providers.controller import ProviderController
from lib.tvchannel import TvChannel
from lib.league import League
from lib.match import Match
from core.webutils import WebUtils


class UpdateTVSchedule(WebUtils):
    """Job that attaches broadcasting TV channels to the matches of each league."""

    def __init__(self):
        """Load leagues and the TV channels that have names, then process the leagues."""
        super().__init__(module_='update_tvschedule')
        leagues = list(League.get_leagues(db=self.mysqldb, origin=self.module, id_league=self.args.id_league))
        self.tv_channels = [tvc for tvc in TvChannel.get_tvchannels(db=self.mysqldb) if tvc.names is not None]
        self.nb_tasks = len(leagues)
        self.start()
        self.run(leagues, self.update_tvschedule)
        self.end()

    def update_tvschedule(self, league, data):
        """Store the TV channels found for the matches of *league*.

        ``self.quantity`` is incremented once per match that ends up with
        TV channels.

        :param league: league whose ``url`` selects the provider.
        :param data: payload handed over by ``self.run`` for this league.
        """
        try:
            league.matches = list(Match.get_matches(db=self.mysqldb, origin=self.module, id_league=league.id))
            provider = ProviderController.get_provider(league.url)
            provider.get_tvschedule(league, self.tv_channels, data)
            for match in league.matches:
                if match.tv_channels:
                    match.store_tvchannels(db=self.mysqldb)
                    self.quantity += 1

        except BaseException as e:
            self.logger.error('[-] scheduler {}: {} - {}\n{}'.format(league.url, type(e), e, format_exc()))
            self.errors += 1
        else:
            self.logger.info('[+] scheduler {}: OK'.format(league.url))
            self.tasks_done += 1


if __name__ == '__main__':
    UpdateTVSchedule()


# --- worker.py (new file, mode 100644) ---
from datetime import datetime, timedelta
import subprocess
import os.path
import shlex
import time
import sys

from core.log import get_logger


_DAY = timedelta(days=1)

# module name -> (look-back subtracted from "now",
#                 datetime fields overwritten on the result,
#                 minimum delay between two launches)
_SCHEDULES = {
    'update_scores': (timedelta(seconds=30), {'second': 0}, timedelta(seconds=30)),
    'update_news': (timedelta(minutes=1), {'second': 0}, timedelta(minutes=1)),
    'update_schedule': (_DAY, {'hour': 0}, _DAY),
    'update_images': (_DAY, {'hour': 1}, _DAY),
    'update_rankings': (_DAY, {'hour': 2}, _DAY),
    'update_staff': (_DAY, {'hour': 3}, _DAY),
    'update_admin_bets': (_DAY, {'hour': 4}, _DAY),
    'update_notifications': (_DAY, {'hour': 5}, _DAY),
    'update_preprod': (_DAY, {'hour': 6}, _DAY),
    'update_month': (_DAY, {'hour': 7}, _DAY),
    'update_tvschedule': (_DAY, {'hour': 8}, _DAY),
    'create_schedule': (_DAY, {'hour': 22}, timedelta(hours=12)),
}


def _initial_schedule(filename, now=None):
    """Return ``(last_date, frequency)`` for the module named *filename*.

    :param filename: module name without the ``.py`` suffix.
    :param now: reference datetime; defaults to ``datetime.now()``.
    :raises Exception: when *filename* is not a known module.
    """
    if filename not in _SCHEDULES:
        raise Exception('File {}.py not found'.format(filename))
    lookback, overrides, frequency = _SCHEDULES[filename]
    if now is None:
        now = datetime.now()
    return (now - lookback).replace(**overrides), frequency


def main():
    """Launch ``<module>.py`` (named by ``sys.argv[1]``) forever, on its schedule.

    The module is run with the ``venv/bin/python`` interpreter located next to
    this file. The loop polls once per second and launches the module as soon
    as ``frequency`` has elapsed since the previous launch.
    """
    filename = sys.argv[1]
    last_date, frequency = _initial_schedule(filename)

    logger = get_logger(name=filename)
    dirname = os.path.dirname(os.path.abspath(__file__))
    binary = os.path.join(dirname, 'venv/bin/python')
    prog = os.path.join(dirname, filename)
    # Build argv directly: splitting a formatted string would break on an
    # install path that contains spaces or quotes.
    args = [binary, '{}.py'.format(prog)]

    while True:
        current_date = datetime.now()
        if current_date >= last_date + frequency:
            logger.info('[O] Launching module {}.py ...'.format(filename))
            # NOTE(review): last_date becomes the launch time, so the hour set in
            # _SCHEDULES only influences the first launch - confirm this is intended.
            last_date = current_date
            subprocess.call(args)
        time.sleep(1)


if __name__ == '__main__':
    main()