"""Fetch each team's "effectif" page from flashresultats and store its staff."""
import asyncio
from time import time

from aiohttp import ClientSession
from bs4 import BeautifulSoup

from core.mysqldb import MysqlDB
from lib.team import Team
import setting

# Maps the section headings found on the page to the keys used in
# ``team.staff``.
staff_keys = {
    'Gardiens': 'goalkeepers',
    'Défenseurs': 'defenders',
    'Milieux': 'midfielders',
    'Attaquants': 'attackers',
    'Entraineur': 'coach'
}


def _first_class(tag):
    """Return the first CSS class of *tag*, or None when it has none."""
    classes = tag.get('class')
    return classes[0] if classes else None


def store_team_staff_from_flash(team, data, db):
    """Parse a squad page and store the staff it lists on *team*.

    Rows whose first CSS class is ``player-type-title`` open a new group
    (looked up in ``staff_keys``); rows whose first class is ``player`` or
    ``coach`` add the name found in their ``td.player-name > a`` to the
    current group. The result is assigned to ``team.staff``, a one-line
    summary is printed and ``team.store_staff(db=db)`` is called.

    Args:
        team: object exposing ``id``, ``name``, a writable ``staff``
            attribute and a ``store_staff(db=...)`` method.
        data: raw page body as bytes (decoded with the default codec).
        db: database handle forwarded to ``team.store_staff``.
    """
    soup = BeautifulSoup(data.decode(), 'html.parser')
    team.staff = {}
    current_key = None

    for tr in soup.find_all('tr'):
        # Not every <tr> carries a class attribute; those rows are ignored.
        row_type = _first_class(tr)

        if row_type == 'player-type-title':
            title_cell = tr.find('td')
            title = title_cell.text.strip() if title_cell is not None else None
            # An unrecognised heading closes the current group so that the
            # rows below it are not attributed to the previous group.
            current_key = staff_keys.get(title)
            if current_key is not None:
                team.staff[current_key] = []
        elif row_type in ('player', 'coach'):
            if current_key is None:
                # No recognised heading is open: nothing to attach to.
                continue
            name_cell = tr.find('td', class_='player-name')
            link = name_cell.find('a') if name_cell is not None else None
            if link is None:
                continue
            team.staff[current_key].append(link.text.strip())

    summary = ' '.join(
        '{} {}'.format(len(members), group)
        for group, members in team.staff.items()
    )
    print('[+] Team #{} - {} : {}'.format(team.id, team.name, summary))
    team.store_staff(db=db)


async def fetch(session, semaphore, team):
    """Download the squad page of *team*.

    The request is made while holding *semaphore*.

    Returns:
        A ``(team, body)`` tuple where ``body`` is the raw response bytes.
    """
    url = '{}/effectif'.format(team.urls['flashresultats'].rstrip('/'))
    async with semaphore, session.get(url) as response:
        return team, await response.read()


async def run():
    """Fetch every team's squad page, store the staff and commit.

    The database connection is always closed; the commit only happens when
    every page was fetched and processed without raising.
    """
    start = int(time())
    mysqldb = MysqlDB()
    try:
        user_agent = mysqldb.get_random_ua()
        semaphore = asyncio.Semaphore(setting.SEMAPHORE)

        # Download all pages with one shared HTTP session.
        async with ClientSession(headers={'User-Agent': user_agent}) as session:
            responses = await asyncio.gather(*[
                fetch(session, semaphore, team)
                for team in Team.get_teams(db=mysqldb, url='flashresultats')
            ])

        # Extract the staff from each downloaded page.
        for team, data in responses:
            store_team_staff_from_flash(team=team, data=data, db=mysqldb)

        end = int(time())
        print('[X] job done in {} seconds'.format(end-start))
        mysqldb.commit()
    finally:
        mysqldb.close()


def main():
    """Run the scraping job to completion on a dedicated event loop."""
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated
    # when no loop is running.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(run())
    finally:
        loop.close()


if __name__ == '__main__':
    main()