"""Collect flashresultats team names and URLs for every league.

For each league, the '<league flashresultats url>/equipes' page is downloaded
concurrently, the team links it contains are matched against the teams
returned by ``league.get_teams(db)``, and every match is stored through
``team.store_names_and_urls(db)``.
"""
import asyncio
from time import time
from urllib.parse import urljoin

from aiohttp import ClientSession
from bs4 import BeautifulSoup

from lib.league import League
from core.mysqldb import MysqlDB
import setting


def store_league_teams_urls_from_flash(league, data, db):
    """Match the team links of a league page against the league's teams.

    Args:
        league: League whose teams are matched; ``league.get_teams(db)``
            supplies the candidates and ``league.urls['flashresultats']`` is
            the base used to resolve relative links.
        data: Raw bytes of the league's '/equipes' page.
        db: Database handle forwarded to ``league.get_teams`` and
            ``team.store_names_and_urls``.

    Every anchor found in the ``tournament-page-participants`` div is compared
    with each team's ``name`` and the values of its ``names`` mapping. On a
    match, the team's 'flashresultats' name and URL are set and stored;
    otherwise the anchor text is reported as not found.
    """
    teams = league.get_teams(db)
    html = data.decode()
    soup = BeautifulSoup(html, 'html.parser')
    div_teams = soup.find('div', id='tournament-page-participants')

    # ``find`` returns None when the div is absent (error page, changed
    # layout, ...). Report and skip this league instead of raising
    # AttributeError, which would abort the whole run before the commit.
    if div_teams is None:
        print('[-] League {}: participants block not found'.format(league.name))
        return

    for elt in div_teams.find_all('a'):
        for team in teams:
            if elt.text == team.name or elt.text in team.names.values():
                team.names['flashresultats'] = elt.text
                team.urls['flashresultats'] = urljoin(
                    league.urls['flashresultats'], elt.attrs['href'])
                team.store_names_and_urls(db)
                print('[+] {}: {} {}'.format(
                    team.name,
                    team.names['flashresultats'],
                    team.urls['flashresultats']))
                break
        else:
            # Inner loop finished without ``break``: no team matched this link.
            print('[-] {} not found'.format(elt.text))


async def fetch(session, semaphore, league):
    """Download the '/equipes' page of a league.

    Args:
        session: aiohttp ``ClientSession`` used to issue the GET request.
        semaphore: ``asyncio.Semaphore`` held for the duration of the request.
        league: League whose ``urls['flashresultats']`` is the base URL.

    Returns:
        A ``(league, body)`` tuple where ``body`` is the raw response bytes.
    """
    print('[+] League {} - {}'.format(league.id, league.name))
    url = '{}/equipes'.format(league.urls['flashresultats'].rstrip('/'))
    async with semaphore, session.get(url) as response:
        return league, await response.read()


async def run():
    """Fetch all league pages, store the matched teams and commit."""
    # Init variables
    start = int(time())
    mysqldb = MysqlDB()
    try:
        user_agent = mysqldb.get_random_ua()
        semaphore = asyncio.Semaphore(setting.SEMAPHORE)

        # Create and launch tasks
        async with ClientSession(headers={'User-Agent': user_agent}) as session:
            requests = [
                fetch(session, semaphore, league)
                for league in League.get_leagues(db=mysqldb)
            ]
            responses = await asyncio.gather(*requests)

        # Get teams urls and names
        for league, data in responses:
            store_league_teams_urls_from_flash(league, data, mysqldb)

        # Save results
        end = int(time())
        print('[X] job done in {} seconds'.format(end - start))
        mysqldb.commit()
    finally:
        # Always release the connection, including when a fetch or the
        # parsing step raised; the commit above only happens on success.
        mysqldb.close()


def main():
    """Run the scraping job to completion on the event loop."""
    loop = asyncio.get_event_loop()
    # ``run_until_complete`` wraps the coroutine in a task itself, so no
    # separate ``ensure_future`` call is needed outside a running loop.
    loop.run_until_complete(run())


if __name__ == '__main__':
    main()