70 lines
2.2 KiB
Python
70 lines
2.2 KiB
Python
import asyncio
|
|
from time import time
|
|
from urllib.parse import urljoin
|
|
|
|
from aiohttp import ClientSession
|
|
from bs4 import BeautifulSoup
|
|
|
|
from lib.league import League
|
|
from core.mysqldb import MysqlDB
|
|
import setting
|
|
|
|
|
|
def store_league_teams_urls_from_flash(league, data, db):
    """Match the links of a league's flashresultats page to stored teams.

    Parses the HTML in ``data``, walks every ``<a>`` found inside the
    ``tournament-page-participants`` container and compares its text with
    each team returned by ``league.get_teams(db)``. On the first match the
    team's ``flashresultats`` name and absolute URL are recorded and saved
    through ``team.store_names_and_urls(db)``. Links that match no team are
    reported on stdout.

    Args:
        league: League object; ``league.urls['flashresultats']`` is the base
            against which relative team links are resolved.
        data: Page body as bytes, decoded with the ``bytes.decode()``
            default (UTF-8).
        db: Database handle forwarded to ``get_teams`` and
            ``store_names_and_urls``.
    """
    teams = league.get_teams(db)
    soup = BeautifulSoup(data.decode(), 'html.parser')
    div_teams = soup.find('div', id='tournament-page-participants')
    if div_teams is None:
        # find() yields None when the container is missing (error page,
        # changed layout). Skip this league instead of letting
        # AttributeError abort the caller's whole batch.
        print('[-] {}: participants block not found'.format(league.name))
        return

    for elt in div_teams.find_all('a'):
        for team in teams:
            if elt.text == team.name or elt.text in team.names.values():
                team.names['flashresultats'] = elt.text
                team.urls['flashresultats'] = urljoin(
                    league.urls['flashresultats'], elt.attrs['href'])
                team.store_names_and_urls(db)
                print('[+] {}: {} {}'.format(
                    team.name,
                    team.names['flashresultats'],
                    team.urls['flashresultats']))
                break
        else:
            # for-else: reached only when no team matched this link.
            print('[-] {} not found'.format(elt.text))
|
|
|
|
|
|
async def fetch(session, semaphore, league):
    """Download the ``/equipes`` page of a league.

    Prints the league being processed, then, while holding ``semaphore``,
    issues a GET through ``session`` and reads the complete body.

    Returns:
        A ``(league, body)`` tuple, where ``body`` is the value produced by
        ``response.read()``.
    """
    print('[+] League {} - {}'.format(league.id, league.name))
    async with semaphore:
        # The request is only created once the semaphore slot is held.
        base_url = league.urls['flashresultats'].rstrip('/')
        request = session.get('{}/equipes'.format(base_url))
        async with request as page:
            body = await page.read()
    return league, body
|
|
|
|
|
|
async def run():
    """Fetch every league's team page and persist the matched team data.

    Opens a ``MysqlDB`` handle, downloads the page of each league returned
    by ``League.get_leagues`` concurrently (bounded by
    ``setting.SEMAPHORE``), feeds each response to
    ``store_league_teams_urls_from_flash`` and commits. The database handle
    is closed even when a step raises; the commit only happens on success.
    """
    # Init variables
    start = int(time())
    mysqldb = MysqlDB()
    try:
        user_agent = mysqldb.get_random_ua()
        semaphore = asyncio.Semaphore(setting.SEMAPHORE)

        # Create and launch tasks
        async with ClientSession(headers={'User-Agent': user_agent}) as session:
            tasks = [
                asyncio.ensure_future(fetch(session, semaphore, league))
                for league in League.get_leagues(db=mysqldb)
            ]
            responses = await asyncio.gather(*tasks)

        # Get teams urls and names
        for league, data in responses:
            store_league_teams_urls_from_flash(league, data, mysqldb)

        # Save results
        end = int(time())
        print('[X] job done in {} seconds'.format(end-start))
        mysqldb.commit()
    finally:
        # Release the connection on failure paths too; without this a
        # failed fetch or parse would leave the handle open.
        mysqldb.close()
|
|
|
|
|
|
def main():
    """Run the scraping job to completion on a dedicated event loop."""
    # Create the loop explicitly: asyncio.get_event_loop() with no running
    # loop is deprecated and raises RuntimeError on recent Python versions.
    loop = asyncio.new_event_loop()
    try:
        # run_until_complete() wraps the coroutine in a task on this loop,
        # so no ensure_future() call is needed.
        loop.run_until_complete(run())
    finally:
        # The loop was never closed before; release its resources.
        loop.close()
|
|
|
|
|
|
# Script entry point: start the job only when executed directly.
if __name__ == '__main__':
    main()
|