81 lines
2.4 KiB
Python
81 lines
2.4 KiB
Python
|
import asyncio
|
||
|
from time import time
|
||
|
|
||
|
from aiohttp import ClientSession
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
from core.mysqldb import MysqlDB
|
||
|
from lib.team import Team
|
||
|
import setting
|
||
|
|
||
|
# Maps the French section headings found on a squad page to the keys
# used in ``team.staff``. Headings that are not listed here are not
# recognised by the parser.
staff_keys = {
    'Gardiens': 'goalkeepers',
    'Défenseurs': 'defenders',
    'Milieux': 'midfielders',
    'Attaquants': 'attackers',
    'Entraineur': 'coach',
}
|
||
|
|
||
|
|
||
|
def store_team_staff_from_flash(team, data, db):
    """Parse a squad page and store the staff it lists on ``team``.

    The page is scanned row by row. A ``player-type-title`` row whose
    heading is a key of ``staff_keys`` opens a new category; each following
    ``player`` / ``coach`` row adds the text of its name link to that
    category. The result is assigned to ``team.staff``, a one-line summary
    is printed, and ``team.store_staff(db=db)`` is called.

    Args:
        team: Object exposing ``id``, ``name`` and ``store_staff``; its
            ``staff`` attribute is overwritten.
        data: Encoded HTML of the squad page (anything with ``decode()``).
        db: Database handle forwarded unchanged to ``team.store_staff``.
    """
    soup = BeautifulSoup(data.decode(), 'html.parser')
    team.staff = {}
    current_key = None

    for tr in soup.find_all('tr'):
        # Rows without a class attribute (or with an empty one) carry no
        # staff information; indexing them blindly used to raise.
        classes = tr.get('class') or []
        row_type = classes[0] if classes else None

        if row_type == 'player-type-title':
            cell = tr.find('td')
            heading = cell.text.strip() if cell is not None else ''
            # An unknown heading closes the current category so that the
            # rows below it are not filed under the previous one.
            current_key = staff_keys.get(heading)
            if current_key is not None:
                team.staff[current_key] = []
        elif row_type in ('player', 'coach'):
            if current_key is None:
                # Row seen outside of any recognised category.
                continue
            name_cell = tr.find('td', class_='player-name')
            link = name_cell.find('a') if name_cell is not None else None
            if link is None:
                # No name link to read; skip instead of failing on None.
                continue
            team.staff[current_key].append(link.text.strip())

    summary = ' '.join(
        '{} {}'.format(len(members), key)
        for key, members in team.staff.items()
    )
    print('[+] Team #{} - {} : {}'.format(team.id, team.name, summary))
    team.store_staff(db=db)
|
||
|
|
||
|
|
||
|
async def fetch(session, semaphore, team):
    """Download the ``/effectif`` page of ``team``.

    The request is made while ``semaphore`` is held, so the semaphore
    bounds how many requests run at the same time.

    Args:
        session: Client session used to issue the GET request.
        semaphore: Async context manager limiting concurrency.
        team: Object whose ``urls['flashresultats']`` is the base URL.

    Returns:
        A ``(team, body)`` tuple, ``body`` being the result of
        ``response.read()``.
    """
    async with semaphore:
        # Trailing slashes are stripped so the path is joined with a single '/'.
        base_url = team.urls['flashresultats'].rstrip('/')
        squad_url = '{}/effectif'.format(base_url)
        async with session.get(squad_url) as response:
            body = await response.read()
    return team, body
|
||
|
|
||
|
|
||
|
async def run():
    """Fetch every team's squad page, store the staff, then commit.

    Steps:
      1. Open the database and pick a User-Agent via ``get_random_ua()``.
      2. Fetch all squad pages concurrently; ``setting.SEMAPHORE`` is the
         size of the semaphore shared by the fetch tasks.
      3. Parse and store each downloaded page.
      4. Print the elapsed time and commit.

    The database handle is closed whether or not the job succeeds; the
    commit only happens when every step completed without raising.
    """
    start = int(time())
    mysqldb = MysqlDB()
    try:
        user_agent = mysqldb.get_random_ua()
        semaphore = asyncio.Semaphore(setting.SEMAPHORE)

        # Create and launch one download task per team.
        async with ClientSession(headers={'User-Agent': user_agent}) as session:
            tasks = [
                asyncio.ensure_future(fetch(session, semaphore, team))
                for team in Team.get_teams(db=mysqldb, url='flashresultats')
            ]
            responses = await asyncio.gather(*tasks)

        # Extract the staff from each downloaded page.
        for team, data in responses:
            store_team_staff_from_flash(team=team, data=data, db=mysqldb)

        # Report and persist the results.
        end = int(time())
        print('[X] job done in {} seconds'.format(end - start))
        mysqldb.commit()
    finally:
        # Previously skipped on any error, leaking the connection.
        mysqldb.close()
|
||
|
|
||
|
|
||
|
def main():
    """Run the scraping job to completion on a dedicated event loop.

    The loop is created and installed explicitly instead of relying on
    ``asyncio.get_event_loop()`` to create one implicitly, which is
    deprecated in recent Python versions. The loop is always closed
    afterwards, including when ``run()`` raises.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(run())
    finally:
        # Do not leave a closed loop registered as the current one.
        asyncio.set_event_loop(None)
        loop.close()
|
||
|
|
||
|
|
||
|
# Script entry point: start the job only when executed directly,
# not when the module is imported.
if __name__ == '__main__':
    main()
|