scraper/match/match.go

343 lines
7.4 KiB
Go
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package match
import (
"fmt"
"log"
"net/url"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"1bet.fr/scraper/requests"
"1bet.fr/scraper/utils"
)
// Gender codes. Numbering starts at 1; the value 0 is not assigned to any
// named constant in this block.
const (
	GenderMale = iota + 1
)
// Match status codes. Numbering starts at 1; the value 0 is not assigned to
// any named constant in this block.
const (
	StatusComing = iota + 1
)
// Sport describes a sport by numeric identifier and by two name variants.
type Sport struct {
	Id int
	// Name and CleanName are two spellings of the sport's name.
	// NOTE(review): CleanName is presumably a normalised form of Name —
	// confirm against the code that populates it.
	Name, CleanName string
}
// Country carries only the numeric identifier of a country.
type Country struct {
	Id int // numeric identifier
}
// Player describes an individual competitor.
type Player struct {
	Id int
	// FullName and CleanName are two spellings of the player's name.
	// NOTE(review): CleanName is presumably a normalised form — confirm.
	FullName, CleanName string
	// Gender is an integer code; presumably one of the Gender* constants
	// declared in this package — confirm against callers.
	Gender int
}
// TeamImages holds three image names for a team. NewTeam initialises them to
// "t0-h30.svg", "t0-h50.svg" and "t0-h80.svg".
// NOTE(review): the H30/H50/H80 suffixes presumably denote image heights —
// confirm.
type TeamImages struct {
	H30, H50, H80 string
}
// Team describes a team attached to a sport and a country. Most attributes
// are pointers, so a nil value can stand for "not set".
type Team struct {
	Id      int
	Sport   *Sport
	Country *Country

	Name, CleanName     string
	ShortName, LongName *string

	// Gender is an optional integer gender code.
	Gender *int
	// Names maps a key to the team's name for that key; GetMatches stores the
	// scraped name under utils.KeywordMatchendirect.
	Names      *map[string]string
	PlayersUrl *string
	Images     *TeamImages

	Tags, CleanTags *[]string
	NewsCount       int

	// Error and Trace are optional diagnostic strings.
	// NOTE(review): presumably a short message plus a detailed trace, as on
	// Match — confirm where they are set for teams.
	Error, Trace *string
}
// NewTeam returns a Team bound to the given gender, sport and country. Its
// Images field is pre-filled with the "t0-h30.svg", "t0-h50.svg" and
// "t0-h80.svg" names; every other field keeps its zero value.
func NewTeam(gender *int, sport *Sport, country *Country) *Team {
	team := new(Team)
	team.Gender = gender
	team.Sport = sport
	team.Country = country
	team.Images = &TeamImages{H30: "t0-h30.svg", H50: "t0-h50.svg", H80: "t0-h80.svg"}
	return team
}
// Match describes a single fixture. Apart from Id, every attribute is a
// pointer, so nil can stand for "not set".
type Match struct {
	Id     int
	League *League

	// Participants: teams and/or individual players for each side.
	TeamHome, TeamAway     *Team
	PlayerHome, PlayerAway *Player

	// Position of the fixture inside its competition.
	MatchDay *int
	Round    *string
	Leg      *int

	// BaseUrl is filled by GetMatches from the fixture link found on the
	// schedule page; ScoreUrl and LiveUrl are not set in this file.
	BaseUrl, ScoreUrl, LiveUrl *string
	TvChannels                 *[]string

	// MatchDayId is the zero-based index of the fixture inside its match day,
	// as computed in GetMatches.
	MatchDayId, Status *int
	Minute             *string

	StartDate, EndDate *time.Time

	HomeScore, AwayScore *int
	SetsScore            *[]interface{}

	Winner, ExtraTime, ShootoutHome, ShootoutAway *int

	Squad, Events, Stats, Live *[]interface{}
	LastEvent                  *interface{}
	LastEventDate              *time.Time

	// Error is a short failure label and Trace the detailed message; both are
	// set by GetMatches when a row cannot be fully parsed.
	Error, Trace *string
}
// NewMatch returns a Match for the given league and participants. round,
// matchDay and leg are stored through utils.StringPointer / utils.IntPointer;
// all other fields keep their zero value.
func NewMatch(league *League, teamHome *Team, teamAway *Team, playerHome *Player, playerAway *Player, round string, matchDay int, leg int) *Match {
	m := &Match{League: league}
	m.TeamHome, m.TeamAway = teamHome, teamAway
	m.PlayerHome, m.PlayerAway = playerHome, playerAway
	m.MatchDay = utils.IntPointer(matchDay)
	m.Round = utils.StringPointer(round)
	m.Leg = utils.IntPointer(leg)
	return m
}
// source is one schedule page to scrape for a league, together with the
// parsing state GetMatches carries from row to row.
type source struct {
	League *League
	URL    *url.URL // page to download

	// matchDay and round are handed to every Match created from this source.
	matchDay int
	round    string

	// currentDate and currentTimezone are updated by GetMatches each time it
	// meets a date header row, and applied to the rows that follow.
	currentDate     *time.Time
	currentTimezone string
}
// newSource assembles a source for league whose URL is built from the given
// scheme, host, path and raw query string. round and matchDay are stored as
// given; the date-tracking fields start at their zero value.
func newSource(league *League, scheme string, host string, path string, query string, round string, matchDay int) *source {
	target := url.URL{Scheme: scheme, Host: host, Path: path, RawQuery: query}

	src := new(source)
	src.League = league
	src.URL = &target
	src.round = round
	src.matchDay = matchDay
	return src
}
// GetMatches downloads the page behind s.URL and returns one Match per
// fixture row found in it.
//
// Only utils.HostMatchendirect is handled; any other host returns an error.
// For that host the "#livescore tr" rows are walked in order: a row holding
// exactly one <th> is a date header that updates s.currentDate and
// s.currentTimezone, and every other row is turned into a Match dated with
// the most recent header.
//
// A row whose start time or link cannot be extracted is still returned, with
// Match.Error and Match.Trace describing the problem. A date header that
// cannot be parsed aborts the scrape and is returned as the error (it used to
// terminate the whole process through log.Fatalf). A fixture row met before
// any date header cannot be dated; it is logged and skipped instead of
// dereferencing a nil date.
func (s *source) GetMatches() ([]*Match, error) {
	var matches []*Match
	switch s.URL.Host {
	case utils.HostMatchendirect:
		doc, err := requests.GetDocumentFromURL(s.URL.String())
		if err != nil {
			return nil, err
		}

		// Each cannot be interrupted, so the first header failure is kept here
		// and every later row becomes a no-op.
		var headerErr error
		doc.Find("#livescore tr").Each(func(i int, row *goquery.Selection) {
			if headerErr != nil {
				return
			}

			headCell := row.Find("th")
			if len(headCell.Nodes) == 1 {
				curDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(headCell.Text()))
				if err != nil {
					headerErr = fmt.Errorf("parsing date header %q: %w", headCell.Text(), err)
					return
				}
				s.currentDate = &curDate
				s.currentTimezone = utils.FrenchTimezone(curDate)
				return
			}

			if s.currentDate == nil {
				// No header has been seen yet, so there is no day to attach
				// this row to.
				log.Printf("skipping row %d of %s: no date header seen yet", i, s.URL.String())
				return
			}

			teamHome := NewTeam(s.League.Gender, s.League.Sport, s.League.Country)
			teamAway := NewTeam(s.League.Gender, s.League.Sport, s.League.Country)
			match := NewMatch(s.League, teamHome, teamAway, &Player{}, &Player{}, s.round, s.matchDay, 0)

			// The site shows a placeholder when the kick-off time is unknown;
			// it is mapped to midnight.
			// NOTE(review): this file is reported to contain invisible Unicode
			// characters — confirm the placeholder literal below is
			// byte-identical to what the site emits.
			startTime := strings.TrimSpace(row.Find("td.lm1").Text())
			if startTime == "-- : --" {
				startTime = "00:00"
			}
			// NOTE(review): time.Parse only applies the offset of a zone
			// abbreviation when that abbreviation is defined in the current
			// location; otherwise it records a zero offset. Confirm the
			// process runs with a local zone in which the value returned by
			// utils.FrenchTimezone is defined, or switch to ParseInLocation.
			startDate, err := time.Parse(
				"2006-01-02 15:04 MST",
				s.currentDate.Format("2006-01-02 ")+startTime+" "+s.currentTimezone,
			)
			if err != nil {
				match.Error = utils.StringPointer("parse date error")
				match.Trace = utils.StringPointer(fmt.Sprint(err))
				matches = append(matches, match)
				return
			}
			match.StartDate = &startDate

			// Team names are stored under the Matchendirect keyword, without
			// the trailing "*" the page may append.
			homeNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")}
			match.TeamHome.Names = &homeNames
			awayNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")}
			match.TeamAway.Names = &awayNames

			basePath, ok := row.Find(".lm3 a").First().Attr("href")
			if !ok {
				match.Error = utils.StringPointer("unable to find href attr")
				match.Trace = utils.StringPointer("unable to find href attribute for element '.lm3 a'")
				matches = append(matches, match)
				return
			}
			baseUrl := &url.URL{Scheme: s.URL.Scheme, Host: s.URL.Host, Path: basePath}
			match.BaseUrl = utils.StringPointer(baseUrl.String())

			// When the source carries neither a round nor a match day, number
			// the fixtures sequentially: MatchesByMatchDay fixtures per match
			// day, with the day counter advancing each time the in-day index
			// wraps to 0. The > 0 guard avoids a division by zero.
			// NOTE(review): NewMatch always routes round and matchDay through
			// utils.StringPointer / utils.IntPointer; if those helpers never
			// return nil this branch cannot run — confirm.
			if match.Round == nil && match.MatchDay == nil &&
				s.League.MatchDays != nil &&
				s.League.MatchesByMatchDay != nil && *s.League.MatchesByMatchDay > 0 {
				if s.League.currentMatchDayId == 0 {
					s.League.currentMatchDay++
				}
				curMatchDay := s.League.currentMatchDay
				curMatchDayId := s.League.currentMatchDayId
				match.MatchDay = &curMatchDay
				match.MatchDayId = &curMatchDayId
				s.League.currentMatchDayId = (s.League.currentMatchDayId + 1) % *s.League.MatchesByMatchDay
			}
			matches = append(matches, match)
		})
		if headerErr != nil {
			return nil, headerErr
		}
	default:
		return nil, fmt.Errorf("unexpected source url %s", s.URL.String())
	}
	return matches, nil
}
// League describes a competition and the URLs it is scraped from.
type League struct {
	Id      int
	Sport   *Sport
	Country *Country

	Name, CleanName string
	Gender, Degree  *int

	// ScheduleUrl is the page ListSources starts from; RankingUrl and
	// ChannelUrl are not read in this file.
	ScheduleUrl, RankingUrl, ChannelUrl *string

	// MatchDays and MatchesByMatchDay must both be set for GetMatches to
	// number fixtures on its own; MatchesByMatchDay is the number of fixtures
	// after which its in-day index wraps around.
	MatchDays, MatchesByMatchDay *int
	TeamCount                    int

	Rounds, Groups []string

	// Loosely typed payloads; their shape is not visible from this file.
	Points, Promotions, Images, Schedule interface{}

	Tags, CleanTags []string
	NewsCount       int

	// Error and Trace are optional diagnostic strings.
	Error, Trace *string

	// Counters used by GetMatches while numbering fixtures: the current match
	// day and the zero-based index of the next fixture inside it.
	currentMatchDay, currentMatchDayId int
}
// matchDayLabelRe recognises round labels such as "12e Journée" and captures
// the complete match-day number. The digits are captured as one group
// (`(\d+)`): a repeated single-digit group would only keep the last digit.
var matchDayLabelRe = regexp.MustCompile(`(\d+)e\s+Journée`)

// ListSources downloads the league's schedule page and returns the list of
// pages that GetMatches should scrape.
//
// It returns (nil, nil) when the league has no ScheduleUrl, and an error when
// the page cannot be fetched, the URL cannot be parsed, or the host is not
// one of the supported ones.
//
//   - utils.HostMatchendirect: one source per ".fDate option" whose value has
//     a "<year>-<week>" fourth path segment at or after 2020-34. Options
//     whose value does not have that shape are ignored.
//   - utils.HostEurosport / utils.HostRugbyrama: one source per entry of the
//     rounds dropdown, targeting the page's AJAX endpoint with the entry's
//     round id. Labels of the form "<n>e Journée" become match day n with an
//     empty round; any other label is kept as the round name.
func (l *League) ListSources() ([]*source, error) {
	var sources []*source
	if l.ScheduleUrl == nil {
		return nil, nil
	}
	doc, err := requests.GetDocumentFromURL(*l.ScheduleUrl)
	if err != nil {
		return nil, err
	}
	originUrl, err := url.Parse(*l.ScheduleUrl)
	if err != nil {
		return nil, err
	}
	switch originUrl.Host {
	case utils.HostMatchendirect:
		// Earliest schedule page that is turned into a source.
		const firstYear, firstWeek = 2020, 34

		doc.Find(".fDate option").Each(func(i int, s *goquery.Selection) {
			value, ok := s.Attr("value")
			if !ok {
				return
			}
			// The value is used as a URL path; its fourth "/"-separated
			// segment is expected to read "<year>-<week>". Anything shorter
			// (for instance an empty placeholder option) is skipped rather
			// than indexed out of range.
			segments := strings.Split(value, "/")
			if len(segments) < 4 {
				return
			}
			parsedDate := strings.Split(segments[3], "-")
			if len(parsedDate) < 2 {
				return
			}
			year := utils.AtoI(parsedDate[0])
			week := utils.AtoI(parsedDate[1])
			if (year == firstYear && week >= firstWeek) || year > firstYear {
				sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, value, "", "", 0))
			}
		})
	case utils.HostEurosport, utils.HostRugbyrama:
		ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
		if !ok {
			return nil, fmt.Errorf("ajax-container url not found")
		}
		ajaxParsedUrl, err := url.Parse(ajaxUrl)
		if err != nil {
			return nil, err
		}
		// The query is parsed once and reused; only "roundid" changes from
		// one dropdown entry to the next, and it is encoded immediately.
		ajaxQuery := ajaxParsedUrl.Query()
		doc.Find("#results-match-nav .rounds-dropdown__round").Each(func(i int, s *goquery.Selection) {
			matchDay := 0
			// A missing attribute yields "", which is passed on unchanged.
			round, _ := s.Attr("data-label")
			if reMatch := matchDayLabelRe.FindStringSubmatch(round); reMatch != nil {
				round = ""
				matchDay = utils.AtoI(reMatch[1])
			}
			// As above, a missing round id is passed on as "".
			roundId, _ := s.Attr("data-round-id")
			ajaxQuery.Set("roundid", roundId)
			sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, ajaxParsedUrl.Path, ajaxQuery.Encode(), round, matchDay))
		})
	default:
		return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
	}
	return sources, nil
}