scraper/match/match.go

286 lines
5.0 KiB
Go

package match
import (
"fmt"
"net/url"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"1bet.fr/scraper/requests"
"1bet.fr/scraper/utils"
)
// Sport identifies a sport by a numeric id and two name variants.
type Sport struct {
	Id int
	// CleanName is presumably a normalized form of Name — confirm against
	// the code that populates it.
	Name, CleanName string
}
// Gender codes. Numbering starts at 1, so the zero value of an int field
// matches neither constant.
// NOTE(review): presumably the values carried by Team.Gender and
// League.Gender — confirm.
const (
	GenderMale = iota + 1
	GenderFemale
)
// Team is the record describing one team.
//
// Field meanings beyond what the names and types state are not visible in
// this file; the notes below are hedged accordingly.
type Team struct {
	Id, SportId, CountryId int

	// The four name variants of the team. CleanName is presumably a
	// normalized form of Name — confirm.
	Name, CleanName, ShortName, LongName string

	// Gender is presumably one of GenderMale / GenderFemale — confirm.
	Gender int

	// Names and Images are untyped payloads; their shape is not defined here.
	Names  interface{}
	Url    string
	Images interface{}

	Tags, CleanTags []string
	NewsCount       int

	// Error and Trace are free-form strings, presumably filled when
	// processing the team failed — confirm.
	Error, Trace string
}
// Leg codes, numbered from 1 so that 0 stays free as "unset".
// NOTE(review): presumably the values carried by Match.Leg — confirm.
const (
	LegFirst = iota + 1
	LegSecond
	LegReplay
)
// Match status codes, numbered from 1 so that 0 stays free as "unset".
// NOTE(review): presumably the values carried by Match.Status — confirm.
const (
	StatusFirstTime = iota + 1
	StatusHalfTime
	StatusSecondTime
	StatusFirstExtra
	StatusHalfExtra
	StatusSecondExtra
	StatusShootout
	StatusWaitScores
	StatusOver
	StatusPostponed
	StatusCancelled
)
// Winner codes, numbered from 1 so that 0 stays free as "unset".
// NOTE(review): presumably the values carried by Match.Winner — confirm.
const (
	WinnerHome = iota + 1
	WinnerAway
	WinnerDraw
)
// Extra-time codes, numbered from 1 so that 0 stays free as "unset".
// NOTE(review): presumably the values carried by Match.ExtraTime — confirm.
const (
	ExtraTimeExtraTime = iota + 1
	ExtraTimeShootout
)
// Match is the record describing one fixture: who plays, where the data
// comes from, and the current state and score.
//
// Field meanings beyond what the names and types state are not visible in
// this file; the notes below are hedged accordingly.
type Match struct {
	Id, LeagueId int

	// Participants, by team and by player id.
	TeamHomeId, TeamAwayId     int
	PlayerHomeId, PlayerAwayId int

	// Position of the match inside its competition.
	MatchDay, MatchDayId int
	Round                string
	// Leg is presumably one of the Leg* constants — confirm.
	Leg int

	// Source pages for this match.
	BaseUrl, ScoreUrl, LiveUrl string
	TvChannels                 []string

	// Status is presumably one of the Status* constants — confirm.
	Status, Minute     int
	StartDate, EndDate *time.Time

	// Score fields. Winner and ExtraTime are presumably Winner* and
	// ExtraTime* constants respectively — confirm.
	HomeScore, AwayScore, SetsScore int
	Winner, ExtraTime               int
	ShootoutHome, ShootoutAway      int

	// Untyped detail payloads; their element shapes are not defined here.
	Squad, Events, Stats, Live []interface{}
	LastEvent                  interface{}
	LastEventDate              *time.Time

	// Error and Trace are free-form strings, presumably filled when
	// processing the match failed — confirm.
	Error, Trace string
}
// League is the record describing one competition. Most optional attributes
// are pointers, so nil distinguishes "not set" from a zero value.
//
// Field meanings beyond what the names and types state are not visible in
// this file; the notes below are hedged accordingly.
type League struct {
	Id        int
	Sport     *Sport
	CountryId int

	// CleanName is presumably a normalized form of Name — confirm.
	Name, CleanName string

	// Gender is presumably one of GenderMale / GenderFemale — confirm.
	Gender, Degree *int

	// ScheduleUrl is the page ListSources downloads; a nil ScheduleUrl makes
	// ListSources return no sources.
	ScheduleUrl, RankingUrl, ChannelUrl *string

	MatchDays, CurrentMatchDay, MatchesByMatchDay *int
	TeamCount                                     int
	Rounds, Groups                                []string

	// Untyped payloads; their shapes are not defined here.
	Points, Promotions, Images, Schedule interface{}

	Tags, CleanTags []string
	NewsCount       int

	// Error and Trace are optional free-form strings, presumably filled when
	// processing the league failed — confirm.
	Error, Trace *string
}
// eurosportMatchDayRegexp pulls the match-day number out of round labels such
// as "12e Journée". The quantifier sits inside the capture group so the whole
// number is captured, not only its last digit.
var eurosportMatchDayRegexp = regexp.MustCompile(`(\d+)e\s+Journée`)

// ListSources downloads the league's schedule page and returns one Source per
// schedule entry found on it.
//
// A league without a ScheduleUrl yields (nil, nil). What is extracted depends
// on the host of ScheduleUrl:
//
//   - utils.HostMatchendirect: one Source per ".fDate option" whose value
//     carries a "<year>-<week>" segment at or after week 34 of 2020. Options
//     that do not have that shape are skipped.
//   - utils.HostEurosport, utils.HostRugbyrama: one Source per round listed
//     in the rounds dropdown. Its URL is the page's AJAX URL with the
//     "roundid" query parameter replaced. Labels matching "<n>e Journée" set
//     MatchDay; any other label is stored in Round. Entries without a
//     data-round-id attribute are skipped.
//
// Any other host is reported as an error, as are failures to parse or fetch
// the schedule URL and a missing AJAX URL on Eurosport/Rugbyrama pages.
func (l *League) ListSources() ([]*Source, error) {
	if l.ScheduleUrl == nil {
		return nil, nil
	}

	// Validate the URL before spending a network round trip on it.
	originUrl, err := url.Parse(*l.ScheduleUrl)
	if err != nil {
		return nil, fmt.Errorf("parsing schedule url %q: %w", *l.ScheduleUrl, err)
	}
	doc, err := requests.GetDocumentFromURL(*l.ScheduleUrl)
	if err != nil {
		return nil, fmt.Errorf("fetching schedule %q: %w", *l.ScheduleUrl, err)
	}

	var sources []*Source
	switch originUrl.Host {
	case utils.HostMatchendirect:
		// Earliest schedule week that is kept.
		const (
			firstYear = 2020
			firstWeek = 34
		)
		doc.Find(".fDate option").Each(func(_ int, s *goquery.Selection) {
			value, ok := s.Attr("value")
			if !ok {
				return
			}
			// The fourth "/"-separated segment is expected to be
			// "<year>-<week>"; anything shorter (e.g. a placeholder option)
			// is skipped instead of panicking on an out-of-range index.
			segments := strings.Split(value, "/")
			if len(segments) < 4 {
				return
			}
			parsedDate := strings.Split(segments[3], "-")
			if len(parsedDate) < 2 {
				return
			}
			year := utils.AtoI(parsedDate[0])
			week := utils.AtoI(parsedDate[1])
			// Compare (year, week) as a pair: every week of a later year
			// qualifies, not only weeks >= firstWeek.
			if year < firstYear || (year == firstYear && week < firstWeek) {
				return
			}
			sources = append(sources, &Source{
				League: l,
				Url: &url.URL{
					Scheme: originUrl.Scheme,
					Host:   originUrl.Host,
					Path:   value,
				},
			})
		})
	case utils.HostEurosport, utils.HostRugbyrama:
		ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
		if !ok {
			return nil, fmt.Errorf("ajax-container url not found")
		}
		ajaxParsedUrl, err := url.Parse(ajaxUrl)
		if err != nil {
			return nil, fmt.Errorf("parsing ajax url %q: %w", ajaxUrl, err)
		}
		// One query map is reused for every round: only "roundid" changes
		// and it is encoded immediately after being set.
		ajaxQuery := ajaxParsedUrl.Query()
		doc.Find("#results-match-nav .rounds-dropdown__round").Each(func(_ int, s *goquery.Selection) {
			// Without a round id the resulting URL could not address any
			// round, so the entry is skipped.
			roundId, ok := s.Attr("data-round-id")
			if !ok {
				return
			}

			var round *string
			var matchDay *int
			// A missing label is treated as an empty round name.
			roundStr, _ := s.Attr("data-label")
			if reMatch := eurosportMatchDayRegexp.FindStringSubmatch(roundStr); reMatch != nil {
				mdayInt := utils.AtoI(reMatch[1])
				matchDay = &mdayInt
			} else {
				round = &roundStr
			}

			ajaxQuery.Set("roundid", roundId)
			sources = append(sources, &Source{
				League: l,
				Url: &url.URL{
					Scheme:   originUrl.Scheme,
					Host:     originUrl.Host,
					Path:     ajaxParsedUrl.Path,
					RawQuery: ajaxQuery.Encode(),
				},
				Round:    round,
				MatchDay: matchDay,
			})
		})
	default:
		return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
	}
	return sources, nil
}
// Source is one fetchable schedule page of a League.
type Source struct {
	// League is the league this source was listed for.
	League *League
	// Url is the page GetMatches downloads; its Host selects the parser.
	Url *url.URL
	// MatchDay and Round label the page. ListSources sets at most one of
	// them for Eurosport/Rugbyrama sources and neither for Matchendirect.
	MatchDay *int
	Round    *string

	// currentDate is the most recent date header parsed by GetMatches.
	currentDate *time.Time
}
// GetMatches downloads the source page and walks its rows.
//
// For utils.HostMatchendirect every child cell of every "#livescore tr" row
// is inspected: a cell with colspan="4" is parsed as a date header and, on
// success, stored in s.currentDate. For utils.HostEurosport and
// utils.HostRugbyrama nothing is fetched. Any other host is an error.
//
// NOTE(review): no Match is appended anywhere in this function, so the
// returned slice is always nil. Parse failures and parsed dates are printed
// to stdout, which looks like leftover debugging output.
func (s *Source) GetMatches() ([]*Match, error) {
	var matches []*Match

	host := s.Url.Host
	if host != utils.HostMatchendirect {
		if host == utils.HostEurosport || host == utils.HostRugbyrama {
			// No extraction is implemented for these hosts.
			return matches, nil
		}
		return nil, fmt.Errorf("unexpected source url %s", s.Url.String())
	}

	page, err := requests.GetDocumentFromURL(s.Url.String())
	if err != nil {
		return nil, err
	}

	// visitCell records the date carried by a date-header cell; every other
	// cell is ignored.
	visitCell := func(_ int, cell *goquery.Selection) {
		span, found := cell.Attr("colspan")
		if !found || span != "4" {
			return
		}
		day, parseErr := time.Parse("Monday 02 January 2006", utils.EnglishDateString(cell.Text()))
		if parseErr != nil {
			// Best effort: an unparseable header leaves the previous date
			// in place.
			fmt.Println(parseErr)
			return
		}
		s.currentDate = &day
		fmt.Println(s.currentDate)
	}

	page.Find("#livescore tr").Each(func(_ int, tr *goquery.Selection) {
		tr.Children().Each(visitCell)
	})
	return matches, nil
}