316 lines
6.9 KiB
Go
316 lines
6.9 KiB
Go
package match
|
||
|
||
import (
|
||
"fmt"
|
||
"log"
|
||
"net/url"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/PuerkitoBio/goquery"
|
||
|
||
"1bet.fr/scraper/requests"
|
||
"1bet.fr/scraper/utils"
|
||
)
|
||
|
||
// Gender identifiers.
//
// The first iota value (0) is discarded, so GenderMale is 1 and the zero
// value of an int does not match any declared gender.
// NOTE(review): presumably these are the values stored in the Gender fields
// of Player, Team and League — confirm against callers.
const (
	_ = iota
	// GenderMale is the identifier for the male gender (value 1).
	GenderMale = iota
	//GenderFemale = iota
)
|
||
|
||
// Sport identifies a sport that leagues and teams refer to.
type Sport struct {
	// Id is the numeric identifier of the sport.
	Id int
	// Name is the name of the sport.
	Name string
	// CleanName is a second form of the name.
	// NOTE(review): presumably a normalized/slug version of Name — confirm.
	CleanName string
}
|
||
|
||
// Country identifies a country that leagues and teams refer to.
type Country struct {
	// Id is the numeric identifier of the country.
	Id int
}
|
||
|
||
// Player describes an individual competitor; a Match references one as
// PlayerHome and one as PlayerAway.
type Player struct {
	// Id is the numeric identifier of the player.
	Id int
	// FullName is the player's full name.
	FullName string
	// CleanName is a second form of the name.
	// NOTE(review): presumably a normalized/slug version of FullName — confirm.
	CleanName string
	// Gender holds a gender identifier.
	// NOTE(review): presumably one of the Gender* constants — confirm.
	Gender int
}
|
||
|
||
type Team struct {
|
||
Id int
|
||
Sport *Sport
|
||
Country *Country
|
||
|
||
Name string
|
||
CleanName string
|
||
|
||
ShortName *string
|
||
LongName *string
|
||
Gender *int
|
||
|
||
Names *map[string]string
|
||
PlayersUrl *string
|
||
Images *interface{}
|
||
|
||
Tags *[]string
|
||
CleanTags *[]string
|
||
NewsCount int
|
||
|
||
Error *string
|
||
Trace *string
|
||
}
|
||
|
||
type Match struct {
|
||
Id int
|
||
League *League
|
||
TeamHome *Team
|
||
TeamAway *Team
|
||
PlayerHome *Player
|
||
PlayerAway *Player
|
||
|
||
MatchDay *int
|
||
Round *string
|
||
Leg *int
|
||
|
||
BaseUrl *string
|
||
ScoreUrl *string
|
||
LiveUrl *string
|
||
TvChannels *[]string
|
||
|
||
MatchDayId *int
|
||
Status *int
|
||
Minute *string
|
||
StartDate *time.Time
|
||
EndDate *time.Time
|
||
HomeScore *int
|
||
AwayScore *int
|
||
SetsScore *[]interface{}
|
||
Winner *int
|
||
ExtraTime *int
|
||
ShootoutHome *int
|
||
ShootoutAway *int
|
||
|
||
Squad *[]interface{}
|
||
Events *[]interface{}
|
||
Stats *[]interface{}
|
||
Live *[]interface{}
|
||
LastEvent *interface{}
|
||
LastEventDate *time.Time
|
||
|
||
Error *string
|
||
Trace *string
|
||
}
|
||
|
||
func NewMatch(league *League, teamHome *Team, teamAway *Team, playerHome *Player, playerAway *Player, round string, matchDay int, leg int) *Match {
|
||
return &Match{
|
||
League: league,
|
||
TeamHome: teamHome,
|
||
TeamAway: teamAway,
|
||
PlayerHome: playerHome,
|
||
PlayerAway: playerAway,
|
||
MatchDay: utils.IntPointer(matchDay),
|
||
Round: utils.StringPointer(round),
|
||
Leg: utils.IntPointer(leg),
|
||
}
|
||
}
|
||
|
||
type source struct {
|
||
League *League
|
||
URL *url.URL
|
||
matchDay int
|
||
round string
|
||
|
||
currentDate *time.Time
|
||
}
|
||
|
||
func newSource(league *League, scheme string, host string, path string, query string, round string, matchDay int) *source {
|
||
return &source{
|
||
League: league,
|
||
URL: &url.URL{
|
||
Scheme: scheme,
|
||
Host: host,
|
||
Path: path,
|
||
RawQuery: query,
|
||
},
|
||
round: round,
|
||
matchDay: matchDay,
|
||
}
|
||
}
|
||
|
||
func (s *source) GetMatches() ([]*Match, error) {
|
||
var matches []*Match
|
||
|
||
switch s.URL.Host {
|
||
case utils.HostMatchendirect:
|
||
doc, err := requests.GetDocumentFromURL(s.URL.String())
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) {
|
||
headCell := row.Find("th")
|
||
if len(headCell.Nodes) == 1 {
|
||
curDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(headCell.Text()))
|
||
if err != nil {
|
||
log.Fatalf("unexpected error while parsing date : %s", err)
|
||
return
|
||
}
|
||
s.currentDate = &curDate
|
||
} else {
|
||
match := NewMatch(s.League, &Team{Gender: s.League.Gender}, &Team{Gender: s.League.Gender}, &Player{}, &Player{}, s.round, s.matchDay, 0)
|
||
|
||
startTime := strings.TrimSpace(row.Find("td.lm1").Text())
|
||
if startTime == "-- : --" {
|
||
startTime = "00:00"
|
||
}
|
||
startDate, err := time.Parse(
|
||
"2006-01-02 15:04 MST",
|
||
s.currentDate.Format("2006-01-02 ") + startTime + " CEST",
|
||
)
|
||
if err != nil {
|
||
match.Error = utils.StringPointer("parse date error")
|
||
match.Trace = utils.StringPointer(fmt.Sprint(err))
|
||
matches = append(matches, match)
|
||
return
|
||
}
|
||
match.StartDate = &startDate
|
||
|
||
homeNames := map[string]string{utils.HostMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")}
|
||
match.TeamHome.Names = &homeNames
|
||
awayNames := map[string]string{utils.HostMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")}
|
||
match.TeamAway.Names = &awayNames
|
||
|
||
basePath, ok := row.Find(".lm3 a").First().Attr("href")
|
||
if !ok {
|
||
match.Error = utils.StringPointer("unable to find href attr")
|
||
match.Trace = utils.StringPointer("unable to find href attribute for element '.lm3 a'")
|
||
matches = append(matches, match)
|
||
return
|
||
}
|
||
baseUrl := &url.URL{Scheme: s.URL.Scheme, Host: s.URL.Host, Path: basePath}
|
||
match.BaseUrl = utils.StringPointer(baseUrl.String())
|
||
|
||
if match.Round == nil && match.MatchDay == nil && s.League.MatchDays != nil && s.League.MatchesByMatchDay != nil {
|
||
if s.League.currentMatchDayId == 0 {
|
||
s.League.currentMatchDay ++
|
||
}
|
||
curMatchDay := s.League.currentMatchDay
|
||
curMatchDayId := s.League.currentMatchDayId
|
||
match.MatchDay = &curMatchDay
|
||
match.MatchDayId = &curMatchDayId
|
||
s.League.currentMatchDayId = (s.League.currentMatchDayId + 1) % *s.League.MatchesByMatchDay
|
||
}
|
||
matches = append(matches, match)
|
||
}
|
||
})
|
||
|
||
default:
|
||
return nil, fmt.Errorf("unexpected source url %s", s.URL.String())
|
||
}
|
||
return matches, nil
|
||
}
|
||
|
||
type League struct {
|
||
Id int
|
||
Sport *Sport
|
||
Country *Country
|
||
|
||
Name string
|
||
CleanName string
|
||
Gender *int
|
||
Degree *int
|
||
|
||
ScheduleUrl *string
|
||
RankingUrl *string
|
||
ChannelUrl *string
|
||
|
||
MatchDays *int
|
||
MatchesByMatchDay *int
|
||
TeamCount int
|
||
|
||
Rounds []string
|
||
Groups []string
|
||
Points interface{}
|
||
Promotions interface{}
|
||
|
||
Images interface{}
|
||
Schedule interface{}
|
||
|
||
Tags []string
|
||
CleanTags []string
|
||
NewsCount int
|
||
|
||
Error *string
|
||
Trace *string
|
||
|
||
currentMatchDay int
|
||
currentMatchDayId int
|
||
}
|
||
|
||
func (l *League) ListSources() ([]*source, error) {
|
||
var sources []*source
|
||
|
||
if l.ScheduleUrl == nil {
|
||
return nil, nil
|
||
}
|
||
|
||
doc, err := requests.GetDocumentFromURL(*l.ScheduleUrl)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
originUrl, err := url.Parse(*l.ScheduleUrl)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
switch originUrl.Host {
|
||
case utils.HostMatchendirect:
|
||
doc.Find(".fDate option").Each(func (i int, s *goquery.Selection) {
|
||
value, ok := s.Attr("value")
|
||
if !ok {
|
||
return
|
||
}
|
||
parsedDate := strings.Split(strings.Split(value, "/")[3], "-")
|
||
year := utils.AtoI(parsedDate[0])
|
||
week := utils.AtoI(parsedDate[1])
|
||
if (year == 2020 && week >= 34) || year > 2020 {
|
||
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, value, "", "", 0))
|
||
}
|
||
})
|
||
|
||
case utils.HostEurosport, utils.HostRugbyrama:
|
||
curRegexp := regexp.MustCompile(`(\d)+e\s+Journée`)
|
||
|
||
ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
|
||
if !ok {
|
||
return nil, fmt.Errorf("ajax-container url not found")
|
||
}
|
||
ajaxParsedUrl, err := url.Parse(ajaxUrl)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
ajaxQuery := ajaxParsedUrl.Query()
|
||
|
||
doc.Find("#results-match-nav .rounds-dropdown__round").Each(func (i int, s *goquery.Selection) {
|
||
matchDay := 0
|
||
round, _ := s.Attr("data-label")
|
||
reMatch := curRegexp.FindStringSubmatch(round)
|
||
if reMatch != nil {
|
||
round = ""
|
||
matchDay = utils.AtoI(reMatch[1])
|
||
}
|
||
|
||
roundId, _ := s.Attr("data-round-id")
|
||
ajaxQuery.Set("roundid", roundId)
|
||
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, ajaxParsedUrl.Path, ajaxQuery.Encode(), round, matchDay))
|
||
})
|
||
|
||
default:
|
||
return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
|
||
}
|
||
return sources, nil
|
||
}
|
||
|
||
|