scraper/match/match.go

343 lines
7.4 KiB
Go
Raw Permalink Normal View History

package match
import (
"fmt"
2020-10-19 09:26:23 +00:00
"log"
"net/url"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"1bet.fr/scraper/requests"
"1bet.fr/scraper/utils"
)
2020-10-19 09:26:23 +00:00
// Gender codes. The zero value is skipped on purpose, so GenderMale is 1 and
// an unset int never compares equal to a declared gender.
const (
	_ = iota
	GenderMale
)
// Status codes. The zero value is skipped on purpose, so StatusComing is 1.
// NOTE(review): presumably the values stored in Match.Status — confirm.
const (
	_ = iota
	StatusComing
)
// Sport identifies a sport by numeric id and name.
type Sport struct {
	Id int
	// CleanName is presumably a normalised form of Name — confirm with the
	// code that fills it.
	Name, CleanName string
}
2020-10-19 09:26:23 +00:00
// Country carries only the numeric id of a country.
type Country struct {
	Id int
}
// Player identifies an individual competitor.
type Player struct {
	Id int
	// CleanName is presumably a normalised form of FullName — confirm.
	FullName, CleanName string
	// Gender holds a gender code such as GenderMale; 0 means unset.
	Gender int
}
// TeamImages holds three image file names for a team. NewTeam fills them with
// "t0-h30.svg", "t0-h50.svg" and "t0-h80.svg".
// NOTE(review): the numeric suffix presumably is the image height in pixels — confirm.
type TeamImages struct {
	H30, H50, H80 string
}
type Team struct {
Id int
2020-10-19 09:26:23 +00:00
Sport *Sport
Country *Country
Name string
CleanName string
2020-10-19 09:26:23 +00:00
ShortName *string
LongName *string
Gender *int
2020-10-19 09:26:23 +00:00
Names *map[string]string
PlayersUrl *string
Images *TeamImages
2020-10-19 09:26:23 +00:00
Tags *[]string
CleanTags *[]string
NewsCount int
2020-10-19 09:26:23 +00:00
Error *string
Trace *string
}
// NewTeam allocates a Team bound to the given gender, sport and country.
// Images is initialised with the "t0-h30.svg", "t0-h50.svg" and "t0-h80.svg"
// file names; every other field keeps its zero value.
func NewTeam(gender *int, sport *Sport, country *Country) *Team {
	team := new(Team)
	team.Gender = gender
	team.Sport = sport
	team.Country = country
	team.Images = &TeamImages{H30: "t0-h30.svg", H50: "t0-h50.svg", H80: "t0-h80.svg"}
	return team
}
2020-10-19 09:26:23 +00:00
type Match struct {
Id int
League *League
TeamHome *Team
TeamAway *Team
PlayerHome *Player
PlayerAway *Player
2020-10-19 09:26:23 +00:00
MatchDay *int
Round *string
Leg *int
2020-10-19 09:26:23 +00:00
BaseUrl *string
ScoreUrl *string
LiveUrl *string
TvChannels *[]string
2020-10-19 09:26:23 +00:00
MatchDayId *int
Status *int
Minute *string
StartDate *time.Time
EndDate *time.Time
2020-10-19 09:26:23 +00:00
HomeScore *int
AwayScore *int
SetsScore *[]interface{}
Winner *int
ExtraTime *int
ShootoutHome *int
ShootoutAway *int
Squad *[]interface{}
Events *[]interface{}
Stats *[]interface{}
Live *[]interface{}
LastEvent *interface{}
LastEventDate *time.Time
2020-10-19 09:26:23 +00:00
Error *string
Trace *string
}
// NewMatch allocates a Match for the given league, teams and players.
// matchDay, round and leg are stored through utils.IntPointer and
// utils.StringPointer; every other field keeps its zero value.
func NewMatch(league *League, teamHome *Team, teamAway *Team, playerHome *Player, playerAway *Player, round string, matchDay int, leg int) *Match {
	m := new(Match)
	m.League = league
	m.TeamHome, m.TeamAway = teamHome, teamAway
	m.PlayerHome, m.PlayerAway = playerHome, playerAway
	m.MatchDay = utils.IntPointer(matchDay)
	m.Round = utils.StringPointer(round)
	m.Leg = utils.IntPointer(leg)
	return m
}
type source struct {
League *League
URL *url.URL
matchDay int
round string
currentDate *time.Time
currentTimezone string
2020-10-19 09:26:23 +00:00
}
// newSource builds a source for league whose URL is assembled from scheme,
// host, path and the raw query string. round and matchDay are stored as given;
// the date-tracking fields start at their zero value.
func newSource(league *League, scheme string, host string, path string, query string, round string, matchDay int) *source {
	target := url.URL{Scheme: scheme, Host: host, Path: path, RawQuery: query}
	src := source{League: league, URL: &target, round: round, matchDay: matchDay}
	return &src
}
func (s *source) GetMatches() ([]*Match, error) {
var matches []*Match
switch s.URL.Host {
case utils.HostMatchendirect:
doc, err := requests.GetDocumentFromURL(s.URL.String())
if err != nil {
return nil, err
}
doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) {
headCell := row.Find("th")
if len(headCell.Nodes) == 1 {
curDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(headCell.Text()))
if err != nil {
log.Fatalf("unexpected error while parsing date : %s", err)
return
}
s.currentDate = &curDate
s.currentTimezone = utils.FrenchTimezone(curDate)
2020-10-19 09:26:23 +00:00
} else {
teamHome := NewTeam(s.League.Gender, s.League.Sport, s.League.Country)
teamAway := NewTeam(s.League.Gender, s.League.Sport, s.League.Country)
match := NewMatch(s.League, teamHome, teamAway, &Player{}, &Player{}, s.round, s.matchDay, 0)
2020-10-19 09:26:23 +00:00
startTime := strings.TrimSpace(row.Find("td.lm1").Text())
if startTime == "-- : --" {
startTime = "00:00"
}
startDate, err := time.Parse(
"2006-01-02 15:04 MST",
s.currentDate.Format("2006-01-02 ") + startTime + " " + s.currentTimezone,
2020-10-19 09:26:23 +00:00
)
if err != nil {
match.Error = utils.StringPointer("parse date error")
match.Trace = utils.StringPointer(fmt.Sprint(err))
matches = append(matches, match)
return
}
match.StartDate = &startDate
homeNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")}
2020-10-19 09:26:23 +00:00
match.TeamHome.Names = &homeNames
awayNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")}
2020-10-19 09:26:23 +00:00
match.TeamAway.Names = &awayNames
basePath, ok := row.Find(".lm3 a").First().Attr("href")
if !ok {
match.Error = utils.StringPointer("unable to find href attr")
match.Trace = utils.StringPointer("unable to find href attribute for element '.lm3 a'")
matches = append(matches, match)
return
}
baseUrl := &url.URL{Scheme: s.URL.Scheme, Host: s.URL.Host, Path: basePath}
match.BaseUrl = utils.StringPointer(baseUrl.String())
if match.Round == nil && match.MatchDay == nil && s.League.MatchDays != nil && s.League.MatchesByMatchDay != nil {
if s.League.currentMatchDayId == 0 {
s.League.currentMatchDay ++
}
curMatchDay := s.League.currentMatchDay
curMatchDayId := s.League.currentMatchDayId
match.MatchDay = &curMatchDay
match.MatchDayId = &curMatchDayId
s.League.currentMatchDayId = (s.League.currentMatchDayId + 1) % *s.League.MatchesByMatchDay
}
matches = append(matches, match)
}
})
default:
return nil, fmt.Errorf("unexpected source url %s", s.URL.String())
}
return matches, nil
}
type League struct {
Id int
Sport *Sport
2020-10-19 09:26:23 +00:00
Country *Country
Name string
CleanName string
Gender *int
Degree *int
ScheduleUrl *string
RankingUrl *string
ChannelUrl *string
MatchDays *int
MatchesByMatchDay *int
TeamCount int
Rounds []string
Groups []string
Points interface{}
Promotions interface{}
Images interface{}
Schedule interface{}
Tags []string
CleanTags []string
NewsCount int
Error *string
Trace *string
2020-10-19 09:26:23 +00:00
currentMatchDay int
currentMatchDayId int
}
2020-10-19 09:26:23 +00:00
func (l *League) ListSources() ([]*source, error) {
var sources []*source
if l.ScheduleUrl == nil {
return nil, nil
}
doc, err := requests.GetDocumentFromURL(*l.ScheduleUrl)
if err != nil {
return nil, err
}
originUrl, err := url.Parse(*l.ScheduleUrl)
if err != nil {
return nil, err
}
switch originUrl.Host {
case utils.HostMatchendirect:
doc.Find(".fDate option").Each(func (i int, s *goquery.Selection) {
value, ok := s.Attr("value")
if !ok {
return
}
parsedDate := strings.Split(strings.Split(value, "/")[3], "-")
year := utils.AtoI(parsedDate[0])
week := utils.AtoI(parsedDate[1])
2020-10-19 09:26:23 +00:00
if (year == 2020 && week >= 34) || year > 2020 {
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, value, "", "", 0))
}
})
2020-10-19 09:26:23 +00:00
case utils.HostEurosport, utils.HostRugbyrama:
2020-10-19 09:26:23 +00:00
curRegexp := regexp.MustCompile(`(\d)+e\s+Journée`)
ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
if !ok {
return nil, fmt.Errorf("ajax-container url not found")
}
ajaxParsedUrl, err := url.Parse(ajaxUrl)
if err != nil {
return nil, err
}
ajaxQuery := ajaxParsedUrl.Query()
doc.Find("#results-match-nav .rounds-dropdown__round").Each(func (i int, s *goquery.Selection) {
2020-10-19 09:26:23 +00:00
matchDay := 0
round, _ := s.Attr("data-label")
reMatch := curRegexp.FindStringSubmatch(round)
if reMatch != nil {
2020-10-19 09:26:23 +00:00
round = ""
matchDay = utils.AtoI(reMatch[1])
}
roundId, _ := s.Attr("data-round-id")
ajaxQuery.Set("roundid", roundId)
2020-10-19 09:26:23 +00:00
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, ajaxParsedUrl.Path, ajaxQuery.Encode(), round, matchDay))
})
2020-10-19 09:26:23 +00:00
default:
return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
}
return sources, nil
}