2020-10-10 17:08:06 +00:00
|
|
|
|
package match
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"fmt"
|
2020-10-19 09:26:23 +00:00
|
|
|
|
"log"
|
2020-10-10 17:08:06 +00:00
|
|
|
|
"net/url"
|
|
|
|
|
"regexp"
|
|
|
|
|
"strings"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
|
|
|
|
|
|
"1bet.fr/scraper/requests"
|
|
|
|
|
"1bet.fr/scraper/utils"
|
|
|
|
|
)
|
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
// Gender identifiers. The zero value is deliberately skipped with the blank
// identifier, so the first named constant is 1.
const (
	_ = iota
	// GenderMale is the only gender value declared in this group; it equals 1.
	GenderMale
)
|
|
|
|
|
|
|
|
|
|
// Match status identifiers. The zero value is deliberately skipped with the
// blank identifier, so the first named constant is 1.
const (
	_ = iota
	// StatusComing is the only status value declared in this group; it equals 1.
	StatusComing
)
|
|
|
|
|
|
2020-10-10 17:08:06 +00:00
|
|
|
|
// Sport identifies the sport a League or Team belongs to.
type Sport struct {
	// Id is the numeric identifier of the sport.
	Id int
	// Name is the sport's name.
	Name string
	// CleanName is a second name field; presumably a normalized form of
	// Name — TODO confirm against the code that fills it.
	CleanName string
}
|
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
// Country identifies the country a League or Team is attached to.
type Country struct {
	// Id is the numeric identifier of the country.
	Id int
}
|
|
|
|
|
|
|
|
|
|
// Player describes an individual competitor, referenced by Match through
// its PlayerHome and PlayerAway fields.
type Player struct {
	// Id is the numeric identifier of the player.
	Id int
	// FullName is the player's full name.
	FullName string
	// CleanName is a second name field; presumably a normalized form of
	// FullName — TODO confirm against the code that fills it.
	CleanName string
	// Gender holds a gender identifier; presumably one of the Gender*
	// constants — TODO confirm.
	Gender int
}
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
2020-12-02 16:58:49 +00:00
|
|
|
|
// TeamImages groups three image references for a team. NewTeam fills them
// with the placeholder file names "t0-h30.svg", "t0-h50.svg" and "t0-h80.svg".
type TeamImages struct {
	// H30 is the image reference for the "h30" variant (presumably a
	// 30-pixel-high rendition — TODO confirm).
	H30 string
	// H50 is the image reference for the "h50" variant.
	H50 string
	// H80 is the image reference for the "h80" variant.
	H80 string
}
|
|
|
|
|
|
2020-10-10 17:08:06 +00:00
|
|
|
|
type Team struct {
|
|
|
|
|
Id int
|
2020-10-19 09:26:23 +00:00
|
|
|
|
Sport *Sport
|
|
|
|
|
Country *Country
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
|
|
|
|
Name string
|
|
|
|
|
CleanName string
|
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
ShortName *string
|
|
|
|
|
LongName *string
|
|
|
|
|
Gender *int
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
Names *map[string]string
|
|
|
|
|
PlayersUrl *string
|
2020-12-02 16:58:49 +00:00
|
|
|
|
Images *TeamImages
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
Tags *[]string
|
|
|
|
|
CleanTags *[]string
|
2020-10-10 17:08:06 +00:00
|
|
|
|
NewsCount int
|
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
Error *string
|
|
|
|
|
Trace *string
|
2020-10-10 17:08:06 +00:00
|
|
|
|
}
|
|
|
|
|
|
2020-12-02 16:58:49 +00:00
|
|
|
|
func NewTeam(gender *int, sport *Sport, country *Country) *Team {
|
|
|
|
|
return &Team{
|
|
|
|
|
Gender: gender,
|
|
|
|
|
Images: &TeamImages{
|
|
|
|
|
H30: "t0-h30.svg",
|
|
|
|
|
H50: "t0-h50.svg",
|
|
|
|
|
H80: "t0-h80.svg",
|
|
|
|
|
},
|
|
|
|
|
Sport: sport,
|
|
|
|
|
Country: country,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
type Match struct {
|
|
|
|
|
Id int
|
|
|
|
|
League *League
|
|
|
|
|
TeamHome *Team
|
|
|
|
|
TeamAway *Team
|
|
|
|
|
PlayerHome *Player
|
|
|
|
|
PlayerAway *Player
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
MatchDay *int
|
|
|
|
|
Round *string
|
|
|
|
|
Leg *int
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
BaseUrl *string
|
|
|
|
|
ScoreUrl *string
|
|
|
|
|
LiveUrl *string
|
|
|
|
|
TvChannels *[]string
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
MatchDayId *int
|
|
|
|
|
Status *int
|
|
|
|
|
Minute *string
|
2020-10-10 17:08:06 +00:00
|
|
|
|
StartDate *time.Time
|
|
|
|
|
EndDate *time.Time
|
2020-10-19 09:26:23 +00:00
|
|
|
|
HomeScore *int
|
|
|
|
|
AwayScore *int
|
|
|
|
|
SetsScore *[]interface{}
|
|
|
|
|
Winner *int
|
|
|
|
|
ExtraTime *int
|
|
|
|
|
ShootoutHome *int
|
|
|
|
|
ShootoutAway *int
|
|
|
|
|
|
|
|
|
|
Squad *[]interface{}
|
|
|
|
|
Events *[]interface{}
|
|
|
|
|
Stats *[]interface{}
|
|
|
|
|
Live *[]interface{}
|
|
|
|
|
LastEvent *interface{}
|
2020-10-10 17:08:06 +00:00
|
|
|
|
LastEventDate *time.Time
|
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
Error *string
|
|
|
|
|
Trace *string
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func NewMatch(league *League, teamHome *Team, teamAway *Team, playerHome *Player, playerAway *Player, round string, matchDay int, leg int) *Match {
|
|
|
|
|
return &Match{
|
|
|
|
|
League: league,
|
|
|
|
|
TeamHome: teamHome,
|
|
|
|
|
TeamAway: teamAway,
|
|
|
|
|
PlayerHome: playerHome,
|
|
|
|
|
PlayerAway: playerAway,
|
|
|
|
|
MatchDay: utils.IntPointer(matchDay),
|
|
|
|
|
Round: utils.StringPointer(round),
|
|
|
|
|
Leg: utils.IntPointer(leg),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type source struct {
|
|
|
|
|
League *League
|
|
|
|
|
URL *url.URL
|
|
|
|
|
matchDay int
|
|
|
|
|
round string
|
|
|
|
|
|
|
|
|
|
currentDate *time.Time
|
2020-12-02 16:58:49 +00:00
|
|
|
|
currentTimezone string
|
2020-10-19 09:26:23 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func newSource(league *League, scheme string, host string, path string, query string, round string, matchDay int) *source {
|
|
|
|
|
return &source{
|
|
|
|
|
League: league,
|
|
|
|
|
URL: &url.URL{
|
|
|
|
|
Scheme: scheme,
|
|
|
|
|
Host: host,
|
|
|
|
|
Path: path,
|
|
|
|
|
RawQuery: query,
|
|
|
|
|
},
|
|
|
|
|
round: round,
|
|
|
|
|
matchDay: matchDay,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (s *source) GetMatches() ([]*Match, error) {
|
|
|
|
|
var matches []*Match
|
|
|
|
|
|
|
|
|
|
switch s.URL.Host {
|
|
|
|
|
case utils.HostMatchendirect:
|
|
|
|
|
doc, err := requests.GetDocumentFromURL(s.URL.String())
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) {
|
|
|
|
|
headCell := row.Find("th")
|
|
|
|
|
if len(headCell.Nodes) == 1 {
|
|
|
|
|
curDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(headCell.Text()))
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatalf("unexpected error while parsing date : %s", err)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
s.currentDate = &curDate
|
2020-12-02 16:58:49 +00:00
|
|
|
|
s.currentTimezone = utils.FrenchTimezone(curDate)
|
2020-10-19 09:26:23 +00:00
|
|
|
|
} else {
|
2020-12-02 16:58:49 +00:00
|
|
|
|
teamHome := NewTeam(s.League.Gender, s.League.Sport, s.League.Country)
|
|
|
|
|
teamAway := NewTeam(s.League.Gender, s.League.Sport, s.League.Country)
|
|
|
|
|
match := NewMatch(s.League, teamHome, teamAway, &Player{}, &Player{}, s.round, s.matchDay, 0)
|
2020-10-19 09:26:23 +00:00
|
|
|
|
|
|
|
|
|
startTime := strings.TrimSpace(row.Find("td.lm1").Text())
|
|
|
|
|
if startTime == "-- : --" {
|
|
|
|
|
startTime = "00:00"
|
|
|
|
|
}
|
|
|
|
|
startDate, err := time.Parse(
|
|
|
|
|
"2006-01-02 15:04 MST",
|
2020-12-02 16:58:49 +00:00
|
|
|
|
s.currentDate.Format("2006-01-02 ") + startTime + " " + s.currentTimezone,
|
2020-10-19 09:26:23 +00:00
|
|
|
|
)
|
|
|
|
|
if err != nil {
|
|
|
|
|
match.Error = utils.StringPointer("parse date error")
|
|
|
|
|
match.Trace = utils.StringPointer(fmt.Sprint(err))
|
|
|
|
|
matches = append(matches, match)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
match.StartDate = &startDate
|
|
|
|
|
|
2020-12-02 16:58:49 +00:00
|
|
|
|
homeNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")}
|
2020-10-19 09:26:23 +00:00
|
|
|
|
match.TeamHome.Names = &homeNames
|
2020-12-02 16:58:49 +00:00
|
|
|
|
awayNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")}
|
2020-10-19 09:26:23 +00:00
|
|
|
|
match.TeamAway.Names = &awayNames
|
|
|
|
|
|
|
|
|
|
basePath, ok := row.Find(".lm3 a").First().Attr("href")
|
|
|
|
|
if !ok {
|
|
|
|
|
match.Error = utils.StringPointer("unable to find href attr")
|
|
|
|
|
match.Trace = utils.StringPointer("unable to find href attribute for element '.lm3 a'")
|
|
|
|
|
matches = append(matches, match)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
baseUrl := &url.URL{Scheme: s.URL.Scheme, Host: s.URL.Host, Path: basePath}
|
|
|
|
|
match.BaseUrl = utils.StringPointer(baseUrl.String())
|
|
|
|
|
|
|
|
|
|
if match.Round == nil && match.MatchDay == nil && s.League.MatchDays != nil && s.League.MatchesByMatchDay != nil {
|
|
|
|
|
if s.League.currentMatchDayId == 0 {
|
|
|
|
|
s.League.currentMatchDay ++
|
|
|
|
|
}
|
|
|
|
|
curMatchDay := s.League.currentMatchDay
|
|
|
|
|
curMatchDayId := s.League.currentMatchDayId
|
|
|
|
|
match.MatchDay = &curMatchDay
|
|
|
|
|
match.MatchDayId = &curMatchDayId
|
|
|
|
|
s.League.currentMatchDayId = (s.League.currentMatchDayId + 1) % *s.League.MatchesByMatchDay
|
|
|
|
|
}
|
|
|
|
|
matches = append(matches, match)
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return nil, fmt.Errorf("unexpected source url %s", s.URL.String())
|
|
|
|
|
}
|
|
|
|
|
return matches, nil
|
2020-10-10 17:08:06 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type League struct {
|
|
|
|
|
Id int
|
|
|
|
|
Sport *Sport
|
2020-10-19 09:26:23 +00:00
|
|
|
|
Country *Country
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
|
|
|
|
Name string
|
|
|
|
|
CleanName string
|
|
|
|
|
Gender *int
|
|
|
|
|
Degree *int
|
|
|
|
|
|
|
|
|
|
ScheduleUrl *string
|
|
|
|
|
RankingUrl *string
|
|
|
|
|
ChannelUrl *string
|
|
|
|
|
|
|
|
|
|
MatchDays *int
|
|
|
|
|
MatchesByMatchDay *int
|
|
|
|
|
TeamCount int
|
|
|
|
|
|
|
|
|
|
Rounds []string
|
|
|
|
|
Groups []string
|
|
|
|
|
Points interface{}
|
|
|
|
|
Promotions interface{}
|
|
|
|
|
|
|
|
|
|
Images interface{}
|
|
|
|
|
Schedule interface{}
|
|
|
|
|
|
|
|
|
|
Tags []string
|
|
|
|
|
CleanTags []string
|
|
|
|
|
NewsCount int
|
|
|
|
|
|
|
|
|
|
Error *string
|
|
|
|
|
Trace *string
|
2020-10-19 09:26:23 +00:00
|
|
|
|
|
|
|
|
|
currentMatchDay int
|
|
|
|
|
currentMatchDayId int
|
2020-10-10 17:08:06 +00:00
|
|
|
|
}
|
|
|
|
|
|
2020-10-19 09:26:23 +00:00
|
|
|
|
func (l *League) ListSources() ([]*source, error) {
|
|
|
|
|
var sources []*source
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
|
|
|
|
if l.ScheduleUrl == nil {
|
|
|
|
|
return nil, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
doc, err := requests.GetDocumentFromURL(*l.ScheduleUrl)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
originUrl, err := url.Parse(*l.ScheduleUrl)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch originUrl.Host {
|
|
|
|
|
case utils.HostMatchendirect:
|
|
|
|
|
doc.Find(".fDate option").Each(func (i int, s *goquery.Selection) {
|
|
|
|
|
value, ok := s.Attr("value")
|
|
|
|
|
if !ok {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
parsedDate := strings.Split(strings.Split(value, "/")[3], "-")
|
|
|
|
|
year := utils.AtoI(parsedDate[0])
|
|
|
|
|
week := utils.AtoI(parsedDate[1])
|
2020-10-19 09:26:23 +00:00
|
|
|
|
if (year == 2020 && week >= 34) || year > 2020 {
|
|
|
|
|
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, value, "", "", 0))
|
2020-10-10 17:08:06 +00:00
|
|
|
|
}
|
|
|
|
|
})
|
2020-10-19 09:26:23 +00:00
|
|
|
|
|
2020-10-10 17:08:06 +00:00
|
|
|
|
case utils.HostEurosport, utils.HostRugbyrama:
|
2020-10-19 09:26:23 +00:00
|
|
|
|
curRegexp := regexp.MustCompile(`(\d)+e\s+Journée`)
|
2020-10-10 17:08:06 +00:00
|
|
|
|
|
|
|
|
|
ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
|
|
|
|
|
if !ok {
|
|
|
|
|
return nil, fmt.Errorf("ajax-container url not found")
|
|
|
|
|
}
|
|
|
|
|
ajaxParsedUrl, err := url.Parse(ajaxUrl)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
ajaxQuery := ajaxParsedUrl.Query()
|
|
|
|
|
|
|
|
|
|
doc.Find("#results-match-nav .rounds-dropdown__round").Each(func (i int, s *goquery.Selection) {
|
2020-10-19 09:26:23 +00:00
|
|
|
|
matchDay := 0
|
|
|
|
|
round, _ := s.Attr("data-label")
|
|
|
|
|
reMatch := curRegexp.FindStringSubmatch(round)
|
2020-10-10 17:08:06 +00:00
|
|
|
|
if reMatch != nil {
|
2020-10-19 09:26:23 +00:00
|
|
|
|
round = ""
|
|
|
|
|
matchDay = utils.AtoI(reMatch[1])
|
2020-10-10 17:08:06 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
roundId, _ := s.Attr("data-round-id")
|
|
|
|
|
ajaxQuery.Set("roundid", roundId)
|
2020-10-19 09:26:23 +00:00
|
|
|
|
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, ajaxParsedUrl.Path, ajaxQuery.Encode(), round, matchDay))
|
2020-10-10 17:08:06 +00:00
|
|
|
|
})
|
2020-10-19 09:26:23 +00:00
|
|
|
|
|
2020-10-10 17:08:06 +00:00
|
|
|
|
default:
|
|
|
|
|
return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
|
|
|
|
|
}
|
|
|
|
|
return sources, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|