package match import ( "fmt" "log" "net/url" "regexp" "strings" "time" "github.com/PuerkitoBio/goquery" "1bet.fr/scraper/requests" "1bet.fr/scraper/utils" ) const ( _ = iota GenderMale = iota ) const ( _ = iota StatusComing = iota ) type Sport struct { Id int Name string CleanName string } type Country struct { Id int } type Player struct { Id int FullName string CleanName string Gender int } type TeamImages struct { H30 string H50 string H80 string } type Team struct { Id int Sport *Sport Country *Country Name string CleanName string ShortName *string LongName *string Gender *int Names *map[string]string PlayersUrl *string Images *TeamImages Tags *[]string CleanTags *[]string NewsCount int Error *string Trace *string } func NewTeam(gender *int, sport *Sport, country *Country) *Team { return &Team{ Gender: gender, Images: &TeamImages{ H30: "t0-h30.svg", H50: "t0-h50.svg", H80: "t0-h80.svg", }, Sport: sport, Country: country, } } type Match struct { Id int League *League TeamHome *Team TeamAway *Team PlayerHome *Player PlayerAway *Player MatchDay *int Round *string Leg *int BaseUrl *string ScoreUrl *string LiveUrl *string TvChannels *[]string MatchDayId *int Status *int Minute *string StartDate *time.Time EndDate *time.Time HomeScore *int AwayScore *int SetsScore *[]interface{} Winner *int ExtraTime *int ShootoutHome *int ShootoutAway *int Squad *[]interface{} Events *[]interface{} Stats *[]interface{} Live *[]interface{} LastEvent *interface{} LastEventDate *time.Time Error *string Trace *string } func NewMatch(league *League, teamHome *Team, teamAway *Team, playerHome *Player, playerAway *Player, round string, matchDay int, leg int) *Match { return &Match{ League: league, TeamHome: teamHome, TeamAway: teamAway, PlayerHome: playerHome, PlayerAway: playerAway, MatchDay: utils.IntPointer(matchDay), Round: utils.StringPointer(round), Leg: utils.IntPointer(leg), } } type source struct { League *League URL *url.URL matchDay int round string currentDate *time.Time currentTimezone string } func newSource(league *League, scheme string, host string, path string, query string, round string, matchDay int) *source { return &source{ League: league, URL: &url.URL{ Scheme: scheme, Host: host, Path: path, RawQuery: query, }, round: round, matchDay: matchDay, } } func (s *source) GetMatches() ([]*Match, error) { var matches []*Match switch s.URL.Host { case utils.HostMatchendirect: doc, err := requests.GetDocumentFromURL(s.URL.String()) if err != nil { return nil, err } doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) { headCell := row.Find("th") if len(headCell.Nodes) == 1 { curDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(headCell.Text())) if err != nil { log.Fatalf("unexpected error while parsing date : %s", err) return } s.currentDate = &curDate s.currentTimezone = utils.FrenchTimezone(curDate) } else { teamHome := NewTeam(s.League.Gender, s.League.Sport, s.League.Country) teamAway := NewTeam(s.League.Gender, s.League.Sport, s.League.Country) match := NewMatch(s.League, teamHome, teamAway, &Player{}, &Player{}, s.round, s.matchDay, 0) startTime := strings.TrimSpace(row.Find("td.lm1").Text()) if startTime == "-- : --" { startTime = "00:00" } startDate, err := time.Parse( "2006-01-02 15:04 MST", s.currentDate.Format("2006-01-02 ") + startTime + " " + s.currentTimezone, ) if err != nil { match.Error = utils.StringPointer("parse date error") match.Trace = utils.StringPointer(fmt.Sprint(err)) matches = append(matches, match) return } match.StartDate = &startDate homeNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")} match.TeamHome.Names = &homeNames awayNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")} match.TeamAway.Names = &awayNames basePath, ok := row.Find(".lm3 a").First().Attr("href") if !ok { match.Error = utils.StringPointer("unable to find href attr") match.Trace = utils.StringPointer("unable to find href attribute for element '.lm3 a'") matches = append(matches, match) return } baseUrl := &url.URL{Scheme: s.URL.Scheme, Host: s.URL.Host, Path: basePath} match.BaseUrl = utils.StringPointer(baseUrl.String()) if match.Round == nil && match.MatchDay == nil && s.League.MatchDays != nil && s.League.MatchesByMatchDay != nil { if s.League.currentMatchDayId == 0 { s.League.currentMatchDay ++ } curMatchDay := s.League.currentMatchDay curMatchDayId := s.League.currentMatchDayId match.MatchDay = &curMatchDay match.MatchDayId = &curMatchDayId s.League.currentMatchDayId = (s.League.currentMatchDayId + 1) % *s.League.MatchesByMatchDay } matches = append(matches, match) } }) default: return nil, fmt.Errorf("unexpected source url %s", s.URL.String()) } return matches, nil } type League struct { Id int Sport *Sport Country *Country Name string CleanName string Gender *int Degree *int ScheduleUrl *string RankingUrl *string ChannelUrl *string MatchDays *int MatchesByMatchDay *int TeamCount int Rounds []string Groups []string Points interface{} Promotions interface{} Images interface{} Schedule interface{} Tags []string CleanTags []string NewsCount int Error *string Trace *string currentMatchDay int currentMatchDayId int } func (l *League) ListSources() ([]*source, error) { var sources []*source if l.ScheduleUrl == nil { return nil, nil } doc, err := requests.GetDocumentFromURL(*l.ScheduleUrl) if err != nil { return nil, err } originUrl, err := url.Parse(*l.ScheduleUrl) if err != nil { return nil, err } switch originUrl.Host { case utils.HostMatchendirect: doc.Find(".fDate option").Each(func (i int, s *goquery.Selection) { value, ok := s.Attr("value") if !ok { return } parsedDate := strings.Split(strings.Split(value, "/")[3], "-") year := utils.AtoI(parsedDate[0]) week := utils.AtoI(parsedDate[1]) if (year == 2020 && week >= 34) || year > 2020 { sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, value, "", "", 0)) } }) case utils.HostEurosport, utils.HostRugbyrama: curRegexp := regexp.MustCompile(`(\d)+e\s+Journée`) ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url") if !ok { return nil, fmt.Errorf("ajax-container url not found") } ajaxParsedUrl, err := url.Parse(ajaxUrl) if err != nil { return nil, err } ajaxQuery := ajaxParsedUrl.Query() doc.Find("#results-match-nav .rounds-dropdown__round").Each(func (i int, s *goquery.Selection) { matchDay := 0 round, _ := s.Attr("data-label") reMatch := curRegexp.FindStringSubmatch(round) if reMatch != nil { round = "" matchDay = utils.AtoI(reMatch[1]) } roundId, _ := s.Attr("data-round-id") ajaxQuery.Set("roundid", roundId) sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, ajaxParsedUrl.Path, ajaxQuery.Encode(), round, matchDay)) }) default: return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl) } return sources, nil }