New function updateSchedule
This commit is contained in:
parent
bd0366fdc3
commit
f07ed95702
1
go.mod
1
go.mod
|
@ -4,6 +4,7 @@ go 1.12
|
|||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.6.0
|
||||
github.com/go-redis/redis v6.15.9+incompatible
|
||||
github.com/lib/pq v1.8.0
|
||||
github.com/mmcdole/gofeed v1.1.0
|
||||
golang.org/x/net v0.0.0-20201009032441-dbdefad45b89
|
||||
|
|
2
go.sum
2
go.sum
|
@ -7,6 +7,8 @@ github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9Pq
|
|||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg=
|
||||
github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
|
||||
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
|
|
67
main.go
67
main.go
|
@ -1,13 +1,13 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"1bet.fr/scraper/utils"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/lib/pq"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"github.com/lib/pq"
|
||||
|
||||
"1bet.fr/scraper/match"
|
||||
"1bet.fr/scraper/news"
|
||||
"1bet.fr/scraper/postgres"
|
||||
|
@ -84,38 +84,59 @@ func updateSchedule() {
|
|||
}
|
||||
|
||||
waitGroup := sync.WaitGroup{}
|
||||
sourceChannel := make(chan *match.Source)
|
||||
leagueChannel := make(chan *match.League)
|
||||
for i := 0; i < nbProcesses; i++ {
|
||||
waitGroup.Add(1)
|
||||
go func(sc chan *match.Source, wg *sync.WaitGroup) {
|
||||
go func(lc chan *match.League, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
for s := range sc {
|
||||
matches, err := s.GetMatches()
|
||||
for l := range lc {
|
||||
sources, err := l.ListSources()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
l.Error = utils.StringPointer("list sources error")
|
||||
l.Trace = utils.StringPointer(fmt.Sprint(err))
|
||||
if updated, err := postgres.UpdateLeague(l); err != nil {
|
||||
log.Fatalf("error while updating league : %s", err)
|
||||
} else if updated != 1 {
|
||||
log.Fatalf("error while updating league : %d league(s) updated", updated)
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
for _, m := range matches {
|
||||
fmt.Println(m)
|
||||
//if err = postgres.InsertMatch(m); err != nil {
|
||||
// log.Fatal(err)
|
||||
//}
|
||||
for _, s := range sources {
|
||||
log.Printf("[+] Parsing source %s", s.URL)
|
||||
matches, err := s.GetMatches()
|
||||
if err != nil {
|
||||
log.Printf("[-] error while getting matches from league source %s : %s", s.URL, err)
|
||||
s.League.Error = utils.StringPointer("league source error")
|
||||
s.League.Trace = utils.StringPointer(fmt.Sprint(err))
|
||||
if updated, err := postgres.UpdateLeague(s.League); err != nil {
|
||||
log.Fatalf("error while updating league : %s", err)
|
||||
} else if updated != 1 {
|
||||
log.Fatalf("error while updating league : %d league(s) updated", updated)
|
||||
}
|
||||
break
|
||||
}
|
||||
for _, m := range matches {
|
||||
if err := postgres.InsertTeamBySourceName(m.TeamHome, s.League); err != nil {
|
||||
log.Fatalf("error while saving team home : %s", err)
|
||||
}
|
||||
if err := postgres.InsertTeamBySourceName(m.TeamAway, s.League); err != nil {
|
||||
log.Fatalf("error while saving team away : %s", err)
|
||||
}
|
||||
if err := postgres.InsertMatch(m); err != nil {
|
||||
log.Fatalf("error while saving match : %s", err)
|
||||
}
|
||||
log.Printf("New match #%d (%s - %s)", m.Id, m.TeamHome.Name, m.TeamAway.Name)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
}(sourceChannel, &waitGroup)
|
||||
}(leagueChannel, &waitGroup)
|
||||
}
|
||||
|
||||
for _, league := range leagues {
|
||||
sources, err := league.ListSources()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
for _, s := range sources {
|
||||
log.Printf("Adding source %s", s.Url.String())
|
||||
sourceChannel <- s
|
||||
}
|
||||
leagueChannel <- league
|
||||
}
|
||||
close(leagueChannel)
|
||||
waitGroup.Wait()
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
|
336
match/match.go
336
match/match.go
|
@ -2,6 +2,7 @@ package match
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
@ -13,122 +14,206 @@ import (
|
|||
"1bet.fr/scraper/utils"
|
||||
)
|
||||
|
||||
const (
|
||||
_ = iota
|
||||
GenderMale = iota
|
||||
//GenderFemale = iota
|
||||
)
|
||||
|
||||
type Sport struct {
|
||||
Id int
|
||||
Name string
|
||||
CleanName string
|
||||
}
|
||||
|
||||
const (
|
||||
_ = iota
|
||||
GenderMale = iota
|
||||
GenderFemale = iota
|
||||
)
|
||||
type Country struct {
|
||||
Id int
|
||||
}
|
||||
|
||||
type Player struct {
|
||||
Id int
|
||||
FullName string
|
||||
CleanName string
|
||||
Gender int
|
||||
}
|
||||
|
||||
type Team struct {
|
||||
Id int
|
||||
SportId int
|
||||
CountryId int
|
||||
Sport *Sport
|
||||
Country *Country
|
||||
|
||||
Name string
|
||||
CleanName string
|
||||
|
||||
ShortName string
|
||||
LongName string
|
||||
Gender int
|
||||
ShortName *string
|
||||
LongName *string
|
||||
Gender *int
|
||||
|
||||
Names interface{}
|
||||
Url string
|
||||
Images interface{}
|
||||
Names *map[string]string
|
||||
PlayersUrl *string
|
||||
Images *interface{}
|
||||
|
||||
Tags []string
|
||||
CleanTags []string
|
||||
Tags *[]string
|
||||
CleanTags *[]string
|
||||
NewsCount int
|
||||
|
||||
Error string
|
||||
Trace string
|
||||
Error *string
|
||||
Trace *string
|
||||
}
|
||||
|
||||
const (
|
||||
_ = iota
|
||||
LegFirst = iota
|
||||
LegSecond = iota
|
||||
LegReplay = iota
|
||||
)
|
||||
|
||||
const (
|
||||
_ = iota
|
||||
StatusFirstTime = iota
|
||||
StatusHalfTime = iota
|
||||
StatusSecondTime = iota
|
||||
StatusFirstExtra = iota
|
||||
StatusHalfExtra = iota
|
||||
StatusSecondExtra = iota
|
||||
StatusShootout = iota
|
||||
StatusWaitScores = iota
|
||||
StatusOver = iota
|
||||
StatusPostponed = iota
|
||||
StatusCancelled = iota
|
||||
)
|
||||
|
||||
const (
|
||||
_ = iota
|
||||
WinnerHome = iota
|
||||
WinnerAway = iota
|
||||
WinnerDraw = iota
|
||||
)
|
||||
|
||||
const (
|
||||
_ = iota
|
||||
ExtraTimeExtraTime = iota
|
||||
ExtraTimeShootout = iota
|
||||
)
|
||||
|
||||
type Match struct {
|
||||
Id int
|
||||
LeagueId int
|
||||
TeamHomeId int
|
||||
TeamAwayId int
|
||||
PlayerHomeId int
|
||||
PlayerAwayId int
|
||||
League *League
|
||||
TeamHome *Team
|
||||
TeamAway *Team
|
||||
PlayerHome *Player
|
||||
PlayerAway *Player
|
||||
|
||||
MatchDay int
|
||||
MatchDayId int
|
||||
Round string
|
||||
Leg int
|
||||
MatchDay *int
|
||||
Round *string
|
||||
Leg *int
|
||||
|
||||
BaseUrl string
|
||||
ScoreUrl string
|
||||
LiveUrl string
|
||||
TvChannels []string
|
||||
BaseUrl *string
|
||||
ScoreUrl *string
|
||||
LiveUrl *string
|
||||
TvChannels *[]string
|
||||
|
||||
Status int
|
||||
Minute int
|
||||
MatchDayId *int
|
||||
Status *int
|
||||
Minute *string
|
||||
StartDate *time.Time
|
||||
EndDate *time.Time
|
||||
HomeScore int
|
||||
AwayScore int
|
||||
SetsScore int
|
||||
Winner int
|
||||
ExtraTime int
|
||||
ShootoutHome int
|
||||
ShootoutAway int
|
||||
HomeScore *int
|
||||
AwayScore *int
|
||||
SetsScore *[]interface{}
|
||||
Winner *int
|
||||
ExtraTime *int
|
||||
ShootoutHome *int
|
||||
ShootoutAway *int
|
||||
|
||||
Squad []interface{}
|
||||
Events []interface{}
|
||||
Stats []interface{}
|
||||
Live []interface{}
|
||||
LastEvent interface{}
|
||||
Squad *[]interface{}
|
||||
Events *[]interface{}
|
||||
Stats *[]interface{}
|
||||
Live *[]interface{}
|
||||
LastEvent *interface{}
|
||||
LastEventDate *time.Time
|
||||
|
||||
Error string
|
||||
Trace string
|
||||
Error *string
|
||||
Trace *string
|
||||
}
|
||||
|
||||
func NewMatch(league *League, teamHome *Team, teamAway *Team, playerHome *Player, playerAway *Player, round string, matchDay int, leg int) *Match {
|
||||
return &Match{
|
||||
League: league,
|
||||
TeamHome: teamHome,
|
||||
TeamAway: teamAway,
|
||||
PlayerHome: playerHome,
|
||||
PlayerAway: playerAway,
|
||||
MatchDay: utils.IntPointer(matchDay),
|
||||
Round: utils.StringPointer(round),
|
||||
Leg: utils.IntPointer(leg),
|
||||
}
|
||||
}
|
||||
|
||||
type source struct {
|
||||
League *League
|
||||
URL *url.URL
|
||||
matchDay int
|
||||
round string
|
||||
|
||||
currentDate *time.Time
|
||||
}
|
||||
|
||||
func newSource(league *League, scheme string, host string, path string, query string, round string, matchDay int) *source {
|
||||
return &source{
|
||||
League: league,
|
||||
URL: &url.URL{
|
||||
Scheme: scheme,
|
||||
Host: host,
|
||||
Path: path,
|
||||
RawQuery: query,
|
||||
},
|
||||
round: round,
|
||||
matchDay: matchDay,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *source) GetMatches() ([]*Match, error) {
|
||||
var matches []*Match
|
||||
|
||||
switch s.URL.Host {
|
||||
case utils.HostMatchendirect:
|
||||
doc, err := requests.GetDocumentFromURL(s.URL.String())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) {
|
||||
headCell := row.Find("th")
|
||||
if len(headCell.Nodes) == 1 {
|
||||
curDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(headCell.Text()))
|
||||
if err != nil {
|
||||
log.Fatalf("unexpected error while parsing date : %s", err)
|
||||
return
|
||||
}
|
||||
s.currentDate = &curDate
|
||||
} else {
|
||||
match := NewMatch(s.League, &Team{Gender: s.League.Gender}, &Team{Gender: s.League.Gender}, &Player{}, &Player{}, s.round, s.matchDay, 0)
|
||||
|
||||
startTime := strings.TrimSpace(row.Find("td.lm1").Text())
|
||||
if startTime == "-- : --" {
|
||||
startTime = "00:00"
|
||||
}
|
||||
startDate, err := time.Parse(
|
||||
"2006-01-02 15:04 MST",
|
||||
s.currentDate.Format("2006-01-02 ") + startTime + " CEST",
|
||||
)
|
||||
if err != nil {
|
||||
match.Error = utils.StringPointer("parse date error")
|
||||
match.Trace = utils.StringPointer(fmt.Sprint(err))
|
||||
matches = append(matches, match)
|
||||
return
|
||||
}
|
||||
match.StartDate = &startDate
|
||||
|
||||
homeNames := map[string]string{utils.HostMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")}
|
||||
match.TeamHome.Names = &homeNames
|
||||
awayNames := map[string]string{utils.HostMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")}
|
||||
match.TeamAway.Names = &awayNames
|
||||
|
||||
basePath, ok := row.Find(".lm3 a").First().Attr("href")
|
||||
if !ok {
|
||||
match.Error = utils.StringPointer("unable to find href attr")
|
||||
match.Trace = utils.StringPointer("unable to find href attribute for element '.lm3 a'")
|
||||
matches = append(matches, match)
|
||||
return
|
||||
}
|
||||
baseUrl := &url.URL{Scheme: s.URL.Scheme, Host: s.URL.Host, Path: basePath}
|
||||
match.BaseUrl = utils.StringPointer(baseUrl.String())
|
||||
|
||||
if match.Round == nil && match.MatchDay == nil && s.League.MatchDays != nil && s.League.MatchesByMatchDay != nil {
|
||||
if s.League.currentMatchDayId == 0 {
|
||||
s.League.currentMatchDay ++
|
||||
}
|
||||
curMatchDay := s.League.currentMatchDay
|
||||
curMatchDayId := s.League.currentMatchDayId
|
||||
match.MatchDay = &curMatchDay
|
||||
match.MatchDayId = &curMatchDayId
|
||||
s.League.currentMatchDayId = (s.League.currentMatchDayId + 1) % *s.League.MatchesByMatchDay
|
||||
}
|
||||
matches = append(matches, match)
|
||||
}
|
||||
})
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected source url %s", s.URL.String())
|
||||
}
|
||||
return matches, nil
|
||||
}
|
||||
|
||||
type League struct {
|
||||
Id int
|
||||
Sport *Sport
|
||||
CountryId int
|
||||
Country *Country
|
||||
|
||||
Name string
|
||||
CleanName string
|
||||
|
@ -140,7 +225,6 @@ type League struct {
|
|||
ChannelUrl *string
|
||||
|
||||
MatchDays *int
|
||||
CurrentMatchDay *int
|
||||
MatchesByMatchDay *int
|
||||
TeamCount int
|
||||
|
||||
|
@ -158,10 +242,13 @@ type League struct {
|
|||
|
||||
Error *string
|
||||
Trace *string
|
||||
|
||||
currentMatchDay int
|
||||
currentMatchDayId int
|
||||
}
|
||||
|
||||
func (l *League) ListSources() ([]*Source, error) {
|
||||
var sources []*Source
|
||||
func (l *League) ListSources() ([]*source, error) {
|
||||
var sources []*source
|
||||
|
||||
if l.ScheduleUrl == nil {
|
||||
return nil, nil
|
||||
|
@ -187,19 +274,13 @@ func (l *League) ListSources() ([]*Source, error) {
|
|||
parsedDate := strings.Split(strings.Split(value, "/")[3], "-")
|
||||
year := utils.AtoI(parsedDate[0])
|
||||
week := utils.AtoI(parsedDate[1])
|
||||
if year >= 2020 && week >= 34 {
|
||||
sources = append(sources, &Source{
|
||||
League: l,
|
||||
Url: &url.URL{
|
||||
Scheme: originUrl.Scheme,
|
||||
Host: originUrl.Host,
|
||||
Path: value,
|
||||
},
|
||||
})
|
||||
if (year == 2020 && week >= 34) || year > 2020 {
|
||||
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, value, "", "", 0))
|
||||
}
|
||||
})
|
||||
|
||||
case utils.HostEurosport, utils.HostRugbyrama:
|
||||
eurosportRegexp := regexp.MustCompile(`(\d)+e\s+Journée`)
|
||||
curRegexp := regexp.MustCompile(`(\d)+e\s+Journée`)
|
||||
|
||||
ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
|
||||
if !ok {
|
||||
|
@ -212,74 +293,23 @@ func (l *League) ListSources() ([]*Source, error) {
|
|||
ajaxQuery := ajaxParsedUrl.Query()
|
||||
|
||||
doc.Find("#results-match-nav .rounds-dropdown__round").Each(func (i int, s *goquery.Selection) {
|
||||
var round *string
|
||||
var matchDay *int
|
||||
|
||||
roundStr, _ := s.Attr("data-label")
|
||||
reMatch := eurosportRegexp.FindStringSubmatch(roundStr)
|
||||
matchDay := 0
|
||||
round, _ := s.Attr("data-label")
|
||||
reMatch := curRegexp.FindStringSubmatch(round)
|
||||
if reMatch != nil {
|
||||
mdayInt := utils.AtoI(reMatch[1])
|
||||
matchDay = &mdayInt
|
||||
} else {
|
||||
round = &roundStr
|
||||
round = ""
|
||||
matchDay = utils.AtoI(reMatch[1])
|
||||
}
|
||||
|
||||
roundId, _ := s.Attr("data-round-id")
|
||||
ajaxQuery.Set("roundid", roundId)
|
||||
|
||||
sources = append(sources, &Source{
|
||||
League: l,
|
||||
Url: &url.URL{
|
||||
Scheme: originUrl.Scheme,
|
||||
Host: originUrl.Host,
|
||||
Path: ajaxParsedUrl.Path,
|
||||
RawQuery: ajaxQuery.Encode(),
|
||||
},
|
||||
Round: round,
|
||||
MatchDay: matchDay,
|
||||
})
|
||||
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, ajaxParsedUrl.Path, ajaxQuery.Encode(), round, matchDay))
|
||||
})
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
|
||||
}
|
||||
return sources, nil
|
||||
}
|
||||
|
||||
type Source struct {
|
||||
League *League
|
||||
Url *url.URL
|
||||
MatchDay *int
|
||||
Round *string
|
||||
currentDate *time.Time
|
||||
}
|
||||
|
||||
func (s *Source) GetMatches() ([]*Match, error) {
|
||||
var matches []*Match
|
||||
|
||||
switch s.Url.Host {
|
||||
case utils.HostMatchendirect:
|
||||
doc, err := requests.GetDocumentFromURL(s.Url.String())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) {
|
||||
row.Children().Each(func (j int, col *goquery.Selection) {
|
||||
colspan, ok := col.Attr("colspan")
|
||||
if ok && colspan == "4" {
|
||||
currentDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(col.Text()))
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return
|
||||
}
|
||||
s.currentDate = ¤tDate
|
||||
fmt.Println(s.currentDate)
|
||||
}
|
||||
})
|
||||
})
|
||||
case utils.HostEurosport, utils.HostRugbyrama:
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected source url %s", s.Url.String())
|
||||
}
|
||||
return matches, nil
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package match
|
||||
|
||||
import (
|
||||
"1bet.fr/scraper/utils"
|
||||
"net/url"
|
||||
"strings"
|
||||
|
||||
|
@ -21,8 +22,8 @@ func TestLeague_ListSources(t *testing.T) {
|
|||
t.Errorf("no sources found")
|
||||
}
|
||||
for _, s := range sources {
|
||||
if !strings.HasPrefix(s.Url.String(), "http://www.matchendirect.fr/france/ligue-1/") {
|
||||
t.Errorf("unexpected source url %s", s.Url)
|
||||
if !strings.HasPrefix(s.URL.String(), "http://www.matchendirect.fr/france/ligue-1/") {
|
||||
t.Errorf("unexpected source url %s", s.URL)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -39,8 +40,8 @@ func TestLeague_ListSources(t *testing.T) {
|
|||
t.Errorf("no sources found")
|
||||
}
|
||||
for _, s := range sources {
|
||||
if !strings.HasPrefix(s.Url.String(), "https://www.eurosport.fr/") {
|
||||
t.Errorf("unexpected source url %s", s.Url)
|
||||
if !strings.HasPrefix(s.URL.String(), "https://www.eurosport.fr/") {
|
||||
t.Errorf("unexpected source url %s", s.URL)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -57,17 +58,47 @@ func TestLeague_ListSources(t *testing.T) {
|
|||
t.Errorf("no sources found")
|
||||
}
|
||||
for _, s := range sources {
|
||||
if !strings.HasPrefix(s.Url.String(), "https://www.rugbyrama.fr/") {
|
||||
t.Errorf("unexpected source url %s", s.Url)
|
||||
if !strings.HasPrefix(s.URL.String(), "https://www.rugbyrama.fr/") {
|
||||
t.Errorf("unexpected source url %s", s.URL)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSource_GetMatches(t *testing.T) {
|
||||
sourceUrl, _ := url.Parse("https://www.matchendirect.fr/france/ligue-1/2020-37/")
|
||||
source := &Source{
|
||||
League: &League{Id: 1},
|
||||
Url: sourceUrl,
|
||||
league := &League{Id: 1, MatchDays: utils.IntPointer(38), MatchesByMatchDay: utils.IntPointer(10)}
|
||||
source := newSource(league, sourceUrl.Scheme, sourceUrl.Host, sourceUrl.Path, "", "", 0)
|
||||
matches, err := source.GetMatches()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
for _, m := range matches {
|
||||
if m.League == nil {
|
||||
t.Error("unexpected nil match.League")
|
||||
}
|
||||
if m.StartDate == nil {
|
||||
t.Error("unexpected nil match.StartDate")
|
||||
}
|
||||
if m.TeamHome.Names == nil {
|
||||
t.Error("unexpected nil match.TeamHome.Names")
|
||||
}
|
||||
if m.TeamAway.Names == nil {
|
||||
t.Error("unexpected nil match.TeamAway.Names")
|
||||
}
|
||||
if m.PlayerHome == nil {
|
||||
t.Error("unexpected nil match.PlayerHome")
|
||||
}
|
||||
if m.PlayerAway == nil {
|
||||
t.Error("unexpected nil match.PlayerAway")
|
||||
}
|
||||
if m.MatchDay == nil {
|
||||
t.Error("unexpected nil match.MatchDay")
|
||||
}
|
||||
if m.MatchDayId == nil {
|
||||
t.Error("unexpected nil match.MatchDayId")
|
||||
}
|
||||
if m.Error != nil {
|
||||
t.Errorf("unexpected not nil match.Error : %s", *m.Error)
|
||||
}
|
||||
}
|
||||
source.GetMatches()
|
||||
}
|
||||
|
|
|
@ -137,7 +137,8 @@ func (n *News) Feed() error {
|
|||
n.Author = utils.StringPointer(doc.Find(".article__author a").Text())
|
||||
|
||||
default:
|
||||
n.Error = utils.StringPointer("unknown host " + parsedLink.Host)
|
||||
n.Error = utils.StringPointer("unknown link host")
|
||||
n.Trace = utils.StringPointer("unknown link host : " + n.Link)
|
||||
}
|
||||
|
||||
if n.Content == nil {
|
||||
|
|
|
@ -94,7 +94,7 @@ func TestNews_Feed(t *testing.T) {
|
|||
t.Errorf("cleanTags does not contain football")
|
||||
}
|
||||
|
||||
t.Logf("testing feed from Foot Mercato")
|
||||
t.Logf("testing feed from Rugbyrama")
|
||||
n = &News{
|
||||
Source: &Source{Sport: &Sport{Name: "Rugby", CleanName: "rugby"}},
|
||||
Link: "https://www.rugbyrama.fr/rugby/top-14/2018-2019/top-14-face-au-racing-92-toulouse-n-aura-pas-de-marge-de-manoeuvre_sto7939622/story.shtml",
|
||||
|
@ -136,10 +136,10 @@ func TestSource_ListNews(t *testing.T) {
|
|||
}
|
||||
for _, n := range newsList {
|
||||
if n.Title == "" {
|
||||
t.Errorf("unexpected empty title")
|
||||
t.Errorf("unexpected empty news title")
|
||||
}
|
||||
if n.Image == nil {
|
||||
t.Errorf("unexpected nil image")
|
||||
if n.Link == "" {
|
||||
t.Errorf("unexpected empty news link")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,13 @@
|
|||
package postgres
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"log"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/lib/pq"
|
||||
|
||||
|
@ -46,6 +49,25 @@ func aValue(a *[]string) interface{} {
|
|||
return pq.Array(*a)
|
||||
}
|
||||
|
||||
// foreignId converts an identifier into a value usable as a query argument:
// 0 becomes nil, any other value is returned unchanged.
func foreignId(i int) interface{} {
	switch i {
	case 0:
		return nil
	default:
		return i
	}
}
|
||||
|
||||
// concatWS joins the elements of a with the separator s.
//
// Each element is rendered with fmt.Sprint, except nil elements which are
// rendered as the empty string; their separators are still emitted. An empty
// or nil a yields "".
func concatWS(a []interface{}, s string) string {
	// The number of parts is known up front: allocate once. Entries left
	// untouched for nil elements already hold the empty string.
	parts := make([]string, len(a))
	for i, x := range a {
		if x != nil {
			parts[i] = fmt.Sprint(x)
		}
	}
	return strings.Join(parts, s)
}
|
||||
|
||||
var pg *Postgres
|
||||
|
||||
func init() {
|
||||
|
@ -131,9 +153,9 @@ func ListLeagues() ([]*match.League, error) {
|
|||
}
|
||||
|
||||
for rows.Next() {
|
||||
league := &match.League{Sport: &match.Sport{}}
|
||||
league := &match.League{Sport: &match.Sport{}, Country: &match.Country{}}
|
||||
if err = rows.Scan(
|
||||
&league.Id, &league.Sport.Id, &league.CountryId, &league.Name, &league.CleanName, &league.Gender,
|
||||
&league.Id, &league.Sport.Id, &league.Country.Id, &league.Name, &league.CleanName, &league.Gender,
|
||||
&league.ScheduleUrl, &league.RankingUrl, &league.ChannelUrl,
|
||||
&league.MatchDays, &league.MatchesByMatchDay, pq.Array(&league.Rounds), pq.Array(&league.Groups),
|
||||
&league.Sport.Name, &league.Sport.CleanName,
|
||||
|
@ -145,9 +167,22 @@ func ListLeagues() ([]*match.League, error) {
|
|||
return leagues, nil
|
||||
}
|
||||
|
||||
func UpdateLeague(l *match.League) (int64, error) {
|
||||
res, err := pg.psqlConn.Exec(`
|
||||
UPDATE mainapp_league
|
||||
SET error = $1, trace = $2
|
||||
WHERE id = $3
|
||||
`, sValue(l.Error), sValue(l.Trace), l.Id,
|
||||
)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return res.RowsAffected()
|
||||
}
|
||||
|
||||
func InsertNews(n *news.News) error {
|
||||
return pg.psqlConn.QueryRow(`
|
||||
INSERT INTO public.mainapp_news
|
||||
INSERT INTO mainapp_news
|
||||
(title, clean_title, link, pub_date, description, image, teaser, author,
|
||||
content, redirect, haystack, tags, clean_tags, error, trace,
|
||||
league_id, source_id, team_id)
|
||||
|
@ -165,7 +200,7 @@ func InsertNews(n *news.News) error {
|
|||
|
||||
func UpdateNews(n *news.News) (int64, error) {
|
||||
res, err := pg.psqlConn.Exec(`
|
||||
UPDATE public.mainapp_news
|
||||
UPDATE mainapp_news
|
||||
SET title = $1, clean_title = $2, pub_date = $3, link = $4, description = $5,
|
||||
image = $6, teaser = $7, author = $8, content = $9, redirect = $10,
|
||||
haystack = $11, tags = $12, clean_tags = $13, error = $14, trace = $15,
|
||||
|
@ -185,7 +220,7 @@ func UpdateNews(n *news.News) (int64, error) {
|
|||
|
||||
func DeleteNews(n *news.News) (int64, error) {
|
||||
res, err := pg.psqlConn.Exec(`
|
||||
DELETE FROM public.mainapp_news
|
||||
DELETE FROM mainapp_news
|
||||
WHERE id = $1
|
||||
`, n.Id)
|
||||
if err != nil {
|
||||
|
@ -193,3 +228,84 @@ func DeleteNews(n *news.News) (int64, error) {
|
|||
}
|
||||
return res.RowsAffected()
|
||||
}
|
||||
|
||||
func InsertTeamBySourceName(t *match.Team, l *match.League) error {
|
||||
var host, name string
|
||||
|
||||
if t.Names == nil {
|
||||
return fmt.Errorf("no source name given")
|
||||
}
|
||||
shortName := ""
|
||||
for host, name = range *t.Names {
|
||||
runeName := []rune(strings.ToUpper(name))
|
||||
shortName = string(runeName[:int(math.Min(3, float64(len(runeName))))])
|
||||
break
|
||||
}
|
||||
err := pg.psqlConn.QueryRow("SELECT id, name FROM mainapp_team WHERE names->>$1 = $2", host, name).Scan(&t.Id, &t.Name)
|
||||
if err != nil {
|
||||
cleanName := utils.Sanitize(name)
|
||||
jsonHost := fmt.Sprintf("{\"%s\"}", utils.Sanitize(host))
|
||||
return pg.psqlConn.QueryRow(`
|
||||
INSERT INTO mainapp_team
|
||||
(sport_id, country_id, name, clean_name, short_name, long_name, gender,
|
||||
names, tags, clean_tags, news_count)
|
||||
VALUES
|
||||
($1, $2, $3, $4, $5, $6, $7,
|
||||
jsonb_set('{}', $8, to_jsonb($9::text), true), $10, $11, 0)
|
||||
ON CONFLICT ON CONSTRAINT custom_unique_team DO UPDATE SET
|
||||
names = jsonb_set(mainapp_team.names, $12, to_jsonb($13::text), true)
|
||||
RETURNING id, name
|
||||
`, l.Sport.Id, l.Country.Id, name, cleanName, shortName, name, iValue(l.Gender),
|
||||
jsonHost, name, pq.Array([]string{name}), pq.Array([]string{cleanName}),
|
||||
jsonHost, name).Scan(&t.Id, &t.Name)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func DeleteTeam(t *match.Team) (int64, error) {
|
||||
res, err := pg.psqlConn.Exec(`
|
||||
DELETE FROM mainapp_team
|
||||
WHERE id = $1
|
||||
`, t.Id)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return res.RowsAffected()
|
||||
}
|
||||
|
||||
func InsertMatch(m *match.Match) error {
|
||||
var arr []interface{}
|
||||
arr = append(arr, m.League.Id)
|
||||
arr = append(arr, foreignId(m.TeamHome.Id))
|
||||
arr = append(arr, foreignId(m.TeamAway.Id))
|
||||
arr = append(arr, foreignId(m.PlayerHome.Id))
|
||||
arr = append(arr, foreignId(m.PlayerAway.Id))
|
||||
arr = append(arr, sValue(m.Round))
|
||||
hash := sha256.New()
|
||||
hash.Write([]byte(concatWS(arr, "/")))
|
||||
sign := fmt.Sprintf("%x", hash.Sum(nil))
|
||||
|
||||
return pg.psqlConn.QueryRow(`
|
||||
INSERT INTO mainapp_match
|
||||
(league_id, team_home_id, team_away_id, player_home_id, player_away_id, mday, round, leg, sign,
|
||||
mday_id, base_url, start_date, error, trace)
|
||||
VALUES
|
||||
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
|
||||
ON CONFLICT ON CONSTRAINT mainapp_match_sign_key DO UPDATE SET
|
||||
base_url = $11, start_date = $12, error = $13, trace = $14
|
||||
RETURNING id
|
||||
`, m.League.Id, foreignId(m.TeamHome.Id), foreignId(m.TeamAway.Id), foreignId(m.PlayerHome.Id),
|
||||
foreignId(m.PlayerAway.Id), iValue(m.MatchDay), sValue(m.Round), iValue(m.Leg), sign,
|
||||
iValue(m.MatchDayId), sValue(m.BaseUrl), m.StartDate, sValue(m.Error), sValue(m.Trace)).Scan(&m.Id)
|
||||
}
|
||||
|
||||
func DeleteMatch(m *match.Match) (int64, error) {
|
||||
res, err := pg.psqlConn.Exec(`
|
||||
DELETE FROM mainapp_match
|
||||
WHERE id = $1
|
||||
`, m.Id)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return res.RowsAffected()
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package postgres
|
||||
|
||||
import (
|
||||
"1bet.fr/scraper/match"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
@ -8,7 +9,12 @@ import (
|
|||
"1bet.fr/scraper/utils"
|
||||
)
|
||||
|
||||
var n *news.News
|
||||
var (
|
||||
se *news.Source
|
||||
ns *news.News
|
||||
tm *match.Team
|
||||
mh *match.Match
|
||||
)
|
||||
|
||||
func TestConnect(t *testing.T) {
|
||||
return
|
||||
|
@ -24,8 +30,20 @@ func TestListLeagues(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestUpdateLeague(t *testing.T) {
|
||||
league := &match.League{Id: 1}
|
||||
updated, err := UpdateLeague(league)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if updated != 1 {
|
||||
t.Errorf("unexpected %d updated rows", updated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestListSources(t *testing.T) {
|
||||
sources, err := ListSources()
|
||||
se = sources[0]
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error : %s", err)
|
||||
}
|
||||
|
@ -38,8 +56,8 @@ func TestInsertNews(t *testing.T) {
|
|||
tags := []string{"Test", "Hello Toto"}
|
||||
cleanTags := []string{"test", "hello-toto"}
|
||||
nowTime := time.Now()
|
||||
n = &news.News{
|
||||
Source: &news.Source{Id: 1, Sport: &news.Sport{Id: 1}},
|
||||
ns = &news.News{
|
||||
Source: se,
|
||||
PubDate: &nowTime,
|
||||
Link: "https://test.com/toto",
|
||||
Title: "Hello toto",
|
||||
|
@ -47,30 +65,30 @@ func TestInsertNews(t *testing.T) {
|
|||
Tags: &tags,
|
||||
CleanTags: &cleanTags,
|
||||
}
|
||||
err := InsertNews(n)
|
||||
err := InsertNews(ns)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if n.Id == 0 {
|
||||
if ns.Id == 0 {
|
||||
t.Errorf("unexpected value 0 for n.Id")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateNews(t *testing.T) {
|
||||
content := []string{"toto", "test"}
|
||||
n.Content = &content
|
||||
n.Author = utils.StringPointer("T. Toto")
|
||||
updated, err := UpdateNews(n)
|
||||
ns.Content = &content
|
||||
ns.Author = utils.StringPointer("T. Toto")
|
||||
updated, err := UpdateNews(ns)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if updated != 1 {
|
||||
t.Errorf("unexpected %d update rows", updated)
|
||||
t.Errorf("unexpected %d updated rows", updated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteNews(t *testing.T) {
|
||||
deleted, err := DeleteNews(n)
|
||||
deleted, err := DeleteNews(ns)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -79,6 +97,60 @@ func TestDeleteNews(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestInsertTeamBySourceName(t *testing.T) {
|
||||
teamNames := map[string]string{utils.HostMatchendirect: "Toto"}
|
||||
league := &match.League{
|
||||
Id: 1,
|
||||
Sport: &match.Sport{Id: 1},
|
||||
Country: &match.Country{Id: 1},
|
||||
Gender: utils.IntPointer(match.GenderMale),
|
||||
}
|
||||
tm = &match.Team{Names: &teamNames}
|
||||
for _, _ = range []int{0, 1} {
|
||||
if err := InsertTeamBySourceName(tm, league); err != nil {
|
||||
t.Errorf("unexpected error : %s", err)
|
||||
}
|
||||
if tm.Id == 0 {
|
||||
t.Error("unexpected zero team.Id")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestInsertMatch(t *testing.T) {
|
||||
startDate := time.Now()
|
||||
mh = match.NewMatch(&match.League{Id: 1}, tm, tm, &match.Player{}, &match.Player{}, "", 0, 0)
|
||||
mh.StartDate = &startDate
|
||||
mh.BaseUrl = utils.StringPointer("https://test.com/toto")
|
||||
for _, _ = range []int{0, 1} {
|
||||
if err := InsertMatch(mh); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if mh.Id == 0 {
|
||||
t.Errorf("unexpected zero match.Id")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteMatch(t *testing.T) {
|
||||
deleted, err := DeleteMatch(mh)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if deleted != 1 {
|
||||
t.Errorf("unexpected %d matches deleted", deleted)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteTeam(t *testing.T) {
|
||||
deleted, err := DeleteTeam(tm)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if deleted != 1 {
|
||||
t.Errorf("unexpected %d teams deleted", deleted)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClose(t *testing.T) {
|
||||
Close()
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ predictions between friends.
|
|||
I decline any responsibility for whatever use you may make of this project.
|
||||
|
||||
|
||||
## 2- DEPLOYMENT
|
||||
## 2- DEPLOYMENT
|
||||
|
||||
The deployment is very simple as the binary `scraper` can be used directly.
|
||||
|
||||
|
|
|
@ -2,20 +2,23 @@ package requests
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/go-redis/redis"
|
||||
"golang.org/x/net/proxy"
|
||||
)
|
||||
|
||||
const (
|
||||
torAddr = "socks5://127.0.0.1:9050"
|
||||
userAgent = "Mozilla/5.0 (X11; Linux x86_64…) Gecko/20100101 Firefox/68.0"
|
||||
redisAddr = "127.0.0.1:6379"
|
||||
torAddr = "socks5://127.0.0.1:9050"
|
||||
defaultAgent = "Mozilla/5.0 (X11; Linux x86_64…) Gecko/20100101 Firefox/68.0"
|
||||
)
|
||||
|
||||
var cli *http.Client
|
||||
var red *redis.Client
|
||||
|
||||
func init() {
|
||||
proxyUrl, err := url.Parse(torAddr)
|
||||
|
@ -34,6 +37,13 @@ func init() {
|
|||
cli = &http.Client{
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
red = redis.NewClient(&redis.Options{
|
||||
Addr: redisAddr,
|
||||
})
|
||||
if pong := red.Ping().Val(); pong != "PONG" {
|
||||
log.Fatalf("unexpected response from redis PING conmmand : %s", pong)
|
||||
}
|
||||
}
|
||||
|
||||
func GetDocumentFromURL(url string) (*goquery.Document, error) {
|
||||
|
@ -41,7 +51,11 @@ func GetDocumentFromURL(url string) (*goquery.Document, error) {
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("error while building request: %s", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", userAgent)
|
||||
agent := red.SRandMember("agents").Val()
|
||||
if agent == "" {
|
||||
agent = defaultAgent
|
||||
}
|
||||
req.Header.Set("User-Agent", agent)
|
||||
|
||||
resp, err := cli.Do(req)
|
||||
if err != nil {
|
||||
|
|
|
@ -32,7 +32,7 @@ func Sanitize(s string) (t string) {
|
|||
} else if int(c) >= int('0') && int(c) <= int('9') {
|
||||
t += string(c)
|
||||
} else if int(c) >= int('A') && int(c) <= int('Z') {
|
||||
t += string(int(c) - int('A') + int('a'))
|
||||
t += string(rune(int(c) - int('A') + int('a')))
|
||||
} else if v, ok := symbols[c]; ok {
|
||||
t += v
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue