New function updateSchedule
This commit is contained in:
parent
bd0366fdc3
commit
f07ed95702
1
go.mod
1
go.mod
|
@ -4,6 +4,7 @@ go 1.12
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/PuerkitoBio/goquery v1.6.0
|
github.com/PuerkitoBio/goquery v1.6.0
|
||||||
|
github.com/go-redis/redis v6.15.9+incompatible
|
||||||
github.com/lib/pq v1.8.0
|
github.com/lib/pq v1.8.0
|
||||||
github.com/mmcdole/gofeed v1.1.0
|
github.com/mmcdole/gofeed v1.1.0
|
||||||
golang.org/x/net v0.0.0-20201009032441-dbdefad45b89
|
golang.org/x/net v0.0.0-20201009032441-dbdefad45b89
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -7,6 +7,8 @@ github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9Pq
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg=
|
||||||
|
github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA=
|
||||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
|
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
|
||||||
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
|
|
67
main.go
67
main.go
|
@ -1,13 +1,13 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"1bet.fr/scraper/utils"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/lib/pq"
|
||||||
"log"
|
"log"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/lib/pq"
|
|
||||||
|
|
||||||
"1bet.fr/scraper/match"
|
"1bet.fr/scraper/match"
|
||||||
"1bet.fr/scraper/news"
|
"1bet.fr/scraper/news"
|
||||||
"1bet.fr/scraper/postgres"
|
"1bet.fr/scraper/postgres"
|
||||||
|
@ -84,38 +84,59 @@ func updateSchedule() {
|
||||||
}
|
}
|
||||||
|
|
||||||
waitGroup := sync.WaitGroup{}
|
waitGroup := sync.WaitGroup{}
|
||||||
sourceChannel := make(chan *match.Source)
|
leagueChannel := make(chan *match.League)
|
||||||
for i := 0; i < nbProcesses; i++ {
|
for i := 0; i < nbProcesses; i++ {
|
||||||
waitGroup.Add(1)
|
waitGroup.Add(1)
|
||||||
go func(sc chan *match.Source, wg *sync.WaitGroup) {
|
go func(lc chan *match.League, wg *sync.WaitGroup) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
for s := range sc {
|
for l := range lc {
|
||||||
matches, err := s.GetMatches()
|
sources, err := l.ListSources()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
l.Error = utils.StringPointer("list sources error")
|
||||||
|
l.Trace = utils.StringPointer(fmt.Sprint(err))
|
||||||
|
if updated, err := postgres.UpdateLeague(l); err != nil {
|
||||||
|
log.Fatalf("error while updating league : %s", err)
|
||||||
|
} else if updated != 1 {
|
||||||
|
log.Fatalf("error while updating league : %d league(s) updated", updated)
|
||||||
|
}
|
||||||
|
break
|
||||||
}
|
}
|
||||||
|
for _, s := range sources {
|
||||||
for _, m := range matches {
|
log.Printf("[+] Parsing source %s", s.URL)
|
||||||
fmt.Println(m)
|
matches, err := s.GetMatches()
|
||||||
//if err = postgres.InsertMatch(m); err != nil {
|
if err != nil {
|
||||||
// log.Fatal(err)
|
log.Printf("[-] error while getting matches from league source %s : %s", s.URL, err)
|
||||||
//}
|
s.League.Error = utils.StringPointer("league source error")
|
||||||
|
s.League.Trace = utils.StringPointer(fmt.Sprint(err))
|
||||||
|
if updated, err := postgres.UpdateLeague(s.League); err != nil {
|
||||||
|
log.Fatalf("error while updating league : %s", err)
|
||||||
|
} else if updated != 1 {
|
||||||
|
log.Fatalf("error while updating league : %d league(s) updated", updated)
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
for _, m := range matches {
|
||||||
|
if err := postgres.InsertTeamBySourceName(m.TeamHome, s.League); err != nil {
|
||||||
|
log.Fatalf("error while saving team home : %s", err)
|
||||||
|
}
|
||||||
|
if err := postgres.InsertTeamBySourceName(m.TeamAway, s.League); err != nil {
|
||||||
|
log.Fatalf("error while saving team away : %s", err)
|
||||||
|
}
|
||||||
|
if err := postgres.InsertMatch(m); err != nil {
|
||||||
|
log.Fatalf("error while saving match : %s", err)
|
||||||
|
}
|
||||||
|
log.Printf("New match #%d (%s - %s)", m.Id, m.TeamHome.Name, m.TeamAway.Name)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
}(sourceChannel, &waitGroup)
|
}(leagueChannel, &waitGroup)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, league := range leagues {
|
for _, league := range leagues {
|
||||||
sources, err := league.ListSources()
|
leagueChannel <- league
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
for _, s := range sources {
|
|
||||||
log.Printf("Adding source %s", s.Url.String())
|
|
||||||
sourceChannel <- s
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
close(leagueChannel)
|
||||||
|
waitGroup.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
|
336
match/match.go
336
match/match.go
|
@ -2,6 +2,7 @@ package match
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -13,122 +14,206 @@ import (
|
||||||
"1bet.fr/scraper/utils"
|
"1bet.fr/scraper/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
_ = iota
|
||||||
|
GenderMale = iota
|
||||||
|
//GenderFemale = iota
|
||||||
|
)
|
||||||
|
|
||||||
type Sport struct {
|
type Sport struct {
|
||||||
Id int
|
Id int
|
||||||
Name string
|
Name string
|
||||||
CleanName string
|
CleanName string
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
type Country struct {
|
||||||
_ = iota
|
Id int
|
||||||
GenderMale = iota
|
}
|
||||||
GenderFemale = iota
|
|
||||||
)
|
type Player struct {
|
||||||
|
Id int
|
||||||
|
FullName string
|
||||||
|
CleanName string
|
||||||
|
Gender int
|
||||||
|
}
|
||||||
|
|
||||||
type Team struct {
|
type Team struct {
|
||||||
Id int
|
Id int
|
||||||
SportId int
|
Sport *Sport
|
||||||
CountryId int
|
Country *Country
|
||||||
|
|
||||||
Name string
|
Name string
|
||||||
CleanName string
|
CleanName string
|
||||||
|
|
||||||
ShortName string
|
ShortName *string
|
||||||
LongName string
|
LongName *string
|
||||||
Gender int
|
Gender *int
|
||||||
|
|
||||||
Names interface{}
|
Names *map[string]string
|
||||||
Url string
|
PlayersUrl *string
|
||||||
Images interface{}
|
Images *interface{}
|
||||||
|
|
||||||
Tags []string
|
Tags *[]string
|
||||||
CleanTags []string
|
CleanTags *[]string
|
||||||
NewsCount int
|
NewsCount int
|
||||||
|
|
||||||
Error string
|
Error *string
|
||||||
Trace string
|
Trace *string
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
|
||||||
_ = iota
|
|
||||||
LegFirst = iota
|
|
||||||
LegSecond = iota
|
|
||||||
LegReplay = iota
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
_ = iota
|
|
||||||
StatusFirstTime = iota
|
|
||||||
StatusHalfTime = iota
|
|
||||||
StatusSecondTime = iota
|
|
||||||
StatusFirstExtra = iota
|
|
||||||
StatusHalfExtra = iota
|
|
||||||
StatusSecondExtra = iota
|
|
||||||
StatusShootout = iota
|
|
||||||
StatusWaitScores = iota
|
|
||||||
StatusOver = iota
|
|
||||||
StatusPostponed = iota
|
|
||||||
StatusCancelled = iota
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
_ = iota
|
|
||||||
WinnerHome = iota
|
|
||||||
WinnerAway = iota
|
|
||||||
WinnerDraw = iota
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
_ = iota
|
|
||||||
ExtraTimeExtraTime = iota
|
|
||||||
ExtraTimeShootout = iota
|
|
||||||
)
|
|
||||||
|
|
||||||
type Match struct {
|
type Match struct {
|
||||||
Id int
|
Id int
|
||||||
LeagueId int
|
League *League
|
||||||
TeamHomeId int
|
TeamHome *Team
|
||||||
TeamAwayId int
|
TeamAway *Team
|
||||||
PlayerHomeId int
|
PlayerHome *Player
|
||||||
PlayerAwayId int
|
PlayerAway *Player
|
||||||
|
|
||||||
MatchDay int
|
MatchDay *int
|
||||||
MatchDayId int
|
Round *string
|
||||||
Round string
|
Leg *int
|
||||||
Leg int
|
|
||||||
|
|
||||||
BaseUrl string
|
BaseUrl *string
|
||||||
ScoreUrl string
|
ScoreUrl *string
|
||||||
LiveUrl string
|
LiveUrl *string
|
||||||
TvChannels []string
|
TvChannels *[]string
|
||||||
|
|
||||||
Status int
|
MatchDayId *int
|
||||||
Minute int
|
Status *int
|
||||||
|
Minute *string
|
||||||
StartDate *time.Time
|
StartDate *time.Time
|
||||||
EndDate *time.Time
|
EndDate *time.Time
|
||||||
HomeScore int
|
HomeScore *int
|
||||||
AwayScore int
|
AwayScore *int
|
||||||
SetsScore int
|
SetsScore *[]interface{}
|
||||||
Winner int
|
Winner *int
|
||||||
ExtraTime int
|
ExtraTime *int
|
||||||
ShootoutHome int
|
ShootoutHome *int
|
||||||
ShootoutAway int
|
ShootoutAway *int
|
||||||
|
|
||||||
Squad []interface{}
|
Squad *[]interface{}
|
||||||
Events []interface{}
|
Events *[]interface{}
|
||||||
Stats []interface{}
|
Stats *[]interface{}
|
||||||
Live []interface{}
|
Live *[]interface{}
|
||||||
LastEvent interface{}
|
LastEvent *interface{}
|
||||||
LastEventDate *time.Time
|
LastEventDate *time.Time
|
||||||
|
|
||||||
Error string
|
Error *string
|
||||||
Trace string
|
Trace *string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewMatch(league *League, teamHome *Team, teamAway *Team, playerHome *Player, playerAway *Player, round string, matchDay int, leg int) *Match {
|
||||||
|
return &Match{
|
||||||
|
League: league,
|
||||||
|
TeamHome: teamHome,
|
||||||
|
TeamAway: teamAway,
|
||||||
|
PlayerHome: playerHome,
|
||||||
|
PlayerAway: playerAway,
|
||||||
|
MatchDay: utils.IntPointer(matchDay),
|
||||||
|
Round: utils.StringPointer(round),
|
||||||
|
Leg: utils.IntPointer(leg),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type source struct {
|
||||||
|
League *League
|
||||||
|
URL *url.URL
|
||||||
|
matchDay int
|
||||||
|
round string
|
||||||
|
|
||||||
|
currentDate *time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
func newSource(league *League, scheme string, host string, path string, query string, round string, matchDay int) *source {
|
||||||
|
return &source{
|
||||||
|
League: league,
|
||||||
|
URL: &url.URL{
|
||||||
|
Scheme: scheme,
|
||||||
|
Host: host,
|
||||||
|
Path: path,
|
||||||
|
RawQuery: query,
|
||||||
|
},
|
||||||
|
round: round,
|
||||||
|
matchDay: matchDay,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *source) GetMatches() ([]*Match, error) {
|
||||||
|
var matches []*Match
|
||||||
|
|
||||||
|
switch s.URL.Host {
|
||||||
|
case utils.HostMatchendirect:
|
||||||
|
doc, err := requests.GetDocumentFromURL(s.URL.String())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) {
|
||||||
|
headCell := row.Find("th")
|
||||||
|
if len(headCell.Nodes) == 1 {
|
||||||
|
curDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(headCell.Text()))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("unexpected error while parsing date : %s", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.currentDate = &curDate
|
||||||
|
} else {
|
||||||
|
match := NewMatch(s.League, &Team{Gender: s.League.Gender}, &Team{Gender: s.League.Gender}, &Player{}, &Player{}, s.round, s.matchDay, 0)
|
||||||
|
|
||||||
|
startTime := strings.TrimSpace(row.Find("td.lm1").Text())
|
||||||
|
if startTime == "-- : --" {
|
||||||
|
startTime = "00:00"
|
||||||
|
}
|
||||||
|
startDate, err := time.Parse(
|
||||||
|
"2006-01-02 15:04 MST",
|
||||||
|
s.currentDate.Format("2006-01-02 ") + startTime + " CEST",
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
match.Error = utils.StringPointer("parse date error")
|
||||||
|
match.Trace = utils.StringPointer(fmt.Sprint(err))
|
||||||
|
matches = append(matches, match)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
match.StartDate = &startDate
|
||||||
|
|
||||||
|
homeNames := map[string]string{utils.HostMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")}
|
||||||
|
match.TeamHome.Names = &homeNames
|
||||||
|
awayNames := map[string]string{utils.HostMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")}
|
||||||
|
match.TeamAway.Names = &awayNames
|
||||||
|
|
||||||
|
basePath, ok := row.Find(".lm3 a").First().Attr("href")
|
||||||
|
if !ok {
|
||||||
|
match.Error = utils.StringPointer("unable to find href attr")
|
||||||
|
match.Trace = utils.StringPointer("unable to find href attribute for element '.lm3 a'")
|
||||||
|
matches = append(matches, match)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
baseUrl := &url.URL{Scheme: s.URL.Scheme, Host: s.URL.Host, Path: basePath}
|
||||||
|
match.BaseUrl = utils.StringPointer(baseUrl.String())
|
||||||
|
|
||||||
|
if match.Round == nil && match.MatchDay == nil && s.League.MatchDays != nil && s.League.MatchesByMatchDay != nil {
|
||||||
|
if s.League.currentMatchDayId == 0 {
|
||||||
|
s.League.currentMatchDay ++
|
||||||
|
}
|
||||||
|
curMatchDay := s.League.currentMatchDay
|
||||||
|
curMatchDayId := s.League.currentMatchDayId
|
||||||
|
match.MatchDay = &curMatchDay
|
||||||
|
match.MatchDayId = &curMatchDayId
|
||||||
|
s.League.currentMatchDayId = (s.League.currentMatchDayId + 1) % *s.League.MatchesByMatchDay
|
||||||
|
}
|
||||||
|
matches = append(matches, match)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unexpected source url %s", s.URL.String())
|
||||||
|
}
|
||||||
|
return matches, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type League struct {
|
type League struct {
|
||||||
Id int
|
Id int
|
||||||
Sport *Sport
|
Sport *Sport
|
||||||
CountryId int
|
Country *Country
|
||||||
|
|
||||||
Name string
|
Name string
|
||||||
CleanName string
|
CleanName string
|
||||||
|
@ -140,7 +225,6 @@ type League struct {
|
||||||
ChannelUrl *string
|
ChannelUrl *string
|
||||||
|
|
||||||
MatchDays *int
|
MatchDays *int
|
||||||
CurrentMatchDay *int
|
|
||||||
MatchesByMatchDay *int
|
MatchesByMatchDay *int
|
||||||
TeamCount int
|
TeamCount int
|
||||||
|
|
||||||
|
@ -158,10 +242,13 @@ type League struct {
|
||||||
|
|
||||||
Error *string
|
Error *string
|
||||||
Trace *string
|
Trace *string
|
||||||
|
|
||||||
|
currentMatchDay int
|
||||||
|
currentMatchDayId int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *League) ListSources() ([]*Source, error) {
|
func (l *League) ListSources() ([]*source, error) {
|
||||||
var sources []*Source
|
var sources []*source
|
||||||
|
|
||||||
if l.ScheduleUrl == nil {
|
if l.ScheduleUrl == nil {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
|
@ -187,19 +274,13 @@ func (l *League) ListSources() ([]*Source, error) {
|
||||||
parsedDate := strings.Split(strings.Split(value, "/")[3], "-")
|
parsedDate := strings.Split(strings.Split(value, "/")[3], "-")
|
||||||
year := utils.AtoI(parsedDate[0])
|
year := utils.AtoI(parsedDate[0])
|
||||||
week := utils.AtoI(parsedDate[1])
|
week := utils.AtoI(parsedDate[1])
|
||||||
if year >= 2020 && week >= 34 {
|
if (year == 2020 && week >= 34) || year > 2020 {
|
||||||
sources = append(sources, &Source{
|
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, value, "", "", 0))
|
||||||
League: l,
|
|
||||||
Url: &url.URL{
|
|
||||||
Scheme: originUrl.Scheme,
|
|
||||||
Host: originUrl.Host,
|
|
||||||
Path: value,
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
case utils.HostEurosport, utils.HostRugbyrama:
|
case utils.HostEurosport, utils.HostRugbyrama:
|
||||||
eurosportRegexp := regexp.MustCompile(`(\d)+e\s+Journée`)
|
curRegexp := regexp.MustCompile(`(\d)+e\s+Journée`)
|
||||||
|
|
||||||
ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
|
ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -212,74 +293,23 @@ func (l *League) ListSources() ([]*Source, error) {
|
||||||
ajaxQuery := ajaxParsedUrl.Query()
|
ajaxQuery := ajaxParsedUrl.Query()
|
||||||
|
|
||||||
doc.Find("#results-match-nav .rounds-dropdown__round").Each(func (i int, s *goquery.Selection) {
|
doc.Find("#results-match-nav .rounds-dropdown__round").Each(func (i int, s *goquery.Selection) {
|
||||||
var round *string
|
matchDay := 0
|
||||||
var matchDay *int
|
round, _ := s.Attr("data-label")
|
||||||
|
reMatch := curRegexp.FindStringSubmatch(round)
|
||||||
roundStr, _ := s.Attr("data-label")
|
|
||||||
reMatch := eurosportRegexp.FindStringSubmatch(roundStr)
|
|
||||||
if reMatch != nil {
|
if reMatch != nil {
|
||||||
mdayInt := utils.AtoI(reMatch[1])
|
round = ""
|
||||||
matchDay = &mdayInt
|
matchDay = utils.AtoI(reMatch[1])
|
||||||
} else {
|
|
||||||
round = &roundStr
|
|
||||||
}
|
}
|
||||||
|
|
||||||
roundId, _ := s.Attr("data-round-id")
|
roundId, _ := s.Attr("data-round-id")
|
||||||
ajaxQuery.Set("roundid", roundId)
|
ajaxQuery.Set("roundid", roundId)
|
||||||
|
sources = append(sources, newSource(l, originUrl.Scheme, originUrl.Host, ajaxParsedUrl.Path, ajaxQuery.Encode(), round, matchDay))
|
||||||
sources = append(sources, &Source{
|
|
||||||
League: l,
|
|
||||||
Url: &url.URL{
|
|
||||||
Scheme: originUrl.Scheme,
|
|
||||||
Host: originUrl.Host,
|
|
||||||
Path: ajaxParsedUrl.Path,
|
|
||||||
RawQuery: ajaxQuery.Encode(),
|
|
||||||
},
|
|
||||||
Round: round,
|
|
||||||
MatchDay: matchDay,
|
|
||||||
})
|
|
||||||
})
|
})
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
|
return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
|
||||||
}
|
}
|
||||||
return sources, nil
|
return sources, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type Source struct {
|
|
||||||
League *League
|
|
||||||
Url *url.URL
|
|
||||||
MatchDay *int
|
|
||||||
Round *string
|
|
||||||
currentDate *time.Time
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Source) GetMatches() ([]*Match, error) {
|
|
||||||
var matches []*Match
|
|
||||||
|
|
||||||
switch s.Url.Host {
|
|
||||||
case utils.HostMatchendirect:
|
|
||||||
doc, err := requests.GetDocumentFromURL(s.Url.String())
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) {
|
|
||||||
row.Children().Each(func (j int, col *goquery.Selection) {
|
|
||||||
colspan, ok := col.Attr("colspan")
|
|
||||||
if ok && colspan == "4" {
|
|
||||||
currentDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(col.Text()))
|
|
||||||
if err != nil {
|
|
||||||
fmt.Println(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
s.currentDate = &currentDate
|
|
||||||
fmt.Println(s.currentDate)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
})
|
|
||||||
case utils.HostEurosport, utils.HostRugbyrama:
|
|
||||||
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("unexpected source url %s", s.Url.String())
|
|
||||||
}
|
|
||||||
return matches, nil
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package match
|
package match
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"1bet.fr/scraper/utils"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
@ -21,8 +22,8 @@ func TestLeague_ListSources(t *testing.T) {
|
||||||
t.Errorf("no sources found")
|
t.Errorf("no sources found")
|
||||||
}
|
}
|
||||||
for _, s := range sources {
|
for _, s := range sources {
|
||||||
if !strings.HasPrefix(s.Url.String(), "http://www.matchendirect.fr/france/ligue-1/") {
|
if !strings.HasPrefix(s.URL.String(), "http://www.matchendirect.fr/france/ligue-1/") {
|
||||||
t.Errorf("unexpected source url %s", s.Url)
|
t.Errorf("unexpected source url %s", s.URL)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -39,8 +40,8 @@ func TestLeague_ListSources(t *testing.T) {
|
||||||
t.Errorf("no sources found")
|
t.Errorf("no sources found")
|
||||||
}
|
}
|
||||||
for _, s := range sources {
|
for _, s := range sources {
|
||||||
if !strings.HasPrefix(s.Url.String(), "https://www.eurosport.fr/") {
|
if !strings.HasPrefix(s.URL.String(), "https://www.eurosport.fr/") {
|
||||||
t.Errorf("unexpected source url %s", s.Url)
|
t.Errorf("unexpected source url %s", s.URL)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,17 +58,47 @@ func TestLeague_ListSources(t *testing.T) {
|
||||||
t.Errorf("no sources found")
|
t.Errorf("no sources found")
|
||||||
}
|
}
|
||||||
for _, s := range sources {
|
for _, s := range sources {
|
||||||
if !strings.HasPrefix(s.Url.String(), "https://www.rugbyrama.fr/") {
|
if !strings.HasPrefix(s.URL.String(), "https://www.rugbyrama.fr/") {
|
||||||
t.Errorf("unexpected source url %s", s.Url)
|
t.Errorf("unexpected source url %s", s.URL)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSource_GetMatches(t *testing.T) {
|
func TestSource_GetMatches(t *testing.T) {
|
||||||
sourceUrl, _ := url.Parse("https://www.matchendirect.fr/france/ligue-1/2020-37/")
|
sourceUrl, _ := url.Parse("https://www.matchendirect.fr/france/ligue-1/2020-37/")
|
||||||
source := &Source{
|
league := &League{Id: 1, MatchDays: utils.IntPointer(38), MatchesByMatchDay: utils.IntPointer(10)}
|
||||||
League: &League{Id: 1},
|
source := newSource(league, sourceUrl.Scheme, sourceUrl.Host, sourceUrl.Path, "", "", 0)
|
||||||
Url: sourceUrl,
|
matches, err := source.GetMatches()
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
for _, m := range matches {
|
||||||
|
if m.League == nil {
|
||||||
|
t.Error("unexpected nil match.League")
|
||||||
|
}
|
||||||
|
if m.StartDate == nil {
|
||||||
|
t.Error("unexpected nil match.StartDate")
|
||||||
|
}
|
||||||
|
if m.TeamHome.Names == nil {
|
||||||
|
t.Error("unexpected nil match.TeamHome.Names")
|
||||||
|
}
|
||||||
|
if m.TeamAway.Names == nil {
|
||||||
|
t.Error("unexpected nil match.TeamAway.Names")
|
||||||
|
}
|
||||||
|
if m.PlayerHome == nil {
|
||||||
|
t.Error("unexpected nil match.PlayerHome")
|
||||||
|
}
|
||||||
|
if m.PlayerAway == nil {
|
||||||
|
t.Error("unexpected nil match.PlayerAway")
|
||||||
|
}
|
||||||
|
if m.MatchDay == nil {
|
||||||
|
t.Error("unexpected nil match.MatchDay")
|
||||||
|
}
|
||||||
|
if m.MatchDayId == nil {
|
||||||
|
t.Error("unexpected nil match.MatchDayId")
|
||||||
|
}
|
||||||
|
if m.Error != nil {
|
||||||
|
t.Errorf("unexpected not nil match.Error : %s", *m.Error)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
source.GetMatches()
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,7 +137,8 @@ func (n *News) Feed() error {
|
||||||
n.Author = utils.StringPointer(doc.Find(".article__author a").Text())
|
n.Author = utils.StringPointer(doc.Find(".article__author a").Text())
|
||||||
|
|
||||||
default:
|
default:
|
||||||
n.Error = utils.StringPointer("unknown host " + parsedLink.Host)
|
n.Error = utils.StringPointer("unknown link host")
|
||||||
|
n.Trace = utils.StringPointer("unknown link host : " + n.Link)
|
||||||
}
|
}
|
||||||
|
|
||||||
if n.Content == nil {
|
if n.Content == nil {
|
||||||
|
|
|
@ -94,7 +94,7 @@ func TestNews_Feed(t *testing.T) {
|
||||||
t.Errorf("cleanTags does not contain football")
|
t.Errorf("cleanTags does not contain football")
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Logf("testing feed from Foot Mercato")
|
t.Logf("testing feed from Rugbyrama")
|
||||||
n = &News{
|
n = &News{
|
||||||
Source: &Source{Sport: &Sport{Name: "Rugby", CleanName: "rugby"}},
|
Source: &Source{Sport: &Sport{Name: "Rugby", CleanName: "rugby"}},
|
||||||
Link: "https://www.rugbyrama.fr/rugby/top-14/2018-2019/top-14-face-au-racing-92-toulouse-n-aura-pas-de-marge-de-manoeuvre_sto7939622/story.shtml",
|
Link: "https://www.rugbyrama.fr/rugby/top-14/2018-2019/top-14-face-au-racing-92-toulouse-n-aura-pas-de-marge-de-manoeuvre_sto7939622/story.shtml",
|
||||||
|
@ -136,10 +136,10 @@ func TestSource_ListNews(t *testing.T) {
|
||||||
}
|
}
|
||||||
for _, n := range newsList {
|
for _, n := range newsList {
|
||||||
if n.Title == "" {
|
if n.Title == "" {
|
||||||
t.Errorf("unexpected empty title")
|
t.Errorf("unexpected empty news title")
|
||||||
}
|
}
|
||||||
if n.Image == nil {
|
if n.Link == "" {
|
||||||
t.Errorf("unexpected nil image")
|
t.Errorf("unexpected empty news link")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
package postgres
|
package postgres
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/lib/pq"
|
"github.com/lib/pq"
|
||||||
|
|
||||||
|
@ -46,6 +49,25 @@ func aValue(a *[]string) interface{} {
|
||||||
return pq.Array(*a)
|
return pq.Array(*a)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func foreignId(i int) interface{} {
|
||||||
|
if i == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
|
func concatWS(a []interface{}, s string) string {
|
||||||
|
var b []string
|
||||||
|
for _, x := range a {
|
||||||
|
if x == nil {
|
||||||
|
b = append(b, "")
|
||||||
|
} else {
|
||||||
|
b = append(b, fmt.Sprint(x))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return strings.Join(b, s)
|
||||||
|
}
|
||||||
|
|
||||||
var pg *Postgres
|
var pg *Postgres
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
@ -131,9 +153,9 @@ func ListLeagues() ([]*match.League, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
league := &match.League{Sport: &match.Sport{}}
|
league := &match.League{Sport: &match.Sport{}, Country: &match.Country{}}
|
||||||
if err = rows.Scan(
|
if err = rows.Scan(
|
||||||
&league.Id, &league.Sport.Id, &league.CountryId, &league.Name, &league.CleanName, &league.Gender,
|
&league.Id, &league.Sport.Id, &league.Country.Id, &league.Name, &league.CleanName, &league.Gender,
|
||||||
&league.ScheduleUrl, &league.RankingUrl, &league.ChannelUrl,
|
&league.ScheduleUrl, &league.RankingUrl, &league.ChannelUrl,
|
||||||
&league.MatchDays, &league.MatchesByMatchDay, pq.Array(&league.Rounds), pq.Array(&league.Groups),
|
&league.MatchDays, &league.MatchesByMatchDay, pq.Array(&league.Rounds), pq.Array(&league.Groups),
|
||||||
&league.Sport.Name, &league.Sport.CleanName,
|
&league.Sport.Name, &league.Sport.CleanName,
|
||||||
|
@ -145,9 +167,22 @@ func ListLeagues() ([]*match.League, error) {
|
||||||
return leagues, nil
|
return leagues, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func UpdateLeague(l *match.League) (int64, error) {
|
||||||
|
res, err := pg.psqlConn.Exec(`
|
||||||
|
UPDATE mainapp_league
|
||||||
|
SET error = $1, trace = $2
|
||||||
|
WHERE id = $3
|
||||||
|
`, sValue(l.Error), sValue(l.Trace), l.Id,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return res.RowsAffected()
|
||||||
|
}
|
||||||
|
|
||||||
func InsertNews(n *news.News) error {
|
func InsertNews(n *news.News) error {
|
||||||
return pg.psqlConn.QueryRow(`
|
return pg.psqlConn.QueryRow(`
|
||||||
INSERT INTO public.mainapp_news
|
INSERT INTO mainapp_news
|
||||||
(title, clean_title, link, pub_date, description, image, teaser, author,
|
(title, clean_title, link, pub_date, description, image, teaser, author,
|
||||||
content, redirect, haystack, tags, clean_tags, error, trace,
|
content, redirect, haystack, tags, clean_tags, error, trace,
|
||||||
league_id, source_id, team_id)
|
league_id, source_id, team_id)
|
||||||
|
@ -165,7 +200,7 @@ func InsertNews(n *news.News) error {
|
||||||
|
|
||||||
func UpdateNews(n *news.News) (int64, error) {
|
func UpdateNews(n *news.News) (int64, error) {
|
||||||
res, err := pg.psqlConn.Exec(`
|
res, err := pg.psqlConn.Exec(`
|
||||||
UPDATE public.mainapp_news
|
UPDATE mainapp_news
|
||||||
SET title = $1, clean_title = $2, pub_date = $3, link = $4, description = $5,
|
SET title = $1, clean_title = $2, pub_date = $3, link = $4, description = $5,
|
||||||
image = $6, teaser = $7, author = $8, content = $9, redirect = $10,
|
image = $6, teaser = $7, author = $8, content = $9, redirect = $10,
|
||||||
haystack = $11, tags = $12, clean_tags = $13, error = $14, trace = $15,
|
haystack = $11, tags = $12, clean_tags = $13, error = $14, trace = $15,
|
||||||
|
@ -185,7 +220,7 @@ func UpdateNews(n *news.News) (int64, error) {
|
||||||
|
|
||||||
func DeleteNews(n *news.News) (int64, error) {
|
func DeleteNews(n *news.News) (int64, error) {
|
||||||
res, err := pg.psqlConn.Exec(`
|
res, err := pg.psqlConn.Exec(`
|
||||||
DELETE FROM public.mainapp_news
|
DELETE FROM mainapp_news
|
||||||
WHERE id = $1
|
WHERE id = $1
|
||||||
`, n.Id)
|
`, n.Id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -193,3 +228,84 @@ func DeleteNews(n *news.News) (int64, error) {
|
||||||
}
|
}
|
||||||
return res.RowsAffected()
|
return res.RowsAffected()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func InsertTeamBySourceName(t *match.Team, l *match.League) error {
|
||||||
|
var host, name string
|
||||||
|
|
||||||
|
if t.Names == nil {
|
||||||
|
return fmt.Errorf("no source name given")
|
||||||
|
}
|
||||||
|
shortName := ""
|
||||||
|
for host, name = range *t.Names {
|
||||||
|
runeName := []rune(strings.ToUpper(name))
|
||||||
|
shortName = string(runeName[:int(math.Min(3, float64(len(runeName))))])
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err := pg.psqlConn.QueryRow("SELECT id, name FROM mainapp_team WHERE names->>$1 = $2", host, name).Scan(&t.Id, &t.Name)
|
||||||
|
if err != nil {
|
||||||
|
cleanName := utils.Sanitize(name)
|
||||||
|
jsonHost := fmt.Sprintf("{\"%s\"}", utils.Sanitize(host))
|
||||||
|
return pg.psqlConn.QueryRow(`
|
||||||
|
INSERT INTO mainapp_team
|
||||||
|
(sport_id, country_id, name, clean_name, short_name, long_name, gender,
|
||||||
|
names, tags, clean_tags, news_count)
|
||||||
|
VALUES
|
||||||
|
($1, $2, $3, $4, $5, $6, $7,
|
||||||
|
jsonb_set('{}', $8, to_jsonb($9::text), true), $10, $11, 0)
|
||||||
|
ON CONFLICT ON CONSTRAINT custom_unique_team DO UPDATE SET
|
||||||
|
names = jsonb_set(mainapp_team.names, $12, to_jsonb($13::text), true)
|
||||||
|
RETURNING id, name
|
||||||
|
`, l.Sport.Id, l.Country.Id, name, cleanName, shortName, name, iValue(l.Gender),
|
||||||
|
jsonHost, name, pq.Array([]string{name}), pq.Array([]string{cleanName}),
|
||||||
|
jsonHost, name).Scan(&t.Id, &t.Name)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func DeleteTeam(t *match.Team) (int64, error) {
|
||||||
|
res, err := pg.psqlConn.Exec(`
|
||||||
|
DELETE FROM mainapp_team
|
||||||
|
WHERE id = $1
|
||||||
|
`, t.Id)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return res.RowsAffected()
|
||||||
|
}
|
||||||
|
|
||||||
|
func InsertMatch(m *match.Match) error {
|
||||||
|
var arr []interface{}
|
||||||
|
arr = append(arr, m.League.Id)
|
||||||
|
arr = append(arr, foreignId(m.TeamHome.Id))
|
||||||
|
arr = append(arr, foreignId(m.TeamAway.Id))
|
||||||
|
arr = append(arr, foreignId(m.PlayerHome.Id))
|
||||||
|
arr = append(arr, foreignId(m.PlayerAway.Id))
|
||||||
|
arr = append(arr, sValue(m.Round))
|
||||||
|
hash := sha256.New()
|
||||||
|
hash.Write([]byte(concatWS(arr, "/")))
|
||||||
|
sign := fmt.Sprintf("%x", hash.Sum(nil))
|
||||||
|
|
||||||
|
return pg.psqlConn.QueryRow(`
|
||||||
|
INSERT INTO mainapp_match
|
||||||
|
(league_id, team_home_id, team_away_id, player_home_id, player_away_id, mday, round, leg, sign,
|
||||||
|
mday_id, base_url, start_date, error, trace)
|
||||||
|
VALUES
|
||||||
|
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
|
||||||
|
ON CONFLICT ON CONSTRAINT mainapp_match_sign_key DO UPDATE SET
|
||||||
|
base_url = $11, start_date = $12, error = $13, trace = $14
|
||||||
|
RETURNING id
|
||||||
|
`, m.League.Id, foreignId(m.TeamHome.Id), foreignId(m.TeamAway.Id), foreignId(m.PlayerHome.Id),
|
||||||
|
foreignId(m.PlayerAway.Id), iValue(m.MatchDay), sValue(m.Round), iValue(m.Leg), sign,
|
||||||
|
iValue(m.MatchDayId), sValue(m.BaseUrl), m.StartDate, sValue(m.Error), sValue(m.Trace)).Scan(&m.Id)
|
||||||
|
}
|
||||||
|
|
||||||
|
func DeleteMatch(m *match.Match) (int64, error) {
|
||||||
|
res, err := pg.psqlConn.Exec(`
|
||||||
|
DELETE FROM mainapp_match
|
||||||
|
WHERE id = $1
|
||||||
|
`, m.Id)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return res.RowsAffected()
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
package postgres
|
package postgres
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"1bet.fr/scraper/match"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -8,7 +9,12 @@ import (
|
||||||
"1bet.fr/scraper/utils"
|
"1bet.fr/scraper/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
var n *news.News
|
var (
|
||||||
|
se *news.Source
|
||||||
|
ns *news.News
|
||||||
|
tm *match.Team
|
||||||
|
mh *match.Match
|
||||||
|
)
|
||||||
|
|
||||||
func TestConnect(t *testing.T) {
|
func TestConnect(t *testing.T) {
|
||||||
return
|
return
|
||||||
|
@ -24,8 +30,20 @@ func TestListLeagues(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUpdateLeague(t *testing.T) {
|
||||||
|
league := &match.League{Id: 1}
|
||||||
|
updated, err := UpdateLeague(league)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if updated != 1 {
|
||||||
|
t.Errorf("unexpected %d updated rows", updated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestListSources(t *testing.T) {
|
func TestListSources(t *testing.T) {
|
||||||
sources, err := ListSources()
|
sources, err := ListSources()
|
||||||
|
se = sources[0]
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("unexpected error : %s", err)
|
t.Errorf("unexpected error : %s", err)
|
||||||
}
|
}
|
||||||
|
@ -38,8 +56,8 @@ func TestInsertNews(t *testing.T) {
|
||||||
tags := []string{"Test", "Hello Toto"}
|
tags := []string{"Test", "Hello Toto"}
|
||||||
cleanTags := []string{"test", "hello-toto"}
|
cleanTags := []string{"test", "hello-toto"}
|
||||||
nowTime := time.Now()
|
nowTime := time.Now()
|
||||||
n = &news.News{
|
ns = &news.News{
|
||||||
Source: &news.Source{Id: 1, Sport: &news.Sport{Id: 1}},
|
Source: se,
|
||||||
PubDate: &nowTime,
|
PubDate: &nowTime,
|
||||||
Link: "https://test.com/toto",
|
Link: "https://test.com/toto",
|
||||||
Title: "Hello toto",
|
Title: "Hello toto",
|
||||||
|
@ -47,30 +65,30 @@ func TestInsertNews(t *testing.T) {
|
||||||
Tags: &tags,
|
Tags: &tags,
|
||||||
CleanTags: &cleanTags,
|
CleanTags: &cleanTags,
|
||||||
}
|
}
|
||||||
err := InsertNews(n)
|
err := InsertNews(ns)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
if n.Id == 0 {
|
if ns.Id == 0 {
|
||||||
t.Errorf("unexpected value 0 for n.Id")
|
t.Errorf("unexpected value 0 for n.Id")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestUpdateNews(t *testing.T) {
|
func TestUpdateNews(t *testing.T) {
|
||||||
content := []string{"toto", "test"}
|
content := []string{"toto", "test"}
|
||||||
n.Content = &content
|
ns.Content = &content
|
||||||
n.Author = utils.StringPointer("T. Toto")
|
ns.Author = utils.StringPointer("T. Toto")
|
||||||
updated, err := UpdateNews(n)
|
updated, err := UpdateNews(ns)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
if updated != 1 {
|
if updated != 1 {
|
||||||
t.Errorf("unexpected %d update rows", updated)
|
t.Errorf("unexpected %d updated rows", updated)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDeleteNews(t *testing.T) {
|
func TestDeleteNews(t *testing.T) {
|
||||||
deleted, err := DeleteNews(n)
|
deleted, err := DeleteNews(ns)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
|
@ -79,6 +97,60 @@ func TestDeleteNews(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestInsertTeamBySourceName(t *testing.T) {
|
||||||
|
teamNames := map[string]string{utils.HostMatchendirect: "Toto"}
|
||||||
|
league := &match.League{
|
||||||
|
Id: 1,
|
||||||
|
Sport: &match.Sport{Id: 1},
|
||||||
|
Country: &match.Country{Id: 1},
|
||||||
|
Gender: utils.IntPointer(match.GenderMale),
|
||||||
|
}
|
||||||
|
tm = &match.Team{Names: &teamNames}
|
||||||
|
for _, _ = range []int{0, 1} {
|
||||||
|
if err := InsertTeamBySourceName(tm, league); err != nil {
|
||||||
|
t.Errorf("unexpected error : %s", err)
|
||||||
|
}
|
||||||
|
if tm.Id == 0 {
|
||||||
|
t.Error("unexpected zero team.Id")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInsertMatch(t *testing.T) {
|
||||||
|
startDate := time.Now()
|
||||||
|
mh = match.NewMatch(&match.League{Id: 1}, tm, tm, &match.Player{}, &match.Player{}, "", 0, 0)
|
||||||
|
mh.StartDate = &startDate
|
||||||
|
mh.BaseUrl = utils.StringPointer("https://test.com/toto")
|
||||||
|
for _, _ = range []int{0, 1} {
|
||||||
|
if err := InsertMatch(mh); err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if mh.Id == 0 {
|
||||||
|
t.Errorf("unexpected zero match.Id")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteMatch(t *testing.T) {
|
||||||
|
deleted, err := DeleteMatch(mh)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if deleted != 1 {
|
||||||
|
t.Errorf("unexpected %d matches deleted", deleted)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteTeam(t *testing.T) {
|
||||||
|
deleted, err := DeleteTeam(tm)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if deleted != 1 {
|
||||||
|
t.Errorf("unexpected %d teams deleted", deleted)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestClose(t *testing.T) {
|
func TestClose(t *testing.T) {
|
||||||
Close()
|
Close()
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,7 +32,7 @@ predictions between friends.
|
||||||
I decline any responsibility for any use you may make of this project.
|
I decline any responsibility for any use you may make of this project.
|
||||||
|
|
||||||
|
|
||||||
## 2- DEPLOYMENT
|
## 2- DEPLOYMENT
|
||||||
|
|
||||||
The deployment is very simple as the binary `scraper` can be used directly.
|
The deployment is very simple as the binary `scraper` can be used directly.
|
||||||
|
|
||||||
|
|
|
@ -2,20 +2,23 @@ package requests
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/PuerkitoBio/goquery"
|
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
"github.com/go-redis/redis"
|
||||||
"golang.org/x/net/proxy"
|
"golang.org/x/net/proxy"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
torAddr = "socks5://127.0.0.1:9050"
|
redisAddr = "127.0.0.1:6379"
|
||||||
userAgent = "Mozilla/5.0 (X11; Linux x86_64…) Gecko/20100101 Firefox/68.0"
|
torAddr = "socks5://127.0.0.1:9050"
|
||||||
|
defaultAgent = "Mozilla/5.0 (X11; Linux x86_64…) Gecko/20100101 Firefox/68.0"
|
||||||
)
|
)
|
||||||
|
|
||||||
var cli *http.Client
|
var cli *http.Client
|
||||||
|
var red *redis.Client
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
proxyUrl, err := url.Parse(torAddr)
|
proxyUrl, err := url.Parse(torAddr)
|
||||||
|
@ -34,6 +37,13 @@ func init() {
|
||||||
cli = &http.Client{
|
cli = &http.Client{
|
||||||
Transport: transport,
|
Transport: transport,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
red = redis.NewClient(&redis.Options{
|
||||||
|
Addr: redisAddr,
|
||||||
|
})
|
||||||
|
if pong := red.Ping().Val(); pong != "PONG" {
|
||||||
|
log.Fatalf("unexpected response from redis PING conmmand : %s", pong)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetDocumentFromURL(url string) (*goquery.Document, error) {
|
func GetDocumentFromURL(url string) (*goquery.Document, error) {
|
||||||
|
@ -41,7 +51,11 @@ func GetDocumentFromURL(url string) (*goquery.Document, error) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error while building request: %s", err)
|
return nil, fmt.Errorf("error while building request: %s", err)
|
||||||
}
|
}
|
||||||
req.Header.Set("User-Agent", userAgent)
|
agent := red.SRandMember("agents").Val()
|
||||||
|
if agent == "" {
|
||||||
|
agent = defaultAgent
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", agent)
|
||||||
|
|
||||||
resp, err := cli.Do(req)
|
resp, err := cli.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -32,7 +32,7 @@ func Sanitize(s string) (t string) {
|
||||||
} else if int(c) >= int('0') && int(c) <= int('9') {
|
} else if int(c) >= int('0') && int(c) <= int('9') {
|
||||||
t += string(c)
|
t += string(c)
|
||||||
} else if int(c) >= int('A') && int(c) <= int('Z') {
|
} else if int(c) >= int('A') && int(c) <= int('Z') {
|
||||||
t += string(int(c) - int('A') + int('a'))
|
t += string(rune(int(c) - int('A') + int('a')))
|
||||||
} else if v, ok := symbols[c]; ok {
|
} else if v, ok := symbols[c]; ok {
|
||||||
t += v
|
t += v
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in New Issue