Improve updateNews function
Start developing updateSchedule function. Use of go modules
This commit is contained in:
parent
d87bedc8d6
commit
98d3cbe7e2
|
@ -0,0 +1,10 @@
|
||||||
|
module 1bet.fr/scraper
|
||||||
|
|
||||||
|
go 1.12
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/PuerkitoBio/goquery v1.6.0
|
||||||
|
github.com/lib/pq v1.8.0
|
||||||
|
github.com/mmcdole/gofeed v1.1.0
|
||||||
|
golang.org/x/net v0.0.0-20201009032441-dbdefad45b89
|
||||||
|
)
|
|
@ -0,0 +1,47 @@
|
||||||
|
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||||
|
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
||||||
|
github.com/PuerkitoBio/goquery v1.6.0 h1:j7taAbelrdcsOlGeMenZxc2AWXD5fieT1/znArdnx94=
|
||||||
|
github.com/PuerkitoBio/goquery v1.6.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
||||||
|
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||||
|
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
|
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
|
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
|
||||||
|
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
|
github.com/lib/pq v1.8.0 h1:9xohqzkUwzR4Ga4ivdTcawVS89YSDVxXMa3xJX3cGzg=
|
||||||
|
github.com/lib/pq v1.8.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||||
|
github.com/mmcdole/gofeed v1.1.0 h1:T2WrGLVJRV04PY2qwhEJLHCt9JiCtBhb6SmC8ZvJH08=
|
||||||
|
github.com/mmcdole/gofeed v1.1.0/go.mod h1:PPiVwgDXLlz2N83KB4TrIim2lyYM5Zn7ZWH9Pi4oHUk=
|
||||||
|
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
|
||||||
|
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
|
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
|
||||||
|
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
|
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
|
github.com/urfave/cli v1.22.3/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||||
|
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
|
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||||
|
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
|
||||||
|
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/net v0.0.0-20201009032441-dbdefad45b89 h1:1GKfLldebiSdhTlt3nalwrb7L40Tixr/0IH+kSbRgmk=
|
||||||
|
golang.org/x/net v0.0.0-20201009032441-dbdefad45b89/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||||
|
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
||||||
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
135
main.go
135
main.go
|
@ -1,12 +1,16 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/lib/pq"
|
"flag"
|
||||||
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"./news"
|
"github.com/lib/pq"
|
||||||
"./postgres"
|
|
||||||
|
"1bet.fr/scraper/match"
|
||||||
|
"1bet.fr/scraper/news"
|
||||||
|
"1bet.fr/scraper/postgres"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -17,18 +21,9 @@ const (
|
||||||
func updateNews() {
|
func updateNews() {
|
||||||
defer postgres.Close()
|
defer postgres.Close()
|
||||||
|
|
||||||
sports, err := postgres.ListSports()
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
mapSports := map[int]*news.Sport{}
|
|
||||||
for _, sport := range sports {
|
|
||||||
mapSports[sport.Id] = sport
|
|
||||||
}
|
|
||||||
|
|
||||||
sources, err := postgres.ListSources()
|
sources, err := postgres.ListSources()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatalf("error while getting list of sources : %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
waitGroup := sync.WaitGroup{}
|
waitGroup := sync.WaitGroup{}
|
||||||
|
@ -39,49 +34,103 @@ func updateNews() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
for n := range nc {
|
for n := range nc {
|
||||||
if err := n.Feed(); err != nil {
|
if err := n.Feed(); err != nil {
|
||||||
log.Fatal(err)
|
log.Fatalf("error while feeding news : %s", err)
|
||||||
}
|
}
|
||||||
if err := postgres.UpdateNews(n); err != nil {
|
if _, err := postgres.UpdateNews(n); err != nil {
|
||||||
log.Fatal(err)
|
log.Fatalf("error while update news in postgres : %s", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}(newsChannel, &waitGroup)
|
}(newsChannel, &waitGroup)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, source := range sources {
|
for _, source := range sources {
|
||||||
for sportId, url := range source.Urls {
|
log.Printf("[+] Starting parse of source : %s", source.FeedUrl)
|
||||||
log.Printf("[+] Starting parse of source : %s", url)
|
newsList, err := source.ListNews()
|
||||||
newsList, err := source.ListNews(mapSports[sportId], url)
|
if err != nil {
|
||||||
if err != nil {
|
log.Fatal(err)
|
||||||
log.Fatal(err)
|
}
|
||||||
}
|
|
||||||
|
|
||||||
for i, n := range newsList {
|
for i, n := range newsList {
|
||||||
if i >= maxNewsPerSource {
|
if i >= maxNewsPerSource {
|
||||||
log.Printf("Stopping parse of source with %d news added", i)
|
log.Printf("Stopping parse of source with %d news added", i)
|
||||||
break
|
break
|
||||||
}
|
|
||||||
if err := postgres.InsertNews(n); err != nil {
|
|
||||||
if err, ok := err.(*pq.Error); ok {
|
|
||||||
if err.Code.Name() == "unique_violation" {
|
|
||||||
log.Printf("Stopping parse of source (unique violation) with %d news added", i)
|
|
||||||
break
|
|
||||||
} else {
|
|
||||||
log.Fatalf("error while getting pq.Error object")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log.Printf("Adding news %s", n.Link)
|
|
||||||
newsChannel <- n
|
|
||||||
}
|
}
|
||||||
|
if err := postgres.InsertNews(n); err != nil {
|
||||||
|
if err, ok := err.(*pq.Error); ok {
|
||||||
|
if err.Code.Name() == "unique_violation" {
|
||||||
|
log.Printf("Stopping parse of source (unique violation) with %d news added", i)
|
||||||
|
break
|
||||||
|
} else {
|
||||||
|
log.Fatalf("error while inserting news in postgres : %s", err.Message)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Fatalf("error while getting pq.Error object")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Printf("Adding news %s", n.Link)
|
||||||
|
newsChannel <- n
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
close(newsChannel)
|
close(newsChannel)
|
||||||
waitGroup.Wait()
|
waitGroup.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func updateSchedule() {
|
||||||
updateNews()
|
defer postgres.Close()
|
||||||
|
|
||||||
|
leagues, err := postgres.ListLeagues()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
waitGroup := sync.WaitGroup{}
|
||||||
|
sourceChannel := make(chan *match.Source)
|
||||||
|
for i := 0; i < nbProcesses; i++ {
|
||||||
|
waitGroup.Add(1)
|
||||||
|
go func(sc chan *match.Source, wg *sync.WaitGroup) {
|
||||||
|
defer wg.Done()
|
||||||
|
for s := range sc {
|
||||||
|
matches, err := s.GetMatches()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range matches {
|
||||||
|
fmt.Println(m)
|
||||||
|
//if err = postgres.InsertMatch(m); err != nil {
|
||||||
|
// log.Fatal(err)
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}(sourceChannel, &waitGroup)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, league := range leagues {
|
||||||
|
sources, err := league.ListSources()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
for _, s := range sources {
|
||||||
|
log.Printf("Adding source %s", s.Url.String())
|
||||||
|
sourceChannel <- s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.Parse()
|
||||||
|
args := flag.Args()
|
||||||
|
if len(args) != 1 {
|
||||||
|
log.Fatalf("unexpected number of args : len(%s) != 1", args)
|
||||||
|
}
|
||||||
|
|
||||||
|
switch args[0] {
|
||||||
|
case "news":
|
||||||
|
updateNews()
|
||||||
|
case "schedule":
|
||||||
|
updateSchedule()
|
||||||
|
default:
|
||||||
|
log.Fatalf("unexpected arg : %s", args[0])
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,285 @@
|
||||||
|
package match
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/url"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
|
||||||
|
"1bet.fr/scraper/requests"
|
||||||
|
"1bet.fr/scraper/utils"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Sport identifies a sport by its numeric id and its names.
// CleanName is presumably the sanitized form of Name — TODO confirm.
type Sport struct {
	Id              int
	Name, CleanName string
}
|
||||||
|
|
||||||
|
// Gender values. They start at 1; 0 is left unassigned.
const (
	GenderMale = iota + 1
	GenderFemale
)
|
||||||
|
|
||||||
|
// Team holds the attributes of a team.
// NOTE(review): the field set presumably mirrors a database row — confirm
// against the postgres package.
type Team struct {
	Id, SportId, CountryId int

	Name, CleanName, ShortName, LongName string
	// Gender is presumably one of the Gender* constants — TODO confirm.
	Gender int

	Names  interface{}
	Url    string
	Images interface{}

	Tags, CleanTags []string
	NewsCount       int

	Error, Trace string
}
|
||||||
|
|
||||||
|
// Leg values. They start at 1; 0 is left unassigned.
const (
	LegFirst = iota + 1
	LegSecond
	LegReplay
)
|
||||||
|
|
||||||
|
// Status values. They start at 1; 0 is left unassigned.
const (
	StatusFirstTime = iota + 1
	StatusHalfTime
	StatusSecondTime
	StatusFirstExtra
	StatusHalfExtra
	StatusSecondExtra
	StatusShootout
	StatusWaitScores
	StatusOver
	StatusPostponed
	StatusCancelled
)
|
||||||
|
|
||||||
|
// Winner values. They start at 1; 0 is left unassigned.
const (
	WinnerHome = iota + 1
	WinnerAway
	WinnerDraw
)
|
||||||
|
|
||||||
|
// ExtraTime values. They start at 1; 0 is left unassigned.
const (
	ExtraTimeExtraTime = iota + 1
	ExtraTimeShootout
)
|
||||||
|
|
||||||
|
// Match holds the attributes of a single match: participants, position in
// the competition, source urls, live state, scores and collected details.
// NOTE(review): Leg, Status, Winner and ExtraTime presumably take the
// Leg*, Status*, Winner* and ExtraTime* constants — confirm.
type Match struct {
	Id, LeagueId               int
	TeamHomeId, TeamAwayId     int
	PlayerHomeId, PlayerAwayId int

	MatchDay, MatchDayId int
	Round                string
	Leg                  int

	BaseUrl, ScoreUrl, LiveUrl string
	TvChannels                 []string

	Status, Minute                  int
	StartDate, EndDate              *time.Time
	HomeScore, AwayScore, SetsScore int
	Winner, ExtraTime               int
	ShootoutHome, ShootoutAway      int

	Squad, Events, Stats, Live []interface{}
	LastEvent                  interface{}
	LastEventDate              *time.Time

	Error, Trace string
}
|
||||||
|
|
||||||
|
type League struct {
|
||||||
|
Id int
|
||||||
|
Sport *Sport
|
||||||
|
CountryId int
|
||||||
|
|
||||||
|
Name string
|
||||||
|
CleanName string
|
||||||
|
Gender *int
|
||||||
|
Degree *int
|
||||||
|
|
||||||
|
ScheduleUrl *string
|
||||||
|
RankingUrl *string
|
||||||
|
ChannelUrl *string
|
||||||
|
|
||||||
|
MatchDays *int
|
||||||
|
CurrentMatchDay *int
|
||||||
|
MatchesByMatchDay *int
|
||||||
|
TeamCount int
|
||||||
|
|
||||||
|
Rounds []string
|
||||||
|
Groups []string
|
||||||
|
Points interface{}
|
||||||
|
Promotions interface{}
|
||||||
|
|
||||||
|
Images interface{}
|
||||||
|
Schedule interface{}
|
||||||
|
|
||||||
|
Tags []string
|
||||||
|
CleanTags []string
|
||||||
|
NewsCount int
|
||||||
|
|
||||||
|
Error *string
|
||||||
|
Trace *string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *League) ListSources() ([]*Source, error) {
|
||||||
|
var sources []*Source
|
||||||
|
|
||||||
|
if l.ScheduleUrl == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
doc, err := requests.GetDocumentFromURL(*l.ScheduleUrl)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
originUrl, err := url.Parse(*l.ScheduleUrl)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch originUrl.Host {
|
||||||
|
case utils.HostMatchendirect:
|
||||||
|
doc.Find(".fDate option").Each(func (i int, s *goquery.Selection) {
|
||||||
|
value, ok := s.Attr("value")
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
parsedDate := strings.Split(strings.Split(value, "/")[3], "-")
|
||||||
|
year := utils.AtoI(parsedDate[0])
|
||||||
|
week := utils.AtoI(parsedDate[1])
|
||||||
|
if year >= 2020 && week >= 34 {
|
||||||
|
sources = append(sources, &Source{
|
||||||
|
League: l,
|
||||||
|
Url: &url.URL{
|
||||||
|
Scheme: originUrl.Scheme,
|
||||||
|
Host: originUrl.Host,
|
||||||
|
Path: value,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
case utils.HostEurosport, utils.HostRugbyrama:
|
||||||
|
eurosportRegexp := regexp.MustCompile(`(\d)+e\s+Journée`)
|
||||||
|
|
||||||
|
ajaxUrl, ok := doc.Find(".ajax-container").Attr("data-ajax-url")
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("ajax-container url not found")
|
||||||
|
}
|
||||||
|
ajaxParsedUrl, err := url.Parse(ajaxUrl)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
ajaxQuery := ajaxParsedUrl.Query()
|
||||||
|
|
||||||
|
doc.Find("#results-match-nav .rounds-dropdown__round").Each(func (i int, s *goquery.Selection) {
|
||||||
|
var round *string
|
||||||
|
var matchDay *int
|
||||||
|
|
||||||
|
roundStr, _ := s.Attr("data-label")
|
||||||
|
reMatch := eurosportRegexp.FindStringSubmatch(roundStr)
|
||||||
|
if reMatch != nil {
|
||||||
|
mdayInt := utils.AtoI(reMatch[1])
|
||||||
|
matchDay = &mdayInt
|
||||||
|
} else {
|
||||||
|
round = &roundStr
|
||||||
|
}
|
||||||
|
|
||||||
|
roundId, _ := s.Attr("data-round-id")
|
||||||
|
ajaxQuery.Set("roundid", roundId)
|
||||||
|
|
||||||
|
sources = append(sources, &Source{
|
||||||
|
League: l,
|
||||||
|
Url: &url.URL{
|
||||||
|
Scheme: originUrl.Scheme,
|
||||||
|
Host: originUrl.Host,
|
||||||
|
Path: ajaxParsedUrl.Path,
|
||||||
|
RawQuery: ajaxQuery.Encode(),
|
||||||
|
},
|
||||||
|
Round: round,
|
||||||
|
MatchDay: matchDay,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unknown source url : %s", *l.ScheduleUrl)
|
||||||
|
}
|
||||||
|
return sources, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type Source struct {
|
||||||
|
League *League
|
||||||
|
Url *url.URL
|
||||||
|
MatchDay *int
|
||||||
|
Round *string
|
||||||
|
currentDate *time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Source) GetMatches() ([]*Match, error) {
|
||||||
|
var matches []*Match
|
||||||
|
|
||||||
|
switch s.Url.Host {
|
||||||
|
case utils.HostMatchendirect:
|
||||||
|
doc, err := requests.GetDocumentFromURL(s.Url.String())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
doc.Find("#livescore tr").Each(func (i int, row *goquery.Selection) {
|
||||||
|
row.Children().Each(func (j int, col *goquery.Selection) {
|
||||||
|
colspan, ok := col.Attr("colspan")
|
||||||
|
if ok && colspan == "4" {
|
||||||
|
currentDate, err := time.Parse("Monday 02 January 2006", utils.EnglishDateString(col.Text()))
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.currentDate = ¤tDate
|
||||||
|
fmt.Println(s.currentDate)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
case utils.HostEurosport, utils.HostRugbyrama:
|
||||||
|
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unexpected source url %s", s.Url.String())
|
||||||
|
}
|
||||||
|
return matches, nil
|
||||||
|
}
|
|
@ -0,0 +1,73 @@
|
||||||
|
package match
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLeague_ListSources(t *testing.T) {
|
||||||
|
t.Log("Testing matchendirect.fr sources...")
|
||||||
|
scheduleUrl := "http://www.matchendirect.fr/france/ligue-1/"
|
||||||
|
league := League{
|
||||||
|
ScheduleUrl: &scheduleUrl,
|
||||||
|
}
|
||||||
|
sources, err := league.ListSources()
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if len(sources) == 0 {
|
||||||
|
t.Errorf("no sources found")
|
||||||
|
}
|
||||||
|
for _, s := range sources {
|
||||||
|
if !strings.HasPrefix(s.Url.String(), "http://www.matchendirect.fr/france/ligue-1/") {
|
||||||
|
t.Errorf("unexpected source url %s", s.Url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Log("Testing eurosport.fr sources...")
|
||||||
|
scheduleUrl = "https://www.eurosport.fr/tennis/open-d-australie-messieurs/2020/standing.shtml"
|
||||||
|
league = League{
|
||||||
|
ScheduleUrl: &scheduleUrl,
|
||||||
|
}
|
||||||
|
sources, err = league.ListSources()
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if len(sources) == 0 {
|
||||||
|
t.Errorf("no sources found")
|
||||||
|
}
|
||||||
|
for _, s := range sources {
|
||||||
|
if !strings.HasPrefix(s.Url.String(), "https://www.eurosport.fr/") {
|
||||||
|
t.Errorf("unexpected source url %s", s.Url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Log("Testing rugbyrama.fr sources...")
|
||||||
|
scheduleUrl = "https://www.rugbyrama.fr/rugby/top-14/calendar-result.shtml"
|
||||||
|
league = League{
|
||||||
|
ScheduleUrl: &scheduleUrl,
|
||||||
|
}
|
||||||
|
sources, err = league.ListSources()
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if len(sources) == 0 {
|
||||||
|
t.Errorf("no sources found")
|
||||||
|
}
|
||||||
|
for _, s := range sources {
|
||||||
|
if !strings.HasPrefix(s.Url.String(), "https://www.rugbyrama.fr/") {
|
||||||
|
t.Errorf("unexpected source url %s", s.Url)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSource_GetMatches(t *testing.T) {
|
||||||
|
sourceUrl, _ := url.Parse("https://www.matchendirect.fr/france/ligue-1/2020-37/")
|
||||||
|
source := &Source{
|
||||||
|
League: &League{Id: 1},
|
||||||
|
Url: sourceUrl,
|
||||||
|
}
|
||||||
|
source.GetMatches()
|
||||||
|
}
|
151
news/news.go
151
news/news.go
|
@ -2,16 +2,16 @@ package news
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
|
||||||
"github.com/mmcdole/gofeed"
|
"github.com/mmcdole/gofeed"
|
||||||
|
|
||||||
"../requests"
|
"1bet.fr/scraper/requests"
|
||||||
"../utils"
|
"1bet.fr/scraper/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Sport struct {
|
type Sport struct {
|
||||||
|
@ -22,9 +22,10 @@ type Sport struct {
|
||||||
|
|
||||||
type Source struct {
|
type Source struct {
|
||||||
Id int
|
Id int
|
||||||
|
Sport *Sport
|
||||||
Name string
|
Name string
|
||||||
CleanName string
|
CleanName string
|
||||||
Urls map[int]string
|
FeedUrl string
|
||||||
|
|
||||||
Error *string
|
Error *string
|
||||||
Trace *string
|
Trace *string
|
||||||
|
@ -33,105 +34,128 @@ type Source struct {
|
||||||
type News struct {
|
type News struct {
|
||||||
Id int
|
Id int
|
||||||
Source *Source
|
Source *Source
|
||||||
Sport *Sport
|
LeagueId *int
|
||||||
LeagueId int
|
TeamId *int
|
||||||
TeamId int
|
|
||||||
|
|
||||||
Title string
|
Title string
|
||||||
CleanTitle string
|
CleanTitle string
|
||||||
PubDate *time.Time
|
|
||||||
Description string
|
|
||||||
Link string
|
Link string
|
||||||
Image string
|
PubDate *time.Time
|
||||||
|
Description *string
|
||||||
|
Image *string
|
||||||
|
|
||||||
Teaser string
|
Teaser *string
|
||||||
Author string
|
Author *string
|
||||||
Content []string
|
Content *[]string
|
||||||
Redirect string
|
Redirect *string
|
||||||
|
|
||||||
Haystack string
|
Haystack *string
|
||||||
Tags []string
|
Tags *[]string
|
||||||
CleanTags []string
|
CleanTags *[]string
|
||||||
|
|
||||||
Error string
|
Error *string
|
||||||
Trace string
|
Trace *string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *News) Feed() error {
|
func (n *News) Feed() error {
|
||||||
|
parsedLink, err := url.Parse(n.Link)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
doc, err := requests.GetDocumentFromURL(n.Link)
|
doc, err := requests.GetDocumentFromURL(n.Link)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
switch n.Source.Name {
|
switch parsedLink.Host {
|
||||||
case "Eurosport":
|
case utils.HostEurosport:
|
||||||
n.Teaser = strings.TrimSpace(doc.Find("h2").Text())
|
n.Teaser = utils.StringPointer(doc.Find("h2").First().Text())
|
||||||
doc.Find(".article-body .article-s4-rs p").Each(func(i int, s *goquery.Selection) {
|
doc.Find(".article-body .article-s4-rs p").Each(func(i int, s *goquery.Selection) {
|
||||||
n.Content = append(n.Content, s.Text())
|
n.Content = utils.ArrayPointerAppend(n.Content, s.Text())
|
||||||
})
|
})
|
||||||
n.Author = strings.TrimSpace(doc.Find(".flex a.caption-s5-fx div.font-bold").Text())
|
n.Author = utils.StringPointer(doc.Find(".flex a.caption-s5-fx div.font-bold").Text())
|
||||||
doc.Find(".related-topics .atom-tag").Each(func(i int, s *goquery.Selection) {
|
doc.Find(".related-topics .atom-tag").Each(func(i int, s *goquery.Selection) {
|
||||||
tag := strings.TrimSpace(s.Text())
|
tag := strings.TrimSpace(s.Text())
|
||||||
cleanTag := utils.Sanitize(tag)
|
cleanTag := utils.Sanitize(tag)
|
||||||
if !utils.ArrayContains(n.CleanTags, cleanTag) {
|
if !utils.ArrayPointerContains(n.CleanTags, cleanTag) {
|
||||||
n.Tags = append(n.Tags, tag)
|
n.Tags = utils.ArrayPointerAppend(n.Tags, tag)
|
||||||
n.CleanTags = append(n.CleanTags, cleanTag)
|
n.CleanTags = utils.ArrayPointerAppend(n.CleanTags, cleanTag)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
case "L'équipe":
|
|
||||||
n.Teaser = strings.TrimSpace(doc.Find("h2.Article__chapo").Text())
|
case utils.HostRugbyrama:
|
||||||
doc.Find(".Paragraph__content").Each(func(i int, s *goquery.Selection) {
|
n.Teaser = utils.StringPointer(doc.Find("h2.storyfull__teaser").Text())
|
||||||
n.Content = append(n.Content, s.Text())
|
doc.Find(".storyfull__paragraphs p.storyfull__paragraph").Each(func(i int, s *goquery.Selection) {
|
||||||
|
n.Content = utils.ArrayPointerAppend(n.Content, s.Text())
|
||||||
})
|
})
|
||||||
n.Author = strings.TrimSpace(doc.Find(".Author__name").Text())
|
n.Author = utils.StringPointer(strings.Replace(doc.Find(".storyfull__publisher-author-name").Text(), "Par ", "", 1))
|
||||||
|
doc.Find(".storyfull__linkentities-infos a.storyfull__linkentities-name").Each(func(i int, s *goquery.Selection) {
|
||||||
|
tag := strings.TrimSpace(s.Text())
|
||||||
|
cleanTag := utils.Sanitize(tag)
|
||||||
|
if !utils.ArrayPointerContains(n.CleanTags, cleanTag) {
|
||||||
|
n.Tags = utils.ArrayPointerAppend(n.Tags, tag)
|
||||||
|
n.CleanTags = utils.ArrayPointerAppend(n.CleanTags, cleanTag)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
case utils.HostLequipe:
|
||||||
|
n.Teaser = utils.StringPointer(doc.Find("h2.Article__chapo").Text())
|
||||||
|
doc.Find(".Paragraph__content").Each(func(i int, s *goquery.Selection) {
|
||||||
|
n.Content = utils.ArrayPointerAppend(n.Content, s.Text())
|
||||||
|
})
|
||||||
|
n.Author = utils.StringPointer(doc.Find(".Author__name").Text())
|
||||||
doc.Find(".RelatedLinks a.RelatedLinks__link").Each(func(i int, s *goquery.Selection) {
|
doc.Find(".RelatedLinks a.RelatedLinks__link").Each(func(i int, s *goquery.Selection) {
|
||||||
tag := strings.TrimSpace(s.Text())
|
tag := strings.TrimSpace(s.Text())
|
||||||
cleanTag := utils.Sanitize(tag)
|
cleanTag := utils.Sanitize(tag)
|
||||||
if !utils.ArrayContains(n.CleanTags, cleanTag) {
|
if !utils.ArrayPointerContains(n.CleanTags, cleanTag) {
|
||||||
n.Tags = append(n.Tags, tag)
|
n.Tags = utils.ArrayPointerAppend(n.Tags, tag)
|
||||||
n.CleanTags = append(n.CleanTags, cleanTag)
|
n.CleanTags = utils.ArrayPointerAppend(n.CleanTags, cleanTag)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
case "FFTT":
|
|
||||||
n.Teaser = strings.TrimSpace(doc.Find(".news-description p").First().Text())
|
case utils.HostFFTT:
|
||||||
|
n.Teaser = utils.StringPointer(doc.Find(".news-description p").First().Text())
|
||||||
doc.Find(".news-description p").Each(func(i int, s *goquery.Selection) {
|
doc.Find(".news-description p").Each(func(i int, s *goquery.Selection) {
|
||||||
if i > 0 {
|
if i > 0 {
|
||||||
n.Content = append(n.Content, s.Text())
|
n.Content = utils.ArrayPointerAppend(n.Content, s.Text())
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
doc.Find(".social-shares-large-wrapper a.link").Each(func(i int, s *goquery.Selection) {
|
doc.Find(".social-shares-large-wrapper a.link").Each(func(i int, s *goquery.Selection) {
|
||||||
tag := strings.TrimSpace(s.Text())
|
tag := strings.TrimSpace(s.Text())
|
||||||
cleanTag := utils.Sanitize(tag)
|
cleanTag := utils.Sanitize(tag)
|
||||||
if !utils.ArrayContains(n.CleanTags, cleanTag) {
|
if !utils.ArrayPointerContains(n.CleanTags, cleanTag) {
|
||||||
n.Tags = append(n.Tags, tag)
|
n.Tags = utils.ArrayPointerAppend(n.Tags, tag)
|
||||||
n.CleanTags = append(n.CleanTags, cleanTag)
|
n.CleanTags = utils.ArrayPointerAppend(n.CleanTags, cleanTag)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
case "Foot Mercato":
|
|
||||||
n.Teaser = strings.TrimSpace(doc.Find("h2.article__lead").Text())
|
case utils.HostFootmercato:
|
||||||
|
n.Teaser = utils.StringPointer(doc.Find("h2.article__lead").Text())
|
||||||
doc.Find(".article__content p").Each(func(i int, s *goquery.Selection) {
|
doc.Find(".article__content p").Each(func(i int, s *goquery.Selection) {
|
||||||
n.Content = append(n.Content, s.Text())
|
n.Content = utils.ArrayPointerAppend(n.Content, s.Text())
|
||||||
})
|
})
|
||||||
n.Author = strings.TrimSpace(doc.Find(".article__author a").Text())
|
n.Author = utils.StringPointer(doc.Find(".article__author a").Text())
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("unknown source %s", n.Source.Name)
|
n.Error = utils.StringPointer("unknown host " + parsedLink.Host)
|
||||||
}
|
}
|
||||||
if len(n.Content) == 0 {
|
|
||||||
n.Redirect = n.Link
|
if n.Content == nil {
|
||||||
|
n.Redirect = utils.StringPointer(n.Link)
|
||||||
}
|
}
|
||||||
if len(n.CleanTags) == 0 {
|
if n.CleanTags == nil {
|
||||||
n.Tags = append(n.Tags, n.Sport.Name)
|
n.Tags = utils.ArrayPointerAppend(n.Tags, n.Source.Sport.Name)
|
||||||
n.CleanTags = append(n.CleanTags, n.Sport.CleanName)
|
n.CleanTags = utils.ArrayPointerAppend(n.CleanTags, n.Source.Sport.CleanName)
|
||||||
}
|
}
|
||||||
n.Haystack = fmt.Sprintf("%s-%s", n.CleanTitle, strings.Join(n.CleanTags, "-"))
|
n.Haystack = utils.StringPointer(fmt.Sprintf("%s-%s", n.CleanTitle, utils.ArrayPointerJoin(n.CleanTags, "-")))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Source) ListNews(sport *Sport, url string) ([]*News, error) {
|
func (s *Source) ListNews() ([]*News, error) {
|
||||||
var newsList []*News
|
var newsList []*News
|
||||||
|
|
||||||
fp := gofeed.NewParser()
|
fp := gofeed.NewParser()
|
||||||
feed, err := fp.ParseURL(url)
|
feed, err := fp.ParseURL(s.FeedUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -139,33 +163,32 @@ func (s *Source) ListNews(sport *Sport, url string) ([]*News, error) {
|
||||||
for _, item := range feed.Items {
|
for _, item := range feed.Items {
|
||||||
n := &News{
|
n := &News{
|
||||||
Source: s,
|
Source: s,
|
||||||
Sport: sport,
|
|
||||||
Title: item.Title,
|
Title: item.Title,
|
||||||
Description: regexp.MustCompile(`<[^>]*>`).ReplaceAllLiteralString(item.Description, ""),
|
Description: utils.StringPointer(regexp.MustCompile(`<[^>]*>`).ReplaceAllLiteralString(item.Description, "")),
|
||||||
CleanTitle: utils.Sanitize(item.Title),
|
CleanTitle: utils.Sanitize(item.Title),
|
||||||
PubDate: item.PublishedParsed,
|
PubDate: item.PublishedParsed,
|
||||||
Link: item.Link,
|
Link: item.Link,
|
||||||
}
|
}
|
||||||
for _, tags := range item.Categories {
|
for _, tags := range item.Categories {
|
||||||
for _, tag := range strings.Split(tags, ",") {
|
for _, tag := range strings.Split(tags, ",") {
|
||||||
n.Tags = append(n.Tags, strings.TrimSpace(tag))
|
n.Tags = utils.ArrayPointerAppend(n.Tags, tag)
|
||||||
n.CleanTags = append(n.CleanTags, utils.Sanitize(strings.TrimSpace(tag)))
|
n.CleanTags = utils.ArrayPointerAppend(n.CleanTags, utils.Sanitize(tag))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if item.Image != nil {
|
if item.Image != nil {
|
||||||
n.Image = item.Image.URL
|
n.Image = utils.StringPointer(item.Image.URL)
|
||||||
} else if len(item.Enclosures) > 0 {
|
} else if len(item.Enclosures) > 0 {
|
||||||
n.Image = item.Enclosures[0].URL
|
n.Image = utils.StringPointer(item.Enclosures[0].URL)
|
||||||
} else if s.Name == "Eurosport" {
|
} else {
|
||||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(item.Description))
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(item.Description))
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if src, ok := doc.Find("img").Attr("src"); ok {
|
if src, ok := doc.Find("img").Attr("src"); ok {
|
||||||
n.Image = src
|
n.Image = utils.StringPointer(src)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if item.Author != nil {
|
if item.Author != nil {
|
||||||
n.Author = item.Author.Name
|
n.Author = utils.StringPointer(item.Author.Name)
|
||||||
}
|
}
|
||||||
newsList = append(newsList, n)
|
newsList = append(newsList, n)
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,114 +4,130 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"../utils"
|
"1bet.fr/scraper/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
type expectedResult struct {
|
|
||||||
news *News
|
|
||||||
teaser string
|
|
||||||
paragraph string
|
|
||||||
author string
|
|
||||||
urlTags []string
|
|
||||||
haystack string
|
|
||||||
|
|
||||||
source *Source
|
|
||||||
sourceUrl string
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNews_Feed(t *testing.T) {
|
func TestNews_Feed(t *testing.T) {
|
||||||
expList := [4]*expectedResult{
|
var n *News
|
||||||
{
|
|
||||||
news: &News{
|
t.Logf("testing feed from Eurosport")
|
||||||
Source: &Source{Id: 1, Name: "Eurosport"},
|
n = &News{
|
||||||
Sport: &Sport{Id: 1, Name: "Football", UrlName: "football"},
|
Source: &Source{Sport: &Sport{Name: "Football", CleanName: "football"}},
|
||||||
Link: "https://www.eurosport.fr/football/bundesliga/2020-2021/dortmund-au-tapis-thuram-debloque-son-compteur_sto7905745/story.shtml",
|
Link: "https://www.eurosport.fr/football/bundesliga/2020-2021/dortmund-au-tapis-thuram-debloque-son-compteur_sto7905745/story.shtml",
|
||||||
},
|
}
|
||||||
teaser: "BUNDESLIGA – Le Borussia Dortmund et ses jeunes stars ont chuté",
|
if err := n.Feed(); err != nil {
|
||||||
paragraph: "Etonnante Bundesliga. Dortmund battu, Leipzig tenu en échec samedi,",
|
t.Errorf("unexpected error : %s", err)
|
||||||
author: "Eurosport",
|
}
|
||||||
urlTags: []string{"football", "bundesliga"},
|
if !strings.HasPrefix(*n.Teaser, "BUNDESLIGA – Le Borussia Dortmund et ses jeunes stars ont chuté") {
|
||||||
},
|
t.Errorf("unexpected teaser : %s", *n.Teaser)
|
||||||
{
|
}
|
||||||
news: &News{
|
if !strings.HasPrefix((*n.Content)[0], "Etonnante Bundesliga. Dortmund battu, Leipzig tenu en échec samedi,") {
|
||||||
Source: &Source{Id: 2, Name: "L'équipe"},
|
t.Errorf("unexpected content : %s", (*n.Content)[0])
|
||||||
Sport: &Sport{Id: 1, Name: "Football", UrlName: "football"},
|
}
|
||||||
Link: "https://www.lequipe.fr/Football/Actualites/Mitchel-bakker-psg-je-vais-devoir-elever-mon-niveau-de-jeu/1176182",
|
if *n.Author != "Eurosport" {
|
||||||
},
|
t.Errorf("unexpected author : %s", *n.Author)
|
||||||
teaser: "Mitchel Bakker, le latéral néerlandais du PSG",
|
}
|
||||||
paragraph: "« Les absences de Juan Bernat et Layvin Kurzawa",
|
if !utils.ArrayPointerContains(n.CleanTags, "bundesliga") {
|
||||||
author: "H. De.",
|
t.Errorf("cleanTags does not contain bundesliga")
|
||||||
urlTags: []string{"ligue-1", "paris-sg--fra-", "reims--fra-"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
news: &News{
|
|
||||||
Source: &Source{Id: 3, Name: "FFTT"},
|
|
||||||
Sport: &Sport{Id: 6, Name: "Tennis de Table", UrlName: "tennis-de-table"},
|
|
||||||
Link: "http://www.fftt.com/site/actualites/2020-09-22/laura-gasnier-page-qui-se-tourne-avec-bleues",
|
|
||||||
},
|
|
||||||
teaser: "Après 15 années en équipe de France, Laura Gasnier a décidé",
|
|
||||||
paragraph: "Elle évoque un choix personnel qui a demandé plusieurs mois de réflexion",
|
|
||||||
author: "",
|
|
||||||
urlTags: []string{"equipe-de-france", "gasnier-laura"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
news: &News{
|
|
||||||
Source: &Source{Id: 4, Name: "Foot Mercato"},
|
|
||||||
Sport: &Sport{Id: 1, Name: "Football", UrlName: "football"},
|
|
||||||
Link: "https://www.footmercato.net/a3190892483125730002-real-madrid-personne-ne-veut-de-luka-jovic",
|
|
||||||
},
|
|
||||||
teaser: "Alors que la date de fin du mercato approche considérablement,",
|
|
||||||
paragraph: "Tic-tac, tic-tac... Le chrono défile, et le Real Madrid",
|
|
||||||
author: "Max Franco Sanchez",
|
|
||||||
urlTags: []string{"football"},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, exp := range expList {
|
t.Logf("testing feed from L'équipe")
|
||||||
t.Logf("testing feed from %s", exp.news.Source.Name)
|
n = &News{
|
||||||
if err := exp.news.Feed(); err != nil {
|
Source: &Source{Sport: &Sport{Name: "Football", CleanName: "football"}},
|
||||||
t.Errorf("unexpected error : %s", err)
|
Link: "https://www.lequipe.fr/Football/Actualites/Mitchel-bakker-psg-je-vais-devoir-elever-mon-niveau-de-jeu/1176182",
|
||||||
}
|
}
|
||||||
if !strings.HasPrefix(exp.news.Teaser, exp.teaser) {
|
if err := n.Feed(); err != nil {
|
||||||
t.Errorf("unexpected teaser : %s", exp.news.Teaser)
|
t.Errorf("unexpected error : %s", err)
|
||||||
}
|
}
|
||||||
if !strings.HasPrefix(exp.news.Content[0], exp.paragraph) {
|
if !strings.HasPrefix(*n.Teaser, "Mitchel Bakker, le latéral néerlandais du PSG") {
|
||||||
t.Errorf("unexpected content : %s", exp.news.Content[0])
|
t.Errorf("unexpected teaser : %s", *n.Teaser)
|
||||||
}
|
}
|
||||||
if exp.news.Author != exp.author {
|
if !strings.HasPrefix((*n.Content)[0], "« Les absences de Juan Bernat et Layvin Kurzawa") {
|
||||||
t.Errorf("unexpected author : %s", exp.news.Author)
|
t.Errorf("unexpected content : %s", (*n.Content)[0])
|
||||||
}
|
}
|
||||||
for _, urlTag := range exp.urlTags {
|
if *n.Author != "H. De." {
|
||||||
if !utils.ArrayContains(exp.news.UrlTags, urlTag) {
|
t.Errorf("unexpected author : %s", *n.Author)
|
||||||
t.Errorf("urltags does not contain %s", urlTag)
|
}
|
||||||
}
|
if !utils.ArrayPointerContains(n.CleanTags, "paris-sg--fra-") {
|
||||||
}
|
t.Errorf("cleanTags does not contain paris-sg--fra-")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("testing feed from FFTT")
|
||||||
|
n = &News{
|
||||||
|
Source: &Source{Sport: &Sport{Name: "Tennis de Table", CleanName: "tennis-de-table"}},
|
||||||
|
Link: "http://www.fftt.com/site/actualites/2020-09-22/laura-gasnier-page-qui-se-tourne-avec-bleues",
|
||||||
|
}
|
||||||
|
if err := n.Feed(); err != nil {
|
||||||
|
t.Errorf("unexpected error : %s", err)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(*n.Teaser, "Après 15 années en équipe de France, Laura Gasnier a décidé") {
|
||||||
|
t.Errorf("unexpected teaser : %s", *n.Teaser)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix((*n.Content)[0], "Elle évoque un choix personnel qui a demandé plusieurs mois de réflexion") {
|
||||||
|
t.Errorf("unexpected content : %s", (*n.Content)[0])
|
||||||
|
}
|
||||||
|
if n.Author != nil {
|
||||||
|
t.Errorf("unexpected author : %s", *n.Author)
|
||||||
|
}
|
||||||
|
if !utils.ArrayPointerContains(n.CleanTags, "gasnier-laura") {
|
||||||
|
t.Errorf("cleanTags does not contain gasnier-laura")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("testing feed from Foot Mercato")
|
||||||
|
n = &News{
|
||||||
|
Source: &Source{Sport: &Sport{Name: "Football", CleanName: "football"}},
|
||||||
|
Link: "https://www.footmercato.net/a3190892483125730002-real-madrid-personne-ne-veut-de-luka-jovic",
|
||||||
|
}
|
||||||
|
if err := n.Feed(); err != nil {
|
||||||
|
t.Errorf("unexpected error : %s", err)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(*n.Teaser, "Alors que la date de fin du mercato approche considérablement,") {
|
||||||
|
t.Errorf("unexpected teaser : %s", *n.Teaser)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix((*n.Content)[0], "Tic-tac, tic-tac... Le chrono défile, et le Real Madrid") {
|
||||||
|
t.Errorf("unexpected content : %s", (*n.Content)[0])
|
||||||
|
}
|
||||||
|
if *n.Author != "Max Franco Sanchez" {
|
||||||
|
t.Errorf("unexpected author : %s", *n.Author)
|
||||||
|
}
|
||||||
|
if !utils.ArrayPointerContains(n.CleanTags, "football") {
|
||||||
|
t.Errorf("cleanTags does not contain football")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Logf("testing feed from Foot Mercato")
|
||||||
|
n = &News{
|
||||||
|
Source: &Source{Sport: &Sport{Name: "Rugby", CleanName: "rugby"}},
|
||||||
|
Link: "https://www.rugbyrama.fr/rugby/top-14/2018-2019/top-14-face-au-racing-92-toulouse-n-aura-pas-de-marge-de-manoeuvre_sto7939622/story.shtml",
|
||||||
|
}
|
||||||
|
if err := n.Feed(); err != nil {
|
||||||
|
t.Errorf("unexpected error : %s", err)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(*n.Teaser, "TOP 14 - Opposé au Racing 92 à la Paris la Défense Arena") {
|
||||||
|
t.Errorf("unexpected teaser : %s", *n.Teaser)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix((*n.Content)[0], "Réaliser et produire le même contenu") {
|
||||||
|
t.Errorf("unexpected content : %s", (*n.Content)[0])
|
||||||
|
}
|
||||||
|
if *n.Author != "Rugbyrama" {
|
||||||
|
t.Errorf("unexpected author : %s", *n.Author)
|
||||||
|
}
|
||||||
|
if !utils.ArrayPointerContains(n.CleanTags, "top-14") {
|
||||||
|
t.Errorf("cleanTags does not contain football")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSource_ListNews(t *testing.T) {
|
func TestSource_ListNews(t *testing.T) {
|
||||||
expList := []*expectedResult{
|
links := []string{
|
||||||
{
|
"http://www.eurosport.fr/football/rss.xml",
|
||||||
source: &Source{Id: 1, Name: "Eurosport"},
|
"https://www.lequipe.fr/rss/actu_rss_Football.xml",
|
||||||
sourceUrl: "http://www.eurosport.fr/football/rss.xml",
|
"http://www.fftt.com/site/medias/flux/rss_competition.xml",
|
||||||
},
|
"http://www.footmercato.net/flux-rss",
|
||||||
{
|
|
||||||
source: &Source{Id: 1, Name: "L'équipe"},
|
|
||||||
sourceUrl: "https://www.lequipe.fr/rss/actu_rss_Football.xml",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
source: &Source{Id: 1, Name: "FFTT"},
|
|
||||||
sourceUrl: "http://www.fftt.com/site/medias/flux/rss_competition.xml",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
source: &Source{Id: 1, Name: "Foot Mercato"},
|
|
||||||
sourceUrl: "http://www.footmercato.net/flux-rss",
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, exp := range expList {
|
for _, link := range links {
|
||||||
t.Logf("testing newsList from %s", exp.source.Name)
|
t.Logf("testing ListNews from %s", link)
|
||||||
newsList, err := exp.source.ListNews(&Sport{Id: 1}, exp.sourceUrl)
|
source := &Source{FeedUrl: link}
|
||||||
|
newsList, err := source.ListNews()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("unexpected error : %s", err)
|
t.Errorf("unexpected error : %s", err)
|
||||||
}
|
}
|
||||||
|
@ -122,8 +138,8 @@ func TestSource_ListNews(t *testing.T) {
|
||||||
if n.Title == "" {
|
if n.Title == "" {
|
||||||
t.Errorf("unexpected empty title")
|
t.Errorf("unexpected empty title")
|
||||||
}
|
}
|
||||||
if n.Image == "" {
|
if n.Image == nil {
|
||||||
t.Errorf("unexpected empty image")
|
t.Errorf("unexpected nil image")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,15 +2,15 @@ package postgres
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/lib/pq"
|
"github.com/lib/pq"
|
||||||
|
|
||||||
"../news"
|
"1bet.fr/scraper/match"
|
||||||
"../utils"
|
"1bet.fr/scraper/news"
|
||||||
|
"1bet.fr/scraper/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Postgres struct {
|
type Postgres struct {
|
||||||
|
@ -25,6 +25,27 @@ type Postgres struct {
|
||||||
isConnected bool
|
isConnected bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sValue turns an optional string into a query argument: the pointed-to
// string when s is set, or an untyped nil (sent as NULL by database/sql)
// when s is nil.
func sValue(s *string) interface{} {
	if s != nil {
		return *s
	}
	return nil
}
|
||||||
|
|
||||||
|
// iValue turns an optional int into a query argument: the pointed-to value
// when i is set, or an untyped nil (sent as NULL by database/sql) when i is
// nil.
func iValue(i *int) interface{} {
	if i != nil {
		return *i
	}
	return nil
}
|
||||||
|
|
||||||
|
func aValue(a *[]string) interface{} {
|
||||||
|
if a == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return pq.Array(*a)
|
||||||
|
}
|
||||||
|
|
||||||
var pg *Postgres
|
var pg *Postgres
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
@ -64,41 +85,26 @@ func Close() {
|
||||||
pg.isConnected = false
|
pg.isConnected = false
|
||||||
}
|
}
|
||||||
|
|
||||||
func ListSports() ([]*news.Sport, error) {
|
|
||||||
var sports []*news.Sport
|
|
||||||
|
|
||||||
rows, err := pg.psqlConn.Query("SELECT id, name, clean_name FROM public.mainapp_sport")
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("error while querying postgres : %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for rows.Next() {
|
|
||||||
sport := &news.Sport{}
|
|
||||||
err = rows.Scan(&sport.Id, &sport.Name, &sport.CleanName)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("error while scanning row from postgres : %s", err)
|
|
||||||
}
|
|
||||||
sports = append(sports, sport)
|
|
||||||
}
|
|
||||||
return sports, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func ListSources() ([]*news.Source, error) {
|
func ListSources() ([]*news.Source, error) {
|
||||||
var sources []*news.Source
|
var sources []*news.Source
|
||||||
|
|
||||||
rows, err := pg.psqlConn.Query("SELECT id, name, clean_name, urls FROM public.mainapp_source")
|
rows, err := pg.psqlConn.Query(`
|
||||||
|
SELECT
|
||||||
|
mainapp_source.id, sport_id, mainapp_source.name, mainapp_source.clean_name, feed_url,
|
||||||
|
mainapp_sport.name, mainapp_sport.clean_name
|
||||||
|
FROM
|
||||||
|
mainapp_source, mainapp_sport
|
||||||
|
WHERE
|
||||||
|
mainapp_sport.id = mainapp_source.sport_id
|
||||||
|
`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error while querying postgres : %s", err)
|
return nil, fmt.Errorf("error while querying postgres : %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
source := &news.Source{}
|
source := &news.Source{Sport: &news.Sport{}}
|
||||||
sourceUrls := ""
|
if err = rows.Scan(&source.Id, &source.Sport.Id, &source.Name, &source.CleanName, &source.FeedUrl,
|
||||||
err = rows.Scan(&source.Id, &source.Name, &source.CleanName, &sourceUrls)
|
&source.Sport.Name, &source.Sport.CleanName); err != nil {
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("error while scanning row from postgres : %s", err)
|
|
||||||
}
|
|
||||||
if err = json.Unmarshal([]byte(sourceUrls), &source.Urls); err != nil {
|
|
||||||
return nil, fmt.Errorf("error while scanning row from postgres : %s", err)
|
return nil, fmt.Errorf("error while scanning row from postgres : %s", err)
|
||||||
}
|
}
|
||||||
sources = append(sources, source)
|
sources = append(sources, source)
|
||||||
|
@ -106,43 +112,84 @@ func ListSources() ([]*news.Source, error) {
|
||||||
return sources, nil
|
return sources, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ListLeagues() ([]*match.League, error) {
|
||||||
|
var leagues []*match.League
|
||||||
|
|
||||||
|
rows, err := pg.psqlConn.Query(`
|
||||||
|
SELECT
|
||||||
|
mainapp_league.id, sport_id, country_id, mainapp_league.name, mainapp_league.clean_name, gender,
|
||||||
|
schedule_url, ranking_url, channel_url,
|
||||||
|
mdays, matches_by_mday, rounds, groups,
|
||||||
|
mainapp_sport.name, mainapp_sport.clean_name
|
||||||
|
FROM
|
||||||
|
mainapp_league, mainapp_sport
|
||||||
|
WHERE
|
||||||
|
mainapp_sport.id = mainapp_league.sport_id
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error while querying postgres : %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for rows.Next() {
|
||||||
|
league := &match.League{Sport: &match.Sport{}}
|
||||||
|
if err = rows.Scan(
|
||||||
|
&league.Id, &league.Sport.Id, &league.CountryId, &league.Name, &league.CleanName, &league.Gender,
|
||||||
|
&league.ScheduleUrl, &league.RankingUrl, &league.ChannelUrl,
|
||||||
|
&league.MatchDays, &league.MatchesByMatchDay, pq.Array(&league.Rounds), pq.Array(&league.Groups),
|
||||||
|
&league.Sport.Name, &league.Sport.CleanName,
|
||||||
|
); err != nil {
|
||||||
|
return nil, fmt.Errorf("error while scanning row from postgres : %s", err)
|
||||||
|
}
|
||||||
|
leagues = append(leagues, league)
|
||||||
|
}
|
||||||
|
return leagues, nil
|
||||||
|
}
|
||||||
|
|
||||||
func InsertNews(n *news.News) error {
|
func InsertNews(n *news.News) error {
|
||||||
err := pg.psqlConn.QueryRow(`
|
return pg.psqlConn.QueryRow(`
|
||||||
INSERT INTO public.mainapp_news
|
INSERT INTO public.mainapp_news
|
||||||
(title, clean_title, link, pub_date, description, image, teaser, author,
|
(title, clean_title, link, pub_date, description, image, teaser, author,
|
||||||
content, redirect, haystack, tags, clean_tags, error, trace,
|
content, redirect, haystack, tags, clean_tags, error, trace,
|
||||||
league_id, source_id, sport_id, team_id)
|
league_id, source_id, team_id)
|
||||||
VALUES
|
VALUES
|
||||||
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19)
|
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18)
|
||||||
RETURNING
|
RETURNING
|
||||||
id
|
id
|
||||||
`, n.Title, n.CleanTitle, n.Link, n.PubDate, utils.NullableString(n.Description),
|
`, n.Title, n.CleanTitle, n.Link, n.PubDate, sValue(n.Description),
|
||||||
utils.NullableString(n.Image), utils.NullableString(n.Teaser), utils.NullableString(n.Author),
|
sValue(n.Image), sValue(n.Teaser), sValue(n.Author),
|
||||||
pq.Array(n.Content), utils.NullableString(n.Redirect), utils.NullableString(n.Haystack),
|
aValue(n.Content), sValue(n.Redirect), sValue(n.Haystack),
|
||||||
pq.Array(n.Tags), pq.Array(n.CleanTags), utils.NullableString(n.Error), utils.NullableString(n.Trace),
|
aValue(n.Tags), aValue(n.CleanTags), sValue(n.Error), sValue(n.Trace),
|
||||||
utils.NullableInt(n.LeagueId), n.Source.Id, n.Sport.Id, utils.NullableInt(n.TeamId),
|
iValue(n.LeagueId), n.Source.Id, iValue(n.TeamId),
|
||||||
).Scan(&n.Id)
|
).Scan(&n.Id)
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func UpdateNews(n *news.News) error {
|
func UpdateNews(n *news.News) (int64, error) {
|
||||||
if _, err := pg.psqlConn.Exec(`
|
res, err := pg.psqlConn.Exec(`
|
||||||
UPDATE public.mainapp_news
|
UPDATE public.mainapp_news
|
||||||
SET title = $1, clean_title = $2, pub_date = $3, link = $4, description = $5,
|
SET title = $1, clean_title = $2, pub_date = $3, link = $4, description = $5,
|
||||||
image = $6, teaser = $7, author = $8, content = $9, redirect = $10,
|
image = $6, teaser = $7, author = $8, content = $9, redirect = $10,
|
||||||
haystack = $11, tags = $12, clean_tags = $13, error = $14, trace = $15,
|
haystack = $11, tags = $12, clean_tags = $13, error = $14, trace = $15,
|
||||||
league_id = $16, source_id = $17, sport_id = $18, team_id = $19
|
league_id = get_matching_league($11, $18), source_id = $16, team_id = $17
|
||||||
WHERE id = $20
|
WHERE id = $19
|
||||||
`, n.Title, n.CleanTitle, n.PubDate, n.Link, utils.NullableString(n.Description),
|
`, n.Title, n.CleanTitle, n.PubDate, n.Link,sValue(n.Description),
|
||||||
utils.NullableString(n.Image), utils.NullableString(n.Teaser), utils.NullableString(n.Author),
|
sValue(n.Image), sValue(n.Teaser), sValue(n.Author),
|
||||||
pq.Array(n.Content), utils.NullableString(n.Redirect), utils.NullableString(n.Haystack),
|
aValue(n.Content), sValue(n.Redirect), sValue(n.Haystack),
|
||||||
pq.Array(n.Tags), pq.Array(n.CleanTags), utils.NullableString(n.Error), utils.NullableString(n.Trace),
|
aValue(n.Tags), aValue(n.CleanTags), sValue(n.Error), sValue(n.Trace),
|
||||||
utils.NullableInt(n.LeagueId), n.Source.Id, n.Sport.Id, utils.NullableInt(n.TeamId), n.Id,
|
n.Source.Id, iValue(n.TeamId), n.Source.Sport.Id, n.Id,
|
||||||
); err != nil {
|
)
|
||||||
return err
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
}
|
}
|
||||||
return nil
|
return res.RowsAffected()
|
||||||
|
}
|
||||||
|
|
||||||
|
func DeleteNews(n *news.News) (int64, error) {
|
||||||
|
res, err := pg.psqlConn.Exec(`
|
||||||
|
DELETE FROM public.mainapp_news
|
||||||
|
WHERE id = $1
|
||||||
|
`, n.Id)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return res.RowsAffected()
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,19 +2,83 @@ package postgres
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"1bet.fr/scraper/news"
|
||||||
|
"1bet.fr/scraper/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var n *news.News
|
||||||
|
|
||||||
func TestConnect(t *testing.T) {
|
func TestConnect(t *testing.T) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestListLeagues(t *testing.T) {
|
||||||
|
leagues, err := ListLeagues()
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("unexpected error : %s", err)
|
||||||
|
}
|
||||||
|
if len(leagues) == 0 {
|
||||||
|
t.Errorf("no league got from ListLeagues function")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestListSources(t *testing.T) {
|
func TestListSources(t *testing.T) {
|
||||||
defer Close()
|
|
||||||
sources, err := ListSources()
|
sources, err := ListSources()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("unexpected error : %s", err)
|
t.Errorf("unexpected error : %s", err)
|
||||||
}
|
}
|
||||||
if len(sources) == 0 {
|
if len(sources) == 0 {
|
||||||
t.Errorf("no sources got from ListSources function")
|
t.Errorf("no source got from ListSources function")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestInsertNews(t *testing.T) {
|
||||||
|
tags := []string{"Test", "Hello Toto"}
|
||||||
|
cleanTags := []string{"test", "hello-toto"}
|
||||||
|
nowTime := time.Now()
|
||||||
|
n = &news.News{
|
||||||
|
Source: &news.Source{Id: 1, Sport: &news.Sport{Id: 1}},
|
||||||
|
PubDate: &nowTime,
|
||||||
|
Link: "https://test.com/toto",
|
||||||
|
Title: "Hello toto",
|
||||||
|
CleanTitle: "hello-toto",
|
||||||
|
Tags: &tags,
|
||||||
|
CleanTags: &cleanTags,
|
||||||
|
}
|
||||||
|
err := InsertNews(n)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if n.Id == 0 {
|
||||||
|
t.Errorf("unexpected value 0 for n.Id")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestUpdateNews(t *testing.T) {
|
||||||
|
content := []string{"toto", "test"}
|
||||||
|
n.Content = &content
|
||||||
|
n.Author = utils.StringPointer("T. Toto")
|
||||||
|
updated, err := UpdateNews(n)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if updated != 1 {
|
||||||
|
t.Errorf("unexpected %d update rows", updated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeleteNews(t *testing.T) {
|
||||||
|
deleted, err := DeleteNews(n)
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
if deleted != 1 {
|
||||||
|
t.Errorf("unexpected %d news deleted", deleted)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClose(t *testing.T) {
|
||||||
|
Close()
|
||||||
|
}
|
||||||
|
|
|
@ -2,11 +2,11 @@ package requests
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
|
||||||
"golang.org/x/net/proxy"
|
"golang.org/x/net/proxy"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
117
utils/utils.go
117
utils/utils.go
|
@ -3,6 +3,16 @@ package utils
|
||||||
import (
|
import (
|
||||||
"log"
|
"log"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
HostMatchendirect = "www.matchendirect.fr"
|
||||||
|
HostEurosport = "www.eurosport.fr"
|
||||||
|
HostRugbyrama = "www.rugbyrama.fr"
|
||||||
|
HostFFTT = "www.fftt.com"
|
||||||
|
HostFootmercato = "www.footmercato.net"
|
||||||
|
HostLequipe = "www.lequipe.fr"
|
||||||
)
|
)
|
||||||
|
|
||||||
func Sanitize(s string) (t string) {
|
func Sanitize(s string) (t string) {
|
||||||
|
@ -32,29 +42,6 @@ func Sanitize(s string) (t string) {
|
||||||
return t
|
return t
|
||||||
}
|
}
|
||||||
|
|
||||||
func NullableString(s string) interface{} {
|
|
||||||
if len(s) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return s
|
|
||||||
}
|
|
||||||
|
|
||||||
func NullableInt(i int) interface{} {
|
|
||||||
if i == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return i
|
|
||||||
}
|
|
||||||
|
|
||||||
func ArrayContains(arr []string, val string) bool {
|
|
||||||
for _, elt := range arr {
|
|
||||||
if elt == val {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func AtoI(s string) int {
|
func AtoI(s string) int {
|
||||||
res, err := strconv.Atoi(s)
|
res, err := strconv.Atoi(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -62,3 +49,87 @@ func AtoI(s string) int {
|
||||||
}
|
}
|
||||||
return res
|
return res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// frenchDateReplacer rewrites lower-case French month and weekday names into
// their capitalised English equivalents. It is built once at package
// initialisation instead of on every call.
var frenchDateReplacer = strings.NewReplacer(
	"janvier", "January",
	"février", "February",
	"mars", "March",
	"avril", "April",
	"mai", "May",
	"juin", "June",
	"juillet", "July",
	"août", "August",
	"septembre", "September",
	"octobre", "October",
	"novembre", "November",
	"décembre", "December",
	"lundi", "Monday",
	"mardi", "Tuesday",
	"mercredi", "Wednesday",
	"jeudi", "Thursday",
	"vendredi", "Friday",
	"samedi", "Saturday",
	"dimanche", "Sunday",
)

// EnglishDateString lower-cases and trims s, then replaces every French
// month and weekday name it contains with the English name, so that
// "Mercredi 03 février 2021" becomes "Wednesday 03 February 2021".
//
// Every name found is translated. Text that is not a month or weekday name
// is returned lower-cased and otherwise unchanged.
func EnglishDateString(s string) string {
	return frenchDateReplacer.Replace(strings.TrimSpace(strings.ToLower(s)))
}
|
||||||
|
|
||||||
|
// StringPointer trims surrounding whitespace from s and returns a pointer to
// the result, or nil when nothing is left.
//
// The emptiness check runs after trimming, so a whitespace-only input maps
// to nil just like the empty string rather than to a pointer to "".
func StringPointer(s string) *string {
	s = strings.TrimSpace(s)
	if s == "" {
		return nil
	}
	return &s
}
|
||||||
|
|
||||||
|
// IntPointer returns a pointer to a copy of i, or nil when i is zero.
func IntPointer(i int) *int {
	if i != 0 {
		v := i
		return &v
	}
	return nil
}
|
||||||
|
|
||||||
|
// ArrayPointerContains reports whether val is one of the elements of the
// slice arr points to. A nil arr contains nothing.
func ArrayPointerContains(arr *[]string, val string) bool {
	if arr != nil {
		items := *arr
		for i := range items {
			if items[i] == val {
				return true
			}
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// ArrayPointerAppend appends v, stripped of surrounding whitespace, to the
// slice a points to and returns a pointer to the resulting slice. A nil a is
// treated as an empty slice. The slice header behind a is not modified.
func ArrayPointerAppend(a *[]string, v string) *[]string {
	trimmed := strings.TrimSpace(v)

	var out []string
	if a == nil {
		out = []string{trimmed}
	} else {
		out = append(*a, trimmed)
	}
	return &out
}
|
||||||
|
|
||||||
|
// ArrayPointerJoin concatenates the elements of the slice a points to,
// separated by sep. A nil a yields the empty string.
func ArrayPointerJoin(a *[]string, sep string) string {
	if a != nil {
		return strings.Join(*a, sep)
	}
	return ""
}
|
||||||
|
|
|
@ -10,32 +10,17 @@ func TestSanitize(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNullableString(t *testing.T) {
|
|
||||||
if res := NullableString("test"); res != "test" {
|
|
||||||
t.Errorf("unexepected NullableString() answer '%s' != 'test'", res)
|
|
||||||
}
|
|
||||||
|
|
||||||
if res := NullableString(""); res != nil {
|
|
||||||
t.Errorf("unexepected NullableString() answer '%s' != nil", res)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestNullableInt(t *testing.T) {
|
|
||||||
if res := NullableInt(3); res != 3 {
|
|
||||||
t.Errorf("unexepected NullableInt() answer %s != 3", res)
|
|
||||||
}
|
|
||||||
|
|
||||||
if res := NullableInt(0); res != nil {
|
|
||||||
t.Errorf("unexepected NullableInt() answer %s != nil", res)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestArrayContains(t *testing.T) {
|
func TestArrayContains(t *testing.T) {
|
||||||
if !ArrayContains([]string{"bird", "apple", "ocean", "fork", "anchor"}, "bird") {
|
if ArrayPointerContains(nil, "toto") {
|
||||||
t.Errorf("unexpected ArrayContains() false answer for 'bird'")
|
t.Errorf("unexpected contains true for nil array")
|
||||||
}
|
}
|
||||||
if ArrayContains([]string{"bird", "apple", "ocean", "fork", "anchor"}, "potato") {
|
arr := []string{"bird", "apple", "ocean", "fork", "anchor"}
|
||||||
t.Errorf("unexpected ArrayContains() true answer for 'potato'")
|
if !ArrayPointerContains(&arr, "bird") {
|
||||||
|
t.Errorf("unexpected contains false")
|
||||||
|
}
|
||||||
|
arr = []string{"bird", "apple", "ocean", "fork", "anchor"}
|
||||||
|
if ArrayPointerContains(&arr, "potato") {
|
||||||
|
t.Errorf("unexpected contains true")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,3 +29,62 @@ func TestAtoI(t *testing.T) {
|
||||||
t.Errorf("unexpected answer %d != 3", res)
|
t.Errorf("unexpected answer %d != 3", res)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEnglishDateString(t *testing.T) {
|
||||||
|
if res := EnglishDateString("Mercredi 03 février 2021"); res != "Wednesday 03 February 2021" {
|
||||||
|
t.Errorf("unexpected date format : %s", res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStringPointer(t *testing.T) {
|
||||||
|
if res := StringPointer(""); res != nil {
|
||||||
|
t.Errorf("unexpected res : %s", *res)
|
||||||
|
}
|
||||||
|
if res := StringPointer("toto"); res == nil {
|
||||||
|
t.Errorf("unexpected res : nil")
|
||||||
|
} else {
|
||||||
|
if *res != "toto" {
|
||||||
|
t.Errorf("unexpected res : %s", *res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIntPointer(t *testing.T) {
|
||||||
|
if res := IntPointer(0); res != nil {
|
||||||
|
t.Errorf("unexpected res : %d", *res)
|
||||||
|
}
|
||||||
|
if res := IntPointer(123); res == nil {
|
||||||
|
t.Errorf("unexpected res : nil")
|
||||||
|
} else if *res != 123 {
|
||||||
|
t.Errorf("unexpected res : %d", *res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestArrayPointerAppend(t *testing.T) {
|
||||||
|
var arr *[]string
|
||||||
|
if arr = ArrayPointerAppend(arr, "toto"); arr == nil {
|
||||||
|
t.Errorf("unexpected arr : nil")
|
||||||
|
} else if len(*arr) != 1 {
|
||||||
|
t.Errorf("unexpected arr len : %d", len(*arr))
|
||||||
|
} else if (*arr)[0] != "toto" {
|
||||||
|
t.Errorf("unexpected arr content : %s", *arr)
|
||||||
|
}
|
||||||
|
|
||||||
|
if arr = ArrayPointerAppend(arr, "test"); arr == nil {
|
||||||
|
t.Errorf("unexpected arr : nil")
|
||||||
|
} else if len(*arr) != 2 {
|
||||||
|
t.Errorf("unexpected arr len : %d", len(*arr))
|
||||||
|
} else if (*arr)[1] != "test" {
|
||||||
|
t.Errorf("unexpected arr content : %s", *arr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestArrayPointerJoin(t *testing.T) {
|
||||||
|
if s := ArrayPointerJoin(nil, "-"); s != "" {
|
||||||
|
t.Errorf("unexpected join result : %s", s)
|
||||||
|
}
|
||||||
|
arr := []string{"toto", "test"}
|
||||||
|
if s:= ArrayPointerJoin(&arr, "-"); s != "toto-test" {
|
||||||
|
t.Errorf("unexpected join result : %s", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue