158 lines
4.0 KiB
Go
158 lines
4.0 KiB
Go
package main
|
|
|
|
import (
|
|
"1bet.fr/scraper/utils"
|
|
"flag"
|
|
"fmt"
|
|
"github.com/lib/pq"
|
|
"log"
|
|
"sync"
|
|
|
|
"1bet.fr/scraper/match"
|
|
"1bet.fr/scraper/news"
|
|
"1bet.fr/scraper/postgres"
|
|
)
|
|
|
|
const (
	// maxNewsPerSource caps how many news items updateNews takes from a
	// single source before moving on to the next one.
	maxNewsPerSource = 50
	// nbProcesses is the number of worker goroutines started by
	// updateNews and updateSchedule.
	nbProcesses = 50
)
|
|
|
|
func updateNews() {
|
|
defer postgres.Close()
|
|
|
|
sources, err := postgres.ListSources()
|
|
if err != nil {
|
|
log.Fatalf("error while getting list of sources : %s", err)
|
|
}
|
|
|
|
waitGroup := sync.WaitGroup{}
|
|
newsChannel := make(chan *news.News)
|
|
for i := 0; i < nbProcesses; i++ {
|
|
waitGroup.Add(1)
|
|
go func(nc chan *news.News, wg *sync.WaitGroup) {
|
|
defer wg.Done()
|
|
for n := range nc {
|
|
if err := n.Feed(); err != nil {
|
|
log.Fatalf("error while feeding news : %s", err)
|
|
}
|
|
if _, err := postgres.UpdateNews(n); err != nil {
|
|
log.Fatalf("error while update news in postgres : %s", err)
|
|
}
|
|
}
|
|
}(newsChannel, &waitGroup)
|
|
}
|
|
|
|
for _, source := range sources {
|
|
log.Printf("[+] Starting parse of source : %s", source.FeedUrl)
|
|
newsList, err := source.ListNews()
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
for i, n := range newsList {
|
|
if i >= maxNewsPerSource {
|
|
log.Printf("Stopping parse of source with %d news added", i)
|
|
break
|
|
}
|
|
if err := postgres.InsertNews(n); err != nil {
|
|
if err, ok := err.(*pq.Error); ok {
|
|
if err.Code.Name() == "unique_violation" {
|
|
log.Printf("Stopping parse of source (unique violation) with %d news added", i)
|
|
break
|
|
} else {
|
|
log.Fatalf("error while inserting news in postgres : %s", err.Message)
|
|
}
|
|
} else {
|
|
log.Fatalf("error while getting pq.Error object")
|
|
}
|
|
}
|
|
log.Printf("Adding news %s", n.Link)
|
|
newsChannel <- n
|
|
}
|
|
}
|
|
close(newsChannel)
|
|
waitGroup.Wait()
|
|
}
|
|
|
|
func updateSchedule() {
|
|
defer postgres.Close()
|
|
|
|
leagues, err := postgres.ListLeagues()
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
waitGroup := sync.WaitGroup{}
|
|
leagueChannel := make(chan *match.League)
|
|
for i := 0; i < nbProcesses; i++ {
|
|
waitGroup.Add(1)
|
|
go func(lc chan *match.League, wg *sync.WaitGroup) {
|
|
defer wg.Done()
|
|
for l := range lc {
|
|
sources, err := l.ListSources()
|
|
if err != nil {
|
|
l.Error = utils.StringPointer("list sources error")
|
|
l.Trace = utils.StringPointer(fmt.Sprint(err))
|
|
if updated, err := postgres.UpdateLeague(l); err != nil {
|
|
log.Fatalf("error while updating league : %s", err)
|
|
} else if updated != 1 {
|
|
log.Fatalf("error while updating league : %d league(s) updated", updated)
|
|
}
|
|
break
|
|
}
|
|
for _, s := range sources {
|
|
log.Printf("[+] Parsing source %s", s.URL)
|
|
matches, err := s.GetMatches()
|
|
if err != nil {
|
|
log.Printf("[-] error while getting matches from league source %s : %s", s.URL, err)
|
|
s.League.Error = utils.StringPointer("league source error")
|
|
s.League.Trace = utils.StringPointer(fmt.Sprint(err))
|
|
if updated, err := postgres.UpdateLeague(s.League); err != nil {
|
|
log.Fatalf("error while updating league : %s", err)
|
|
} else if updated != 1 {
|
|
log.Fatalf("error while updating league : %d league(s) updated", updated)
|
|
}
|
|
break
|
|
}
|
|
for _, m := range matches {
|
|
if err := postgres.InsertTeamBySourceName(m.TeamHome, s.League); err != nil {
|
|
log.Fatalf("error while saving team home : %s", err)
|
|
}
|
|
if err := postgres.InsertTeamBySourceName(m.TeamAway, s.League); err != nil {
|
|
log.Fatalf("error while saving team away : %s", err)
|
|
}
|
|
if err := postgres.InsertMatch(m); err != nil {
|
|
log.Fatalf("error while saving match : %s", err)
|
|
}
|
|
log.Printf("New match #%d (%s - %s)", m.Id, m.TeamHome.Name, m.TeamAway.Name)
|
|
}
|
|
}
|
|
}
|
|
}(leagueChannel, &waitGroup)
|
|
}
|
|
|
|
for _, league := range leagues {
|
|
leagueChannel <- league
|
|
}
|
|
close(leagueChannel)
|
|
waitGroup.Wait()
|
|
}
|
|
|
|
func main() {
|
|
flag.Parse()
|
|
args := flag.Args()
|
|
if len(args) != 1 {
|
|
log.Fatalf("unexpected number of args : len(%s) != 1", args)
|
|
}
|
|
|
|
switch args[0] {
|
|
case "news":
|
|
updateNews()
|
|
case "schedule":
|
|
updateSchedule()
|
|
default:
|
|
log.Fatalf("unexpected arg : %s", args[0])
|
|
}
|
|
}
|