scraper/main.go

88 lines
1.7 KiB
Go
Raw Normal View History

2020-10-05 08:24:33 +00:00
package main
import (
	"errors"
	"log"
	"sync"

	"github.com/lib/pq"

	"./news"
	"./postgres"
)
const (
	// nbProcesses is the number of concurrent worker goroutines that
	// download article content and persist it to the database.
	nbProcesses = 50
	// maxNewsPerSource caps how many news items are inserted for a
	// single source URL in one run.
	maxNewsPerSource = 50
)
// updateNews performs one full scraping pass: it loads the sports and
// sources from Postgres, scrapes every source URL, inserts each newly
// discovered news row, and hands the inserted items to a pool of
// nbProcesses workers that fetch the article body (Feed) and persist it
// (UpdateNews).
//
// Every error is treated as fatal via log.Fatal. NOTE(review): log.Fatal
// calls os.Exit, so on error paths the deferred postgres.Close and the
// worker drain do not run — confirm this all-or-nothing behavior is
// intended.
func updateNews() {
	defer postgres.Close()

	sports, err := postgres.ListSports()
	if err != nil {
		log.Fatal(err)
	}
	// Index sports by id so each source URL can be paired with its sport.
	mapSports := make(map[int]*news.Sport, len(sports))
	for _, sport := range sports {
		mapSports[sport.Id] = sport
	}

	sources, err := postgres.ListSources()
	if err != nil {
		log.Fatal(err)
	}

	// Worker pool: each goroutine consumes inserted news items from the
	// channel, downloads their content, and updates the row.
	waitGroup := sync.WaitGroup{}
	newsChannel := make(chan *news.News)
	for i := 0; i < nbProcesses; i++ {
		waitGroup.Add(1)
		go func(nc chan *news.News, wg *sync.WaitGroup) {
			defer wg.Done()
			for n := range nc {
				if err := n.Feed(); err != nil {
					log.Fatal(err)
				}
				if err := postgres.UpdateNews(n); err != nil {
					log.Fatal(err)
				}
			}
		}(newsChannel, &waitGroup)
	}

	for _, source := range sources {
		for sportId, url := range source.Urls {
			log.Printf("[+] Starting parse of source : %s", url)
			newsList, err := source.ListNews(mapSports[sportId], url)
			if err != nil {
				log.Fatal(err)
			}
			for i, n := range newsList {
				if i >= maxNewsPerSource {
					log.Printf("Stopping parse of source with %d news added", i)
					break
				}
				if err := postgres.InsertNews(n); err != nil {
					// A unique violation means we have reached news that
					// were already inserted by a previous run: stop
					// scanning this source.
					var pqErr *pq.Error
					if errors.As(err, &pqErr) {
						if pqErr.Code.Name() == "unique_violation" {
							log.Printf("Stopping parse of source (unique violation) with %d news added", i)
							break
						}
						// Bug fix: the original logged a misleading
						// "error while getting pq.Error object" here even
						// though the assertion had succeeded; report the
						// actual database error instead.
						log.Fatal(pqErr)
					}
					log.Fatal(err)
				}
				log.Printf("Adding news %s", n.Link)
				newsChannel <- n
			}
		}
	}

	// All sends are done: close the channel so the workers drain the
	// remaining items and exit, then wait for them.
	close(newsChannel)
	waitGroup.Wait()
}
// main runs a single scraping pass over all configured sources; see
// updateNews for the full pipeline (list sources, insert news, enrich
// them concurrently).
func main() {
	updateNews()
}