// Package main is a command-line entry point that refreshes either the news
// or the match schedule stored in postgres, depending on its single argument.
package main

import (
	"flag"
	"fmt"
	"log"
	"sync"

	"1bet.fr/scraper/match"
	"1bet.fr/scraper/news"
	"1bet.fr/scraper/postgres"
	"1bet.fr/scraper/utils"
	"github.com/lib/pq"
)

const (
	// nbProcesses is the number of worker goroutines started by each update job.
	nbProcesses = 50
	// maxNewsPerSource caps how many news items are queued from one source per run.
	maxNewsPerSource = 50
)

// processNews feeds a single news item and then updates it in postgres.
// Any failure is fatal for the whole process.
func processNews(n *news.News) {
	if err := n.Feed(); err != nil {
		log.Fatalf("error while feeding news : %s", err)
	}
	if _, err := postgres.UpdateNews(n); err != nil {
		log.Fatalf("error while update news in postgres : %s", err)
	}
}

// updateNews lists the sources from postgres, inserts the news items each
// source reports and hands every inserted item to a pool of nbProcesses
// workers that feed it and update it in postgres.
//
// For each source the walk stops after maxNewsPerSource items, or as soon as
// an insert hits a unique violation. Every other error is fatal.
//
// Note: log.Fatal* terminates the process with os.Exit, so the deferred
// postgres.Close only runs when the function returns normally.
func updateNews() {
	defer postgres.Close()

	sources, err := postgres.ListSources()
	if err != nil {
		log.Fatalf("error while getting list of sources : %s", err)
	}

	waitGroup := sync.WaitGroup{}
	newsChannel := make(chan *news.News)

	for i := 0; i < nbProcesses; i++ {
		waitGroup.Add(1)
		go func(nc chan *news.News, wg *sync.WaitGroup) {
			defer wg.Done()
			for n := range nc {
				processNews(n)
			}
		}(newsChannel, &waitGroup)
	}

	for _, source := range sources {
		log.Printf("[+] Starting parse of source : %s", source.FeedUrl)

		newsList, err := source.ListNews()
		if err != nil {
			log.Fatal(err)
		}

		for i, n := range newsList {
			if i >= maxNewsPerSource {
				log.Printf("Stopping parse of source with %d news added", i)
				break
			}

			if err := postgres.InsertNews(n); err != nil {
				pqErr, ok := err.(*pq.Error)
				if !ok {
					// Keep the underlying error in the message; without it the
					// failure cannot be diagnosed from the log.
					log.Fatalf("error while getting pq.Error object : %s", err)
				}
				if pqErr.Code.Name() != "unique_violation" {
					log.Fatalf("error while inserting news in postgres : %s", pqErr.Message)
				}
				log.Printf("Stopping parse of source (unique violation) with %d news added", i)
				break
			}

			log.Printf("Adding news %s", n.Link)
			newsChannel <- n
		}
	}

	// Closing the channel ends the workers' range loops; then wait for them.
	close(newsChannel)
	waitGroup.Wait()
}

// processLeague lists the sources of one league, fetches the matches of each
// source and saves the teams and matches in postgres.
//
// When listing the sources or fetching the matches fails, the error is
// recorded on the league (Error and Trace fields), the league is updated in
// postgres and the rest of the league is skipped. Failures while saving are
// fatal for the whole process.
func processLeague(l *match.League) {
	sources, err := l.ListSources()
	if err != nil {
		l.Error = utils.StringPointer("list sources error")
		l.Trace = utils.StringPointer(fmt.Sprint(err))
		if updated, err := postgres.UpdateLeague(l); err != nil {
			log.Fatalf("error while updating league : %s", err)
		} else if updated != 1 {
			log.Fatalf("error while updating league : %d league(s) updated", updated)
		}
		// Returning skips only this league; the calling worker keeps running.
		return
	}

	for _, s := range sources {
		log.Printf("[+] Parsing source %s", s.URL)

		matches, err := s.GetMatches()
		if err != nil {
			log.Printf("[-] error while getting matches from league source %s : %s", s.URL, err)
			s.League.Error = utils.StringPointer("league source error")
			s.League.Trace = utils.StringPointer(fmt.Sprint(err))
			if updated, err := postgres.UpdateLeague(s.League); err != nil {
				log.Fatalf("error while updating league : %s", err)
			} else if updated != 1 {
				log.Fatalf("error while updating league : %d league(s) updated", updated)
			}
			// The remaining sources of this league are not parsed.
			return
		}

		for _, m := range matches {
			if err := postgres.InsertTeamBySourceName(m.TeamHome, s.League); err != nil {
				log.Fatalf("error while saving team home : %s", err)
			}
			if err := postgres.InsertTeamBySourceName(m.TeamAway, s.League); err != nil {
				log.Fatalf("error while saving team away : %s", err)
			}
			if err := postgres.InsertMatch(m); err != nil {
				log.Fatalf("error while saving match : %s", err)
			}
			log.Printf("New match #%d (%s - %s)", m.Id, m.TeamHome.Name, m.TeamAway.Name)
		}
	}
}

// updateSchedule lists the leagues from postgres and distributes them to a
// pool of nbProcesses workers, each of which runs processLeague on the
// leagues it receives.
//
// A league whose sources cannot be listed is skipped without stopping the
// worker. If workers exited on such errors, the unbuffered send below would
// block forever once every worker had stopped.
func updateSchedule() {
	defer postgres.Close()

	leagues, err := postgres.ListLeagues()
	if err != nil {
		log.Fatal(err)
	}

	waitGroup := sync.WaitGroup{}
	leagueChannel := make(chan *match.League)

	for i := 0; i < nbProcesses; i++ {
		waitGroup.Add(1)
		go func(lc chan *match.League, wg *sync.WaitGroup) {
			defer wg.Done()
			for l := range lc {
				processLeague(l)
			}
		}(leagueChannel, &waitGroup)
	}

	for _, league := range leagues {
		leagueChannel <- league
	}

	// Closing the channel ends the workers' range loops; then wait for them.
	close(leagueChannel)
	waitGroup.Wait()
}

// main expects exactly one positional argument, "news" or "schedule", and
// runs the matching update job. Anything else is a fatal error.
func main() {
	flag.Parse()

	args := flag.Args()
	if len(args) != 1 {
		log.Fatalf("unexpected number of args : len(%s) != 1", args)
	}

	switch args[0] {
	case "news":
		updateNews()
	case "schedule":
		updateSchedule()
	default:
		log.Fatalf("unexpected arg : %s", args[0])
	}
}