174 lines
4.3 KiB
Go
174 lines
4.3 KiB
Go
|
package news
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"regexp"
|
||
|
"strings"
|
||
|
"time"
|
||
|
|
||
|
"github.com/PuerkitoBio/goquery"
|
||
|
|
||
|
"github.com/mmcdole/gofeed"
|
||
|
|
||
|
"../requests"
|
||
|
"../utils"
|
||
|
)
|
||
|
|
||
|
// Sport is a sport category that news feeds are grouped under.
type Sport struct {
	// Id is the numeric identifier (presumably a database key — confirm).
	Id int
	// Name is the human-readable sport name; it is used as the fallback
	// tag when an article has no tags of its own (see News.Feed).
	Name string
	// CleanName is the sanitized counterpart of Name (presumably produced
	// by utils.Sanitize, like CleanTitle/CleanTags — confirm).
	CleanName string
}
|
||
|
|
||
|
// Source is a news provider (e.g. "Eurosport", "L'équipe") together with
// its feed URLs.
type Source struct {
	// Id is the numeric identifier (presumably a database key — confirm).
	Id int
	// Name is the provider name; News.Feed switches on it to pick the
	// CSS selectors used when scraping an article page.
	Name string
	// CleanName is the sanitized counterpart of Name — presumably via
	// utils.Sanitize; confirm.
	CleanName string
	// Urls maps an integer key to a feed URL; presumably keyed by
	// Sport.Id — verify against callers of ListNews.
	Urls map[int]string

	// Error and Trace record a failure while processing this source;
	// nil when no error occurred. NOTE(review): News carries the same
	// pair as plain strings — consider unifying the two styles.
	Error *string
	Trace *string
}
|
||
|
|
||
|
// News is a single article. The feed-level fields (Title, Description,
// PubDate, Link, Image, Tags, …) are populated by Source.ListNews; the
// page-level fields (Teaser, Author, Content, Redirect, Haystack) are
// filled in later by News.Feed.
type News struct {
	Id int
	// Source and Sport the article was fetched for.
	Source *Source
	Sport *Sport
	// LeagueId/TeamId are not set in this file — presumably assigned by
	// a caller after classification; confirm.
	LeagueId int
	TeamId int

	// Title as given by the feed item; CleanTitle is its sanitized form.
	Title string
	CleanTitle string
	// PubDate is the feed's parsed publication time; may be nil when the
	// feed item carries no parseable date.
	PubDate *time.Time
	// Description is the feed item description with HTML tags stripped.
	Description string
	// Link is the article URL; Feed fetches this page.
	Link string
	// Image comes from the feed item image, its first enclosure, or (for
	// Eurosport) an <img> embedded in the description.
	Image string

	// Teaser, Author and Content are scraped from the article page.
	Teaser string
	Author string
	Content []string
	// Redirect is set to Link when no article body could be scraped, so
	// readers can be sent to the original page instead.
	Redirect string

	// Haystack is "<CleanTitle>-<CleanTags joined by '-'>" — presumably
	// a search/matching string; confirm usage.
	Haystack string
	// Tags holds display tags; CleanTags their sanitized forms, kept in
	// lockstep and deduplicated on the sanitized value.
	Tags []string
	CleanTags []string

	// Error/Trace record a per-article failure (empty when none).
	Error string
	Trace string
}
|
||
|
|
||
|
func (n *News) Feed() error {
|
||
|
doc, err := requests.GetDocumentFromURL(n.Link)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
switch n.Source.Name {
|
||
|
case "Eurosport":
|
||
|
n.Teaser = strings.TrimSpace(doc.Find("h2").Text())
|
||
|
doc.Find(".article-body .article-s4-rs p").Each(func(i int, s *goquery.Selection) {
|
||
|
n.Content = append(n.Content, s.Text())
|
||
|
})
|
||
|
n.Author = strings.TrimSpace(doc.Find(".flex a.caption-s5-fx div.font-bold").Text())
|
||
|
doc.Find(".related-topics .atom-tag").Each(func(i int, s *goquery.Selection) {
|
||
|
tag := strings.TrimSpace(s.Text())
|
||
|
cleanTag := utils.Sanitize(tag)
|
||
|
if !utils.ArrayContains(n.CleanTags, cleanTag) {
|
||
|
n.Tags = append(n.Tags, tag)
|
||
|
n.CleanTags = append(n.CleanTags, cleanTag)
|
||
|
}
|
||
|
})
|
||
|
case "L'équipe":
|
||
|
n.Teaser = strings.TrimSpace(doc.Find("h2.Article__chapo").Text())
|
||
|
doc.Find(".Paragraph__content").Each(func(i int, s *goquery.Selection) {
|
||
|
n.Content = append(n.Content, s.Text())
|
||
|
})
|
||
|
n.Author = strings.TrimSpace(doc.Find(".Author__name").Text())
|
||
|
doc.Find(".RelatedLinks a.RelatedLinks__link").Each(func(i int, s *goquery.Selection) {
|
||
|
tag := strings.TrimSpace(s.Text())
|
||
|
cleanTag := utils.Sanitize(tag)
|
||
|
if !utils.ArrayContains(n.CleanTags, cleanTag) {
|
||
|
n.Tags = append(n.Tags, tag)
|
||
|
n.CleanTags = append(n.CleanTags, cleanTag)
|
||
|
}
|
||
|
})
|
||
|
case "FFTT":
|
||
|
n.Teaser = strings.TrimSpace(doc.Find(".news-description p").First().Text())
|
||
|
doc.Find(".news-description p").Each(func(i int, s *goquery.Selection) {
|
||
|
if i > 0 {
|
||
|
n.Content = append(n.Content, s.Text())
|
||
|
}
|
||
|
})
|
||
|
doc.Find(".social-shares-large-wrapper a.link").Each(func(i int, s *goquery.Selection) {
|
||
|
tag := strings.TrimSpace(s.Text())
|
||
|
cleanTag := utils.Sanitize(tag)
|
||
|
if !utils.ArrayContains(n.CleanTags, cleanTag) {
|
||
|
n.Tags = append(n.Tags, tag)
|
||
|
n.CleanTags = append(n.CleanTags, cleanTag)
|
||
|
}
|
||
|
})
|
||
|
case "Foot Mercato":
|
||
|
n.Teaser = strings.TrimSpace(doc.Find("h2.article__lead").Text())
|
||
|
doc.Find(".article__content p").Each(func(i int, s *goquery.Selection) {
|
||
|
n.Content = append(n.Content, s.Text())
|
||
|
})
|
||
|
n.Author = strings.TrimSpace(doc.Find(".article__author a").Text())
|
||
|
default:
|
||
|
return fmt.Errorf("unknown source %s", n.Source.Name)
|
||
|
}
|
||
|
if len(n.Content) == 0 {
|
||
|
n.Redirect = n.Link
|
||
|
}
|
||
|
if len(n.CleanTags) == 0 {
|
||
|
n.Tags = append(n.Tags, n.Sport.Name)
|
||
|
n.CleanTags = append(n.CleanTags, n.Sport.CleanName)
|
||
|
}
|
||
|
n.Haystack = fmt.Sprintf("%s-%s", n.CleanTitle, strings.Join(n.CleanTags, "-"))
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
func (s *Source) ListNews(sport *Sport, url string) ([]*News, error) {
|
||
|
var newsList []*News
|
||
|
|
||
|
fp := gofeed.NewParser()
|
||
|
feed, err := fp.ParseURL(url)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
for _, item := range feed.Items {
|
||
|
n := &News{
|
||
|
Source: s,
|
||
|
Sport: sport,
|
||
|
Title: item.Title,
|
||
|
Description: regexp.MustCompile(`<[^>]*>`).ReplaceAllLiteralString(item.Description, ""),
|
||
|
CleanTitle: utils.Sanitize(item.Title),
|
||
|
PubDate: item.PublishedParsed,
|
||
|
Link: item.Link,
|
||
|
}
|
||
|
for _, tags := range item.Categories {
|
||
|
for _, tag := range strings.Split(tags, ",") {
|
||
|
n.Tags = append(n.Tags, strings.TrimSpace(tag))
|
||
|
n.CleanTags = append(n.CleanTags, utils.Sanitize(strings.TrimSpace(tag)))
|
||
|
}
|
||
|
}
|
||
|
if item.Image != nil {
|
||
|
n.Image = item.Image.URL
|
||
|
} else if len(item.Enclosures) > 0 {
|
||
|
n.Image = item.Enclosures[0].URL
|
||
|
} else if s.Name == "Eurosport" {
|
||
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(item.Description))
|
||
|
if err == nil {
|
||
|
if src, ok := doc.Find("img").Attr("src"); ok {
|
||
|
n.Image = src
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if item.Author != nil {
|
||
|
n.Author = item.Author.Name
|
||
|
}
|
||
|
newsList = append(newsList, n)
|
||
|
}
|
||
|
return newsList, nil
|
||
|
}
|