Some improvements in postgres client with match and team objects + add some utility functions

This commit is contained in:
Samuel Campos 2020-12-02 17:58:49 +01:00
parent f07ed95702
commit 748bbb8048
8 changed files with 125 additions and 28 deletions

2
.gitignore vendored
View File

@ -1,2 +1,2 @@
.idea
*.env

View File

@ -116,10 +116,10 @@ func updateSchedule() {
break
}
for _, m := range matches {
if err := postgres.InsertTeamBySourceName(m.TeamHome, s.League); err != nil {
if err := postgres.InsertTeamBySourceName(m.TeamHome); err != nil {
log.Fatalf("error while saving team home : %s", err)
}
if err := postgres.InsertTeamBySourceName(m.TeamAway, s.League); err != nil {
if err := postgres.InsertTeamBySourceName(m.TeamAway); err != nil {
log.Fatalf("error while saving team away : %s", err)
}
if err := postgres.InsertMatch(m); err != nil {

View File

@ -17,7 +17,11 @@ import (
// Gender identifiers. The zero value of iota is discarded, so the first
// real identifier is 1.
const (
	_ = iota
	// GenderMale identifies the male gender; its value is 1.
	GenderMale
	//GenderFemale = iota
)
// Match status identifiers. The zero value of iota is discarded, so the
// first real identifier is 1.
const (
	_ = iota
	// StatusComing has the value 1.
	// NOTE(review): presumably marks a match that has not started yet — confirm.
	StatusComing
)
type Sport struct {
@ -37,6 +41,12 @@ type Player struct {
Gender int
}
// TeamImages groups the image file names attached to a team.
//
// The fields carry no struct tags, so encoding/json uses the field names
// ("H30", "H50", "H80") as object keys.
// NOTE(review): the numeric suffix presumably is the image height in
// pixels — confirm.
type TeamImages struct {
	H30, H50, H80 string
}
type Team struct {
Id int
Sport *Sport
@ -51,7 +61,7 @@ type Team struct {
Names *map[string]string
PlayersUrl *string
Images *interface{}
Images *TeamImages
Tags *[]string
CleanTags *[]string
@ -61,6 +71,19 @@ type Team struct {
Trace *string
}
// NewTeam returns a new Team carrying the given gender, sport and country.
// Images is preset to the "t0-h30.svg", "t0-h50.svg" and "t0-h80.svg" file
// names; every other field keeps its zero value.
func NewTeam(gender *int, sport *Sport, country *Country) *Team {
	team := new(Team)
	team.Gender = gender
	team.Sport = sport
	team.Country = country
	team.Images = &TeamImages{
		H30: "t0-h30.svg",
		H50: "t0-h50.svg",
		H80: "t0-h80.svg",
	}
	return team
}
type Match struct {
Id int
League *League
@ -122,6 +145,7 @@ type source struct {
round string
currentDate *time.Time
currentTimezone string
}
func newSource(league *League, scheme string, host string, path string, query string, round string, matchDay int) *source {
@ -156,8 +180,11 @@ func (s *source) GetMatches() ([]*Match, error) {
return
}
s.currentDate = &curDate
s.currentTimezone = utils.FrenchTimezone(curDate)
} else {
match := NewMatch(s.League, &Team{Gender: s.League.Gender}, &Team{Gender: s.League.Gender}, &Player{}, &Player{}, s.round, s.matchDay, 0)
teamHome := NewTeam(s.League.Gender, s.League.Sport, s.League.Country)
teamAway := NewTeam(s.League.Gender, s.League.Sport, s.League.Country)
match := NewMatch(s.League, teamHome, teamAway, &Player{}, &Player{}, s.round, s.matchDay, 0)
startTime := strings.TrimSpace(row.Find("td.lm1").Text())
if startTime == "-- : --" {
@ -165,7 +192,7 @@ func (s *source) GetMatches() ([]*Match, error) {
}
startDate, err := time.Parse(
"2006-01-02 15:04 MST",
s.currentDate.Format("2006-01-02 ") + startTime + " CEST",
s.currentDate.Format("2006-01-02 ") + startTime + " " + s.currentTimezone,
)
if err != nil {
match.Error = utils.StringPointer("parse date error")
@ -175,9 +202,9 @@ func (s *source) GetMatches() ([]*Match, error) {
}
match.StartDate = &startDate
homeNames := map[string]string{utils.HostMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")}
homeNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq1").Text()), "*")}
match.TeamHome.Names = &homeNames
awayNames := map[string]string{utils.HostMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")}
awayNames := map[string]string{utils.KeywordMatchendirect: strings.TrimSuffix(strings.TrimSpace(row.Find(".lm3_eq2").Text()), "*")}
match.TeamAway.Names = &awayNames
basePath, ok := row.Find(".lm3 a").First().Attr("href")

View File

@ -3,6 +3,7 @@ package postgres
import (
"crypto/sha256"
"database/sql"
"encoding/json"
"fmt"
"log"
"math"
@ -229,7 +230,7 @@ func DeleteNews(n *news.News) (int64, error) {
return res.RowsAffected()
}
func InsertTeamBySourceName(t *match.Team, l *match.League) error {
func InsertTeamBySourceName(t *match.Team) error {
var host, name string
if t.Names == nil {
@ -241,21 +242,35 @@ func InsertTeamBySourceName(t *match.Team, l *match.League) error {
shortName = string(runeName[:int(math.Min(3, float64(len(runeName))))])
break
}
err := pg.psqlConn.QueryRow("SELECT id, name FROM mainapp_team WHERE names->>$1 = $2", host, name).Scan(&t.Id, &t.Name)
// First try to get existing team if any from database
err := pg.psqlConn.QueryRow(`
SELECT id, name
FROM mainapp_team
WHERE names->>$1 = $2 AND sport_id = $3 AND gender = $4
ORDER BY id ASC
LIMIT 1
`, utils.Sanitize(host), name, t.Sport.Id, t.Gender).Scan(&t.Id, &t.Name)
// Else create team in database
if err != nil {
cleanName := utils.Sanitize(name)
jsonHost := fmt.Sprintf("{\"%s\"}", utils.Sanitize(host))
jsonImages, err := json.Marshal(t.Images)
if err != nil {
return err
}
return pg.psqlConn.QueryRow(`
INSERT INTO mainapp_team
(sport_id, country_id, name, clean_name, short_name, long_name, gender,
(sport_id, country_id, name, clean_name, short_name, long_name, gender, images,
names, tags, clean_tags, news_count)
VALUES
($1, $2, $3, $4, $5, $6, $7,
jsonb_set('{}', $8, to_jsonb($9::text), true), $10, $11, 0)
($1, $2, $3, $4, $5, $6, $7, $8,
jsonb_set('{}', $9, to_jsonb($10::text), true), $11, $12, 0)
ON CONFLICT ON CONSTRAINT custom_unique_team DO UPDATE SET
names = jsonb_set(mainapp_team.names, $12, to_jsonb($13::text), true)
names = jsonb_set(mainapp_team.names, $13, to_jsonb($14::text), true)
RETURNING id, name
`, l.Sport.Id, l.Country.Id, name, cleanName, shortName, name, iValue(l.Gender),
`, t.Sport.Id, t.Country.Id, name, cleanName, shortName, name, iValue(t.Gender), jsonImages,
jsonHost, name, pq.Array([]string{name}), pq.Array([]string{cleanName}),
jsonHost, name).Scan(&t.Id, &t.Name)
}
@ -282,21 +297,22 @@ func InsertMatch(m *match.Match) error {
arr = append(arr, foreignId(m.PlayerAway.Id))
arr = append(arr, sValue(m.Round))
hash := sha256.New()
hash.Write([]byte(concatWS(arr, "/")))
hash.Write([]byte(concatWS(arr, "|")))
sign := fmt.Sprintf("%x", hash.Sum(nil))
return pg.psqlConn.QueryRow(`
INSERT INTO mainapp_match
(league_id, team_home_id, team_away_id, player_home_id, player_away_id, mday, round, leg, sign,
mday_id, base_url, start_date, error, trace)
(league_id, team_home_id, team_away_id, player_home_id,
player_away_id, mday, round, leg, sign, mday_id,
status, base_url, start_date, error, trace)
VALUES
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
ON CONFLICT ON CONSTRAINT mainapp_match_sign_key DO UPDATE SET
base_url = $11, start_date = $12, error = $13, trace = $14
base_url = $12, start_date = $13, error = $14, trace = $15
RETURNING id
`, m.League.Id, foreignId(m.TeamHome.Id), foreignId(m.TeamAway.Id), foreignId(m.PlayerHome.Id),
foreignId(m.PlayerAway.Id), iValue(m.MatchDay), sValue(m.Round), iValue(m.Leg), sign,
iValue(m.MatchDayId), sValue(m.BaseUrl), m.StartDate, sValue(m.Error), sValue(m.Trace)).Scan(&m.Id)
foreignId(m.PlayerAway.Id), iValue(m.MatchDay), sValue(m.Round), iValue(m.Leg), sign, iValue(m.MatchDayId),
match.StatusComing, sValue(m.BaseUrl), m.StartDate, sValue(m.Error), sValue(m.Trace)).Scan(&m.Id)
}
func DeleteMatch(m *match.Match) (int64, error) {

View File

@ -99,15 +99,16 @@ func TestDeleteNews(t *testing.T) {
func TestInsertTeamBySourceName(t *testing.T) {
teamNames := map[string]string{utils.HostMatchendirect: "Toto"}
league := &match.League{
lg := &match.League{
Id: 1,
Sport: &match.Sport{Id: 1},
Country: &match.Country{Id: 1},
Gender: utils.IntPointer(match.GenderMale),
}
tm = &match.Team{Names: &teamNames}
for _, _ = range []int{0, 1} {
if err := InsertTeamBySourceName(tm, league); err != nil {
tm = match.NewTeam(utils.IntPointer(match.GenderMale), lg.Sport, lg.Country)
tm.Names = &teamNames
for range []int{0, 1} {
if err := InsertTeamBySourceName(tm); err != nil {
t.Errorf("unexpected error : %s", err)
}
if tm.Id == 0 {
@ -121,7 +122,7 @@ func TestInsertMatch(t *testing.T) {
mh = match.NewMatch(&match.League{Id: 1}, tm, tm, &match.Player{}, &match.Player{}, "", 0, 0)
mh.StartDate = &startDate
mh.BaseUrl = utils.StringPointer("https://test.com/toto")
for _, _ = range []int{0, 1} {
for range []int{0, 1} {
if err := InsertMatch(mh); err != nil {
t.Error(err)
}

BIN
scraper

Binary file not shown.

View File

@ -4,6 +4,7 @@ import (
"log"
"strconv"
"strings"
"time"
)
const (
@ -13,6 +14,13 @@ const (
HostFFTT = "www.fftt.com"
HostFootmercato = "www.footmercato.net"
HostLequipe = "www.lequipe.fr"
KeywordMatchendirect = "matchendirect"
//KeywordEurosport = "eurosport"
//KeywordRugbyrama = "rugbyrama"
//KeywordFFTT = "fftt"
//KeywordFootmercato = "footmercato"
//KeywordLequipe = "lequipe"
)
func Sanitize(s string) (t string) {
@ -133,3 +141,20 @@ func ArrayPointerJoin(a *[]string, sep string) string {
}
return strings.Join(*a, sep)
}
// FrenchTimezone returns the time zone abbreviation to use for t:
// "CEST" from the last Sunday of March (inclusive) up to the last Sunday
// of October (exclusive) of t's year, and "CET" otherwise.
//
// Both boundaries are taken at 00:00 UTC of the Sunday in question, so the
// result has day granularity.
// NOTE(review): the legal switch happens at 01:00 UTC on those Sundays;
// confirm callers only pass dates, not precise instants.
//
// The boundaries are built with time.Date instead of formatting the year
// into a string and parsing it back, so there is no parse error to ignore
// and years that are not exactly four digits are handled as well.
func FrenchTimezone(t time.Time) string {
	// lastSunday returns midnight UTC of the last Sunday of a 31-day month.
	lastSunday := func(month time.Month) time.Time {
		lastDay := time.Date(t.Year(), month, 31, 0, 0, 0, 0, time.UTC)
		// Weekday() is 0 for Sunday, so this steps back to the Sunday on
		// or before the 31st.
		return lastDay.AddDate(0, 0, -int(lastDay.Weekday()))
	}

	dstStart := lastSunday(time.March)
	dstEnd := lastSunday(time.October)
	if !t.Before(dstStart) && t.Before(dstEnd) {
		return "CEST"
	}
	return "CET"
}

View File

@ -2,6 +2,7 @@ package utils
import (
"testing"
"time"
)
func TestSanitize(t *testing.T) {
@ -88,3 +89,30 @@ func TestArrayPointerJoin(t *testing.T) {
t.Errorf("unexpected join result : %s", s)
}
}
// TestFrenchTimezone checks FrenchTimezone on the first and last day of
// 2020 and on the days surrounding the last Sundays of March and October
// of that year.
func TestFrenchTimezone(t *testing.T) {
	const layout = "02 January 2006"
	cases := []struct {
		date string
		want string
	}{
		{"01 January 2020", "CET"},
		{"28 March 2020", "CET"},
		{"29 March 2020", "CEST"},
		{"24 October 2020", "CEST"},
		{"25 October 2020", "CET"},
		{"31 December 2020", "CET"},
	}
	for _, c := range cases {
		// The inputs are fixed literals written in the layout above, so
		// the parse error is deliberately ignored.
		day, _ := time.Parse(layout, c.date)
		if got := FrenchTimezone(day); got != c.want {
			t.Errorf("unexpected tz %s for date %s", got, day)
		}
	}
}