// scraper/requests/requests.go
//
// 77 lines
// 1.6 KiB
// Go

package requests
import (
"fmt"
"log"
"net/http"
"net/url"
"github.com/PuerkitoBio/goquery"
"github.com/go-redis/redis"
"golang.org/x/net/proxy"
)
// Connection endpoints and the fallback User-Agent used by this package.
const (
// redisAddr is the address init passes to the Redis client.
redisAddr = "127.0.0.1:6379"
// torAddr is the SOCKS5 proxy URL that init parses and dials HTTP traffic
// through (presumably a local Tor daemon, going by the name — confirm).
torAddr = "socks5://127.0.0.1:9050"
// defaultAgent is the User-Agent sent by GetDocumentFromURL when the Redis
// set "agents" yields an empty value.
// NOTE(review): the literal contains a "…" character inside the platform
// section, which looks like a truncated string — confirm the intended value.
defaultAgent = "Mozilla/5.0 (X11; Linux x86_64…) Gecko/20100101 Firefox/68.0"
)
// cli is the package-wide HTTP client. It is assigned in init with a
// transport that dials through the proxy at torAddr, and is used by
// GetDocumentFromURL to send requests.
var cli *http.Client
// red is the package-wide Redis client. It is assigned in init (connected to
// redisAddr) and is read by GetDocumentFromURL to pick a User-Agent.
var red *redis.Client
// init wires up the package-level clients used by GetDocumentFromURL: an HTTP
// client whose connections are dialed through the SOCKS5 proxy at torAddr, and
// a Redis client connected to redisAddr.
//
// Any failure (unparsable proxy URL, dialer construction error, failed or
// unexpected Redis PING) terminates the process via log.Fatalf.
func init() {
	proxyURL, err := url.Parse(torAddr)
	if err != nil {
		log.Fatalf("error while parsing torAddr %s : %s", torAddr, err)
	}

	// proxy.Direct is the underlying dialer used to reach the proxy itself.
	dialer, err := proxy.FromURL(proxyURL, proxy.Direct)
	if err != nil {
		log.Fatalf("error while creating dialer : %s", err)
	}

	// NOTE(review): no timeout is configured on the client or its transport,
	// so a stalled connection can block a request indefinitely — consider
	// setting http.Client.Timeout once an acceptable bound is agreed on.
	cli = &http.Client{
		Transport: &http.Transport{
			Dial: dialer.Dial,
		},
	}

	red = redis.NewClient(&redis.Options{
		Addr: redisAddr,
	})

	// Use Result rather than Val so that a connection failure is reported
	// with its actual cause instead of as an empty "unexpected response".
	pong, err := red.Ping().Result()
	if err != nil {
		log.Fatalf("error while pinging redis at %s : %s", redisAddr, err)
	}
	if pong != "PONG" {
		log.Fatalf("unexpected response from redis PING command : %s", pong)
	}
}
// GetDocumentFromURL fetches url with an HTTP GET through the package's shared
// client and parses the response body into a goquery document.
//
// The User-Agent header is set to a random member of the Redis set "agents".
// When that lookup yields an empty string, defaultAgent is used instead
// (presumably this also covers a failed Redis call, since the command's error
// is not inspected — confirm).
//
// A non-nil error is returned when the request cannot be built, cannot be
// sent, or when the body cannot be parsed. The underlying error is wrapped,
// so callers can examine it with errors.Is / errors.As.
func GetDocumentFromURL(url string) (*goquery.Document, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("error while building request: %w", err)
	}

	agent := red.SRandMember("agents").Val()
	if agent == "" {
		agent = defaultAgent
	}
	req.Header.Set("User-Agent", agent)

	resp, err := cli.Do(req)
	if err != nil {
		return nil, fmt.Errorf("error while sending request: %w", err)
	}
	defer func() {
		// A failed Close is only logged: terminating the whole process from
		// a library call because a response body could not be closed would
		// take down every other in-flight request as well.
		if err := resp.Body.Close(); err != nil {
			log.Printf("error while closing body for %s : %s", url, err)
		}
	}()

	// NOTE(review): resp.StatusCode is not checked, so error pages (4xx/5xx)
	// are parsed and returned like any other document — confirm that callers
	// want this before adding a status check.
	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("error while parsing response: %w", err)
	}
	return doc, nil
}