scraper/requests/requests.go

63 lines
1.3 KiB
Go
Raw Normal View History

2020-10-05 08:24:33 +00:00
package requests
import (
	"fmt"
	"log"
	"net/http"
	"net/url"
	"time"

	"github.com/PuerkitoBio/goquery"
	"golang.org/x/net/proxy"
)
const (
	// torAddr is the SOCKS5 proxy URL every outgoing request is dialed
	// through (127.0.0.1:9050, the address the name attributes to Tor).
	torAddr = "socks5://127.0.0.1:9050"

	// userAgent is the value sent in the User-Agent header of every request.
	// It must stay printable ASCII: the previous literal carried a U+2026
	// ellipsis where the Firefox "; rv:68.0" token belongs, which put
	// non-ASCII bytes into the header.
	userAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0"
)

// cli is the package-wide HTTP client. It is assigned once in init and used
// by GetDocumentFromURL to send requests.
var cli *http.Client
// init builds the package-wide HTTP client cli so that every connection is
// dialed through the SOCKS5 proxy at torAddr.
//
// The process is terminated through log.Fatalf when torAddr cannot be parsed
// or when no proxy dialer can be created for it.
func init() {
	// Upper bound for a whole request (dial, redirects, reading the body).
	// Without it a stalled proxy connection would block the caller forever.
	const requestTimeout = 2 * time.Minute

	proxyURL, err := url.Parse(torAddr)
	if err != nil {
		log.Fatalf("error while parsing torAddr %s : %s", torAddr, err)
	}

	dialer, err := proxy.FromURL(proxyURL, proxy.Direct)
	if err != nil {
		log.Fatalf("error while creating dialer : %s", err)
	}

	transport := &http.Transport{}
	// Transport.Dial is deprecated: prefer the context-aware dialer so that a
	// cancelled or timed-out request also aborts an in-flight dial. Fall back
	// to the plain Dial only when the proxy dialer offers nothing better.
	if ctxDialer, ok := dialer.(proxy.ContextDialer); ok {
		transport.DialContext = ctxDialer.DialContext
	} else {
		transport.Dial = dialer.Dial
	}

	cli = &http.Client{
		Transport: transport,
		Timeout:   requestTimeout,
	}
}
// GetDocumentFromURL sends a GET request for url through the package-wide
// client cli, with userAgent as the User-Agent header, and parses the
// response body into a goquery document.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a non-2xx status, or when the body cannot be parsed.
// Underlying errors are wrapped, so callers can inspect them with errors.Is
// and errors.As. The response body is always closed before returning; a
// failure to close it is logged and does not affect the result.
func GetDocumentFromURL(url string) (*goquery.Document, error) {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("error while building request: %w", err)
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := cli.Do(req)
	if err != nil {
		return nil, fmt.Errorf("error while sending request: %w", err)
	}
	defer func() {
		// A close failure is not worth killing the process for: log it and
		// let the caller keep whatever result was already produced.
		if err := resp.Body.Close(); err != nil {
			log.Printf("error while closing body for %s : %s", url, err)
		}
	}()

	// Do not hand an error page back to the caller as if it were the
	// requested document.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("unexpected response status for %s: %s", url, resp.Status)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("error while parsing response: %w", err)
	}
	return doc, nil
}