Skip to content
This repository has been archived by the owner on May 11, 2022. It is now read-only.

Commit

Permalink
fourth iteration of refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
kamilsk committed Apr 26, 2018
1 parent 202aae8 commit ef9e27c
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 66 deletions.
4 changes: 2 additions & 2 deletions cmd/urls.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ var urlsCmd = &cobra.Command{
Run: func(cmd *cobra.Command, args []string) {
availability.
NewPrinter(
availability.Output(cmd.OutOrStdout()),
availability.OutputForPrinting(cmd.OutOrStdout()),
).
For(
availability.NewReport(
availability.ClientProvider(availability.Colly),
availability.CrawlerForSites(availability.CrawlerColly("curl/7.54.0")),
).
For(args).
Fill(),
Expand Down
113 changes: 76 additions & 37 deletions http/availability/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,96 @@ import (

"github.com/gocolly/colly"
"github.com/gocolly/colly/debug"
"github.com/pkg/errors"
)

// ~
const (
location = "Location"
)

// Crawler is the abstraction a Report uses to walk a site.
//
// Visit starts at url and receives the EventBus on which the implementation
// is expected to publish what it observes; it returns an error when the walk
// could not be performed.
type Crawler interface {
Visit(url string, bus EventBus) error
}

// CrawlerFunc is a function type with the same shape as Crawler.Visit, so a
// plain function can be used wherever a Crawler is required.
type CrawlerFunc func(string, EventBus) error

var Colly FunctionalBinder = func(s *Site) Client {
c := colly.NewCollector(
UserAgent(), NoRedirect(), colly.IgnoreRobotsTxt(),
// Visit implements Crawler by invoking the underlying function with the same
// arguments and returning its result unchanged.
func (fn CrawlerFunc) Visit(url string, bus EventBus) error {
	return fn(url, bus)
}

TempOption(s),
)
return c
// CrawlerColly builds a Crawler backed by a colly collector.
//
// Every call to Visit parses the entry point URL, creates a fresh collector
// configured with the given user agent, robots.txt handling disabled, the
// NoRedirect option and the OnError/OnResponse/OnHTML event publishers, and
// then visits the entry point. A malformed entry point is reported as a
// wrapped parse error before any collector is created.
func CrawlerColly(userAgent string) Crawler {
	visit := func(entry string, bus EventBus) error {
		base, err := url.Parse(entry)
		if err != nil {
			return errors.Wrapf(err, "parse entry point URL %q", entry)
		}
		collector := colly.NewCollector(
			colly.UserAgent(userAgent),
			colly.IgnoreRobotsTxt(),
			NoRedirect(),
			OnError(bus),
			OnResponse(bus),
			OnHTML(base, bus),
		)
		return collector.Visit(entry)
	}
	return CrawlerFunc(visit)
}

type FunctionalBinder func(*Site) Client
// OnError returns a collector option that publishes an ErrorEvent on bus for
// every failure the collector reports through its OnError callback.
//
// The event carries the response status code, the URL of the failed request,
// the error itself and, for redirects, the value of the Location header.
//
// A redirect is recognised either by the http.ErrUseLastResponse sentinel or
// by a 3xx status code. The status check is needed because net/http returns
// the last response with a nil error when a redirect policy answers with
// ErrUseLastResponse, so the sentinel alone does not identify a suppressed
// redirect.
// NOTE(review): this relies on colly routing 3xx responses to OnError with a
// synthesized error — confirm against the vendored colly version.
func OnError(bus EventBus) func(*colly.Collector) {
	return func(c *colly.Collector) {
		c.OnError(func(resp *colly.Response, err error) {
			isRedirect := err == http.ErrUseLastResponse ||
				(resp.StatusCode >= http.StatusMultipleChoices && resp.StatusCode < http.StatusBadRequest)

			var redirect string
			// Headers can be absent when the request failed before any
			// response was received, so guard before reading Location.
			if isRedirect && resp.Headers != nil {
				redirect = resp.Headers.Get(location)
			}
			bus <- ErrorEvent{
				StatusCode: resp.StatusCode,
				Location:   resp.Request.URL.String(),
				Redirect:   redirect,
				Error:      err,
			}
		})
	}
}

func (fn FunctionalBinder) Bind(s *Site) Client { return fn(s) }
// OnResponse returns a collector option that publishes a ResponseEvent on bus
// for every response passed to the collector's OnResponse callback. The event
// records the response status code and the URL of the request that produced it.
func OnResponse(bus EventBus) func(*colly.Collector) {
	return func(collector *colly.Collector) {
		publish := func(r *colly.Response) {
			evt := ResponseEvent{
				StatusCode: r.StatusCode,
				Location:   r.Request.URL.String(),
			}
			bus <- evt
		}
		collector.OnResponse(publish)
	}
}

// ~
// OnHTML returns a collector option that follows the links of a site.
//
// For every "a[href]" element found on a page whose hostname equals the
// hostname of base, the href is resolved against the page URL, a WalkEvent
// (page, resolved href) is published on bus, and the resolved link is queued
// for a visit. Links found on pages of other hosts are ignored, so the walk
// goes at most one hop outside the base site.
//
// Hrefs that cannot be resolved to an absolute URL (AbsoluteURL yields an
// empty string, e.g. for fragment-only links) are skipped: there is nothing
// to report or to visit for them.
func OnHTML(base *url.URL, bus EventBus) func(*colly.Collector) {
	return func(c *colly.Collector) {
		c.OnHTML("a[href]", func(el *colly.HTMLElement) {
			if el.Request.URL.Hostname() != base.Hostname() {
				return
			}
			href := el.Request.AbsoluteURL(el.Attr("href"))
			if href == "" {
				return
			}
			bus <- WalkEvent{
				Page: el.Request.URL.String(),
				Href: href,
			}
			// Best effort, as before: the error returned by Visit is
			// deliberately dropped.
			// NOTE(review): presumably request failures still surface via
			// the collector's OnError callback — confirm.
			_ = el.Request.Visit(href)
		})
	}
}

const (
location = "Location"
)
// ~

var redirects = map[int]struct{}{
http.StatusMovedPermanently: {},
http.StatusFound: {},
http.StatusTemporaryRedirect: {},
http.StatusPermanentRedirect: {},
// Debugger exposes colly's debug.Debugger interface under this package's
// name by embedding it; it adds no methods of its own.
type Debugger interface {
debug.Debugger
}

func UserAgent() func(*colly.Collector) {
return colly.UserAgent("check")
// Option is a functional option that receives a *Site to configure.
type Option func(*Site)

// WithDebugger returns an Option for a Site. The returned option is currently
// a no-op placeholder: it ignores the Site it is given.
func WithDebugger() Option {
return func(*Site) {
// Intentionally empty: nothing is attached to the Site yet.
}
}

// ~

func NoRedirect() func(*colly.Collector) {
return func(c *colly.Collector) {
c.RedirectHandler = func(req *http.Request, via []*http.Request) error {
Expand Down Expand Up @@ -165,21 +221,4 @@ func (r *Site) setStatus(resp *colly.Response) {

}
link.StatusCode = resp.StatusCode
if _, is := redirects[link.StatusCode]; is {
link.Redirect = resp.Headers.Get(location)
}
}

// ~

type Debugger interface {
debug.Debugger
}

type Option func(*Site)

func WithDebugger() Option {
return func(*Site) {
//
}
}
27 changes: 15 additions & 12 deletions http/availability/printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,6 @@ var colors = map[string]*color.Color{
danger: color.New(color.FgRed, color.Bold),
}

func Output(output io.Writer) func(*Printer) {
return func(p *Printer) {
p.output = output
}
}

func NewPrinter(options ...func(*Printer)) *Printer {
p := &Printer{}
for _, f := range options {
Expand All @@ -40,6 +34,12 @@ func NewPrinter(options ...func(*Printer)) *Printer {
return p
}

// OutputForPrinting returns a Printer option that makes the printer write to
// the given writer.
func OutputForPrinting(output io.Writer) func(*Printer) {
	setOutput := func(printer *Printer) {
		printer.output = output
	}
	return setOutput
}

type Printer struct {
output io.Writer
report *Report
Expand All @@ -52,10 +52,13 @@ func (p *Printer) For(report *Report) *Printer {

func (p *Printer) Print() {
w := p.outOrStdout()

if p.report == nil {
critical().Fprintf(w, "nothing to print")
return
}
for _, site := range p.report.Sites() {
if err := site.Error(); err != nil {
important().Fprintf(w, "report %q has error %q\n", site.Name(), err)
critical().Fprintf(w, "report %q has error %q\n", site.Name(), err)
continue
}
sort.Sort(pagesByLocation(site.Pages))
Expand Down Expand Up @@ -92,20 +95,20 @@ func colorize(statusCode int) typewriter {
tw, _ = colors[danger]
}
if tw == nil {
tw = defaultTypewriter(fmt.Fprintf)
tw = typewriterFunc(fmt.Fprintf)
}
return tw
}

func important() typewriter { return colorize(999) }
// critical returns the typewriter Print uses for failure messages ("nothing
// to print" and per-site errors).
// NOTE(review): 999 is not a real HTTP status code; presumably it makes
// colorize fall through to the danger color — confirm against colorize.
func critical() typewriter { return colorize(999) }

// typewriter is anything that can write formatted output to a writer using
// the fmt.Fprintf signature. colorize returns values of this type.
type typewriter interface {
Fprintf(io.Writer, string, ...interface{}) (int, error)
}

type defaultTypewriter func(io.Writer, string, ...interface{}) (int, error)
// typewriterFunc is a function type with the fmt.Fprintf signature; colorize
// wraps fmt.Fprintf in it as the fallback when no colored typewriter was
// selected.
type typewriterFunc func(io.Writer, string, ...interface{}) (int, error)

func (fn defaultTypewriter) Fprintf(w io.Writer, format string, a ...interface{}) (int, error) {
// Fprintf implements typewriter by forwarding all arguments to the wrapped
// function and returning its byte count and error unchanged.
func (fn typewriterFunc) Fprintf(w io.Writer, format string, a ...interface{}) (int, error) {
	written, err := fn(w, format, a...)
	return written, err
}

Expand Down
1 change: 1 addition & 0 deletions http/availability/printer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package availability_test
65 changes: 50 additions & 15 deletions http/availability/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"net/url"
"sync"

"fmt"

"github.com/pkg/errors"
)

Expand All @@ -15,9 +17,15 @@ func NewReport(options ...func(*Report)) *Report {
return r
}

// CrawlerForSites returns a Report option that sets the crawler the report
// hands to each site when it is filled.
func CrawlerForSites(crawler Crawler) func(*Report) {
	assign := func(report *Report) {
		report.crawler = crawler
	}
	return assign
}

type Report struct {
binder Binder
sites []*Site
crawler Crawler
sites []*Site
}

func (r *Report) For(rawURLs []string) *Report {
Expand All @@ -35,7 +43,7 @@ func (r *Report) Fill() *Report {
go func(site *Site) {
defer wg.Done()
<-start
site.Fetch(r.binder)
site.Fetch(r.crawler)
}(site)
}
close(start)
Expand Down Expand Up @@ -70,8 +78,7 @@ func NewSite(rawURL string) *Site {
error: errors.Wrapf(err, "parse rawURL %q for report", rawURL),

Pages: make([]*Page, 0, 8),

mu: &sync.RWMutex{}, journal: make(map[string]*Link),
mu: &sync.RWMutex{}, journal: make(map[string]*Link),
}
}

Expand All @@ -84,18 +91,33 @@ func hostOrRawURL(u *url.URL, raw string) string {

// ~

func ClientProvider(b Binder) func(*Report) {
return func(r *Report) {
r.binder = b
}
// event is the marker interface for values sent over an EventBus. Its only
// method is unexported, so types outside this package cannot implement it
// directly; the event structs in this file obtain the method by embedding
// the interface.
type event interface {
family()
}

type Binder interface {
Bind(*Site) Client
// EventBus is the channel on which a Crawler publishes events; Site.Fetch
// creates it and consumes from it.
type EventBus chan event

// ErrorEvent is published by the OnError collector option when a request
// fails.
type ErrorEvent struct {
// Embedded so the struct satisfies the event marker interface; the
// publishers leave it nil.
event

// StatusCode is the status code of the failed response.
StatusCode int
// Location is the URL of the request that failed.
Location string
// Redirect is the response's Location header value when the crawler
// captured one; empty otherwise.
Redirect string
// Error is the error reported by the collector.
Error error
}

type Client interface {
Visit(URL string) error
// ResponseEvent is published by the OnResponse collector option for every
// response the collector hands to its response callback.
type ResponseEvent struct {
// Embedded so the struct satisfies the event marker interface; the
// publisher leaves it nil.
event

// StatusCode is the status code of the response.
StatusCode int
// Location is the URL of the request that produced the response.
Location string
}

// WalkEvent is published by the OnHTML collector option for every link found
// on a page of the crawled site.
type WalkEvent struct {
// Embedded so the struct satisfies the event marker interface; the
// publisher leaves it nil.
event

// Page is the URL of the page on which the link was found.
Page string
// Href is the link target resolved against the page URL.
Href string
}

// ~
Expand All @@ -116,11 +138,24 @@ func (r *Site) Name() string { return r.name }

func (r *Site) Error() error { return r.error }

func (r *Site) Fetch(binder Binder) error {
// Fetch walks the site with the given crawler, starting at the site's URL.
//
// If the site already carries an error (for example its raw URL could not be
// parsed), that error is returned and nothing is crawled. Otherwise a
// consumer goroutine drains the event bus while crawler.Visit runs; for now
// the consumer only prints every event to stdout.
//
// The bus is closed and the consumer is awaited on every path, including a
// failed Visit, so the goroutine is never leaked and buffered events are
// always flushed before Fetch returns. The error from Visit, if any, is
// returned as is.
func (r *Site) Fetch(crawler Crawler) error {
	if r.error != nil {
		return r.error
	}

	bus := make(EventBus, 1024)
	wg := &sync.WaitGroup{}
	wg.Add(1)
	go func() {
		defer wg.Done()
		for e := range bus {
			fmt.Printf("%#+v ~\n", e)
		}
	}()

	err := crawler.Visit(r.url.String(), bus)
	// NOTE(review): closing here assumes Visit is synchronous and no
	// callback publishes after it returns — the original made the same
	// assumption on its success path.
	close(bus)
	wg.Wait()
	return err
}

type Page struct {
Expand Down

0 comments on commit ef9e27c

Please sign in to comment.