all: reformat

This commit is contained in:
2018-05-29 12:32:24 +02:00
parent 546188fb7d
commit af61b5fd75

View File

@@ -25,12 +25,11 @@ var Visited = struct {
visited map[string]bool visited map[string]bool
}{visited: make(map[string]bool)} }{visited: make(map[string]bool)}
// visitNode inspects the current node and, if it contains a link we haven't // visitNode inspects the current node and, if it contains a link we haven't
// visited yet, will spawn a goroutine for it. It will also return that link, // visited yet, will spawn a goroutine for it. It will also return that link,
// because we have to add it to our list of linked nodes. // because we have to add it to our list of linked nodes.
func visitNode(node *html.Node, parent, current string, wg *sync.WaitGroup) (*string, error) { func visitNode(node *html.Node, parent, current string, wg *sync.WaitGroup) (*string, error) {
var val *string var val *string
if node.Type == html.ElementNode && node.Data == "a" { if node.Type == html.ElementNode && node.Data == "a" {
for _, a := range node.Attr { for _, a := range node.Attr {
if a.Key != "href" { if a.Key != "href" {
@@ -56,17 +55,17 @@ func visitNode(node *html.Node, parent, current string, wg *sync.WaitGroup) (*st
Visited.RLock() Visited.RLock()
if !Visited.visited[val] { if !Visited.visited[val] {
Visited.RUnlock() Visited.RUnlock()
Visited.Lock() Visited.Lock()
Visited.visited[val] = true Visited.visited[val] = true
Visited.Unlock() Visited.Unlock()
go doCrawl(val, parent, wg) go doCrawl(val, parent, wg)
} else { } else {
Visited.RUnlock() Visited.RUnlock()
} }
} }
} }
return val, nil return val, nil
} }
// parseNode parses a single node. It is recursive, and will first be called // parseNode parses a single node. It is recursive, and will first be called
@@ -74,15 +73,15 @@ func visitNode(node *html.Node, parent, current string, wg *sync.WaitGroup) (*st
// but we don't know the number of links we will encounter yet. // but we don't know the number of links we will encounter yet.
func parseNode(node *html.Node, parent, current string, wg *sync.WaitGroup) ([]string, error) { func parseNode(node *html.Node, parent, current string, wg *sync.WaitGroup) ([]string, error) {
links := []string{} links := []string{}
val, err := visitNode(node, parent, current, wg) val, err := visitNode(node, parent, current, wg)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if val != nil { if val != nil {
links = append(links, val) links = append(links, val)
} }
for c := node.FirstChild; c != nil; c = c.NextSibling { for c := node.FirstChild; c != nil; c = c.NextSibling {
newLinks, err := parseNode(c, parent, current, wg) newLinks, err := parseNode(c, parent, current, wg)
@@ -109,8 +108,8 @@ func parseRequest(body io.ReadCloser, parent, url string, wg *sync.WaitGroup) {
links, err := parseNode(document, parent, url, wg) links, err := parseNode(document, parent, url, wg)
// this can look weird with concurrent printing, but oh well. I'm not sure // this can look weird with concurrent printing, but oh well. I'm not sure
// it's worth it to make this linear for now. // it's worth it to make this linear for now.
pretty.Print(url, links) pretty.Print(url, links)
} }
@@ -119,9 +118,10 @@ func parseRequest(body io.ReadCloser, parent, url string, wg *sync.WaitGroup) {
// don't exit prematurely, since this is all concurrent. // don't exit prematurely, since this is all concurrent.
func doCrawl(toVisit string, parent string, wg *sync.WaitGroup) { func doCrawl(toVisit string, parent string, wg *sync.WaitGroup) {
wg.Add(1) wg.Add(1)
resp, err := http.Get(toVisit)
defer wg.Done() defer wg.Done()
resp, err := http.Get(toVisit)
if err != nil { if err != nil {
log.Println(err) log.Println(err)
return return