all: reformat
This commit is contained in:
@@ -25,12 +25,11 @@ var Visited = struct {
|
|||||||
visited map[string]bool
|
visited map[string]bool
|
||||||
}{visited: make(map[string]bool)}
|
}{visited: make(map[string]bool)}
|
||||||
|
|
||||||
|
|
||||||
// visitNode inspects the current node and, if it contains a link we haven’t
|
// visitNode inspects the current node and, if it contains a link we haven’t
|
||||||
// visited yet, will spawn a goroutine for it. It will also return that link,
|
// visited yet, will spawn a goroutine for it. It will also return that link,
|
||||||
// because we have to add it to our list of linked nodes.
|
// because we have to add it to our list of linked nodes.
|
||||||
func visitNode(node *html.Node, parent, current string, wg *sync.WaitGroup) (*string, error) {
|
func visitNode(node *html.Node, parent, current string, wg *sync.WaitGroup) (*string, error) {
|
||||||
var val *string
|
var val *string
|
||||||
if node.Type == html.ElementNode && node.Data == "a" {
|
if node.Type == html.ElementNode && node.Data == "a" {
|
||||||
for _, a := range node.Attr {
|
for _, a := range node.Attr {
|
||||||
if a.Key != "href" {
|
if a.Key != "href" {
|
||||||
@@ -56,17 +55,17 @@ func visitNode(node *html.Node, parent, current string, wg *sync.WaitGroup) (*st
|
|||||||
|
|
||||||
Visited.RLock()
|
Visited.RLock()
|
||||||
if !Visited.visited[val] {
|
if !Visited.visited[val] {
|
||||||
Visited.RUnlock()
|
Visited.RUnlock()
|
||||||
Visited.Lock()
|
Visited.Lock()
|
||||||
Visited.visited[val] = true
|
Visited.visited[val] = true
|
||||||
Visited.Unlock()
|
Visited.Unlock()
|
||||||
go doCrawl(val, parent, wg)
|
go doCrawl(val, parent, wg)
|
||||||
} else {
|
} else {
|
||||||
Visited.RUnlock()
|
Visited.RUnlock()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return val, nil
|
return val, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseNode parses a single node. It is recursive, and will first be called
|
// parseNode parses a single node. It is recursive, and will first be called
|
||||||
@@ -74,15 +73,15 @@ func visitNode(node *html.Node, parent, current string, wg *sync.WaitGroup) (*st
|
|||||||
// but we don’t know the number of links we will encounter yet.
|
// but we don’t know the number of links we will encounter yet.
|
||||||
func parseNode(node *html.Node, parent, current string, wg *sync.WaitGroup) ([]string, error) {
|
func parseNode(node *html.Node, parent, current string, wg *sync.WaitGroup) ([]string, error) {
|
||||||
links := []string{}
|
links := []string{}
|
||||||
val, err := visitNode(node, parent, current, wg)
|
val, err := visitNode(node, parent, current, wg)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if val != nil {
|
if val != nil {
|
||||||
links = append(links, val)
|
links = append(links, val)
|
||||||
}
|
}
|
||||||
|
|
||||||
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
||||||
newLinks, err := parseNode(c, parent, current, wg)
|
newLinks, err := parseNode(c, parent, current, wg)
|
||||||
@@ -109,8 +108,8 @@ func parseRequest(body io.ReadCloser, parent, url string, wg *sync.WaitGroup) {
|
|||||||
|
|
||||||
links, err := parseNode(document, parent, url, wg)
|
links, err := parseNode(document, parent, url, wg)
|
||||||
|
|
||||||
// this can look weird with concurrent printing, but oh well. I’m not sure
|
// this can look weird with concurrent printing, but oh well. I’m not sure
|
||||||
// it’s worth it to make this linear for now.
|
// it’s worth it to make this linear for now.
|
||||||
pretty.Print(url, links)
|
pretty.Print(url, links)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -119,9 +118,10 @@ func parseRequest(body io.ReadCloser, parent, url string, wg *sync.WaitGroup) {
|
|||||||
// don’t exit prematurely, since this is all concurrent.
|
// don’t exit prematurely, since this is all concurrent.
|
||||||
func doCrawl(toVisit string, parent string, wg *sync.WaitGroup) {
|
func doCrawl(toVisit string, parent string, wg *sync.WaitGroup) {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
resp, err := http.Get(toVisit)
|
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
|
resp, err := http.Get(toVisit)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Println(err)
|
log.Println(err)
|
||||||
return
|
return
|
||||||
|
Reference in New Issue
Block a user