Skip to content

Instantly share code, notes, and snippets.

@gdiasag
Created October 18, 2024 03:23
Show Gist options
  • Select an option

  • Save gdiasag/0f67035262810ac0f617335f5b81e9de to your computer and use it in GitHub Desktop.

Select an option

Save gdiasag/0f67035262810ac0f617335f5b81e9de to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"io"
"log"
"net/http"
"regexp"
"sync"
)
// urlRx matches URL-like substrings: one of the schemes http, https, ftp,
// ftps, gopher, telnet or nntp followed by "://", or the prefixes "mailto:"
// or "news:", followed by one or more characters from the allowed set
// (ASCII letters, digits and a fixed list of punctuation).
//
// It is compiled once at package initialization; MustCompile panics if the
// pattern is invalid.
var urlRx = regexp.MustCompile(
	`((((https?|ftps?|gopher|telnet|nntp)://)|(mailto:|news:))([-%()_.!~*';/?:@&=+$,A-Za-z0-9])+)`,
)
// crawl receives one URL from ch, fetches it, prints the URL and the page
// body, and then starts one goroutine per link found on the page to crawl
// that link with depth-1.
//
// depth is the number of page levels still to visit. When depth <= 0 crawl
// returns immediately without receiving from ch.
//
// wg must already have been incremented once by the caller for this call:
// crawl calls wg.Done exactly once when it returns, and calls wg.Add(1)
// itself before starting each child.
//
// A fetch error is logged and ends only this branch of the crawl.
func crawl(ch chan string, depth int, wg *sync.WaitGroup) {
	defer wg.Done()
	if depth <= 0 {
		return
	}

	page := <-ch
	body, refs, err := fetch(page)
	if err != nil {
		// A single unreachable or unsupported link (the URL pattern also
		// matches mailto: and news:) must not terminate the whole program.
		log.Printf("crawl %q: %v", page, err)
		return
	}
	fmt.Printf("Found: %s %s\n", page, body)

	if depth == 1 {
		// Children would run with depth 0 and return without fetching
		// anything, so there is no point in starting them.
		return
	}
	for _, ref := range refs {
		// Hand every child its own one-slot channel that already holds its
		// URL. This guarantees the child crawls exactly this link at exactly
		// this depth, needs no separate sender goroutine that could block
		// forever, and does not capture the loop variable in a closure.
		next := make(chan string, 1)
		next <- ref
		wg.Add(1)
		go crawl(next, depth-1, wg)
	}
}
func fetch(url string) (string, []string, error) {
res, err := http.Get(url)
if err != nil {
log.Fatal(err)
}
body, err := io.ReadAll(res.Body)
if err != nil {
log.Fatal(err)
}
refs := findRefs(body)
return string(body), refs, nil
}
// findRefs returns, in order of appearance, every substring of body that
// matches urlRx. It returns nil when body contains no match.
func findRefs(body []byte) []string {
	matches := urlRx.FindAll(body, -1)
	if matches == nil {
		return nil
	}
	refs := make([]string, 0, len(matches))
	for _, m := range matches {
		refs = append(refs, string(m))
	}
	return refs
}
// main seeds the crawler with https://go.dev, starts a crawl of depth 4 and
// waits until every crawl goroutine registered on the WaitGroup has finished.
func main() {
	var wg sync.WaitGroup

	// A one-slot buffer lets the seed URL be queued without a helper
	// goroutine; the root crawl picks it up as its first action.
	ch := make(chan string, 1)
	ch <- "https://go.dev"

	// crawl calls wg.Done itself, so it is paired with exactly one Add here.
	// Wrapping it in a goroutine with a second deferred Done would drive the
	// counter negative and panic, or let Wait return early.
	wg.Add(1)
	go crawl(ch, 4, &wg)

	wg.Wait()
	// ch is deliberately not closed: no receiver waits for a close signal,
	// and closing a channel that another goroutine may still send on panics.
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment