Skip to content

Instantly share code, notes, and snippets.

@ronaldsuwandi
Last active June 8, 2017 02:40
Show Gist options
  • Save ronaldsuwandi/9e6174b75d13270d0735ddc86ff024e6 to your computer and use it in GitHub Desktop.
Go Tour: Web Crawler (channel approach) + Equivalent Binary Trees
package main
import "fmt"
import "golang.org/x/tour/tree"
// WalkRecursive performs an in-order traversal of t, sending each
// node's value on ch. t must be non-nil; nil children are skipped.
// The channel is left open for the caller to close.
func WalkRecursive(t *tree.Tree, ch chan int) {
	if left := t.Left; left != nil {
		WalkRecursive(left, ch)
	}
	ch <- t.Value
	if right := t.Right; right != nil {
		WalkRecursive(right, ch)
	}
}
// Walk walks the tree t sending all values
// from the tree to the channel ch, in sorted (in-order) order.
// ch is closed when the traversal finishes so receivers can range over it.
func Walk(t *tree.Tree, ch chan int) {
	defer close(ch)
	WalkRecursive(t, ch)
}
// Same determines whether the trees
// t1 and t2 contain the same values.
//
// It walks both trees concurrently and compares the value streams.
// Fix over the original: the closed-status of each channel is checked
// BEFORE comparing values. The original compared v1 != v2 first, so two
// trees of different sizes whose trailing values coincided with the int
// zero value (0) could incorrectly compare equal, because a receive from
// a closed channel yields the zero value.
func Same(t1, t2 *tree.Tree) bool {
	c1 := make(chan int)
	c2 := make(chan int)
	go Walk(t1, c1)
	go Walk(t2, c2)
	for {
		v1, ok1 := <-c1
		v2, ok2 := <-c2
		if ok1 != ok2 || (ok1 && v1 != v2) {
			// Mismatch: drain both channels so the Walk goroutines
			// blocked on send can finish instead of leaking.
			go func() {
				for range c1 {
				}
			}()
			go func() {
				for range c2 {
				}
			}()
			return false
		}
		if !ok1 {
			// Both channels closed with every value matched.
			return true
		}
	}
}
// main exercises Same on pairs of trees; the expected output is
// true, false, false, true.
func main() {
	pairs := []struct{ a, b int }{
		{1, 1},
		{1, 10},
		{12, 1},
		{12, 12},
	}
	for _, p := range pairs {
		fmt.Println(Same(tree.New(p.a), tree.New(p.b)))
	}
}
package main
import (
"fmt"
"sync"
)
// Cache is a set of URLs that have already been visited, safe for
// concurrent use via its methods (Add, Cached).
type Cache struct {
// cache maps a URL to true once it has been added.
cache map[string]bool
// mux guards cache against concurrent access.
mux sync.Mutex
}
// Add marks url as visited.
func (c *Cache) Add(url string) {
	c.mux.Lock()
	defer c.mux.Unlock()

	c.cache[url] = true
}
// Cached reports whether url has previously been added.
func (c *Cache) Cached(url string) bool {
	c.mux.Lock()
	seen := c.cache[url]
	c.mux.Unlock()
	return seen
}
// Fetcher abstracts page retrieval so Crawl can be driven by the
// canned fakeFetcher below instead of real HTTP.
type Fetcher interface {
// Fetch returns the body of URL and
// a slice of URLs found on that page.
Fetch(url string) (body string, urls []string, err error)
}
// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
// Each recursive call runs in its own goroutine; the caller must have
// called wg.Add(1) for this call, and wg.Done is signalled on return.
// cache deduplicates URLs across all goroutines.
//
// Change over the original: the redundant bare `return` at the end of
// the function body has been removed (it added nothing after the loop).
func Crawl(url string, depth int, fetcher Fetcher, wg *sync.WaitGroup, cache *Cache) {
	defer wg.Done()
	if depth <= 0 {
		return
	}
	// NOTE(review): Cached followed by Add is check-then-act, not atomic —
	// two goroutines racing on the same URL can both miss the cache and
	// fetch it twice. Harmless here (duplicate work, no corruption); a
	// combined test-and-set on Cache would close the window.
	if cache.Cached(url) {
		fmt.Println("* Cache Hit:", url)
		return
	}
	fmt.Println("* Cache Miss:", url)
	cache.Add(url)
	body, urls, err := fetcher.Fetch(url)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("found: %s %q\n", url, body)
	for _, u := range urls {
		// Add before starting the goroutine so wg.Wait cannot observe
		// a zero count while children are still being launched.
		wg.Add(1)
		go Crawl(u, depth-1, fetcher, wg, cache)
	}
}
// main kicks off the crawl rooted at golang.org with depth 4 and blocks
// until every spawned Crawl goroutine has signalled the WaitGroup.
func main() {
	var wg sync.WaitGroup
	cache := &Cache{cache: make(map[string]bool)}
	wg.Add(1)
	go Crawl("http://golang.org/", 4, fetcher, &wg, cache)
	wg.Wait()
}
// fakeFetcher is a Fetcher that returns canned results keyed by URL.
type fakeFetcher map[string]*fakeResult
// fakeResult holds the canned body and outgoing links for one URL.
type fakeResult struct {
body string
urls []string
}
// Fetch returns the canned body and links for url, or an error when url
// has no canned entry.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	res, found := f[url]
	if !found {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return res.body, res.urls, nil
}
// fetcher is a populated fakeFetcher.
// The canned pages form a small cyclic link graph rooted at
// http://golang.org/. Note http://golang.org/cmd/ is linked but has no
// entry, so fetching it exercises the error path.
var fetcher = fakeFetcher{
"http://golang.org/": &fakeResult{
"The Go Programming Language",
[]string{
"http://golang.org/pkg/",
"http://golang.org/cmd/",
},
},
"http://golang.org/pkg/": &fakeResult{
"Packages",
[]string{
"http://golang.org/",
"http://golang.org/cmd/",
"http://golang.org/pkg/fmt/",
"http://golang.org/pkg/os/",
},
},
"http://golang.org/pkg/fmt/": &fakeResult{
"Package fmt",
[]string{
"http://golang.org/",
"http://golang.org/pkg/",
},
},
"http://golang.org/pkg/os/": &fakeResult{
"Package os",
[]string{
"http://golang.org/",
"http://golang.org/pkg/",
},
},
}
package main
import (
"fmt"
"sync"
)
// Cache is a set of URLs that have already been visited, safe for
// concurrent use via its methods (Add, Cached).
type Cache struct {
// cache maps a URL to true once it has been added.
cache map[string]bool
// mux guards cache against concurrent access.
mux sync.Mutex
}
// Add records url as visited.
func (c *Cache) Add(url string) {
	c.mux.Lock()
	c.cache[url] = true
	c.mux.Unlock()
}
// Cached reports whether url was previously recorded with Add.
func (c *Cache) Cached(url string) bool {
	c.mux.Lock()
	defer c.mux.Unlock()

	return c.cache[url]
}
// Fetcher abstracts page retrieval so Crawl can be driven by the
// canned fakeFetcher below instead of real HTTP.
type Fetcher interface {
// Fetch returns the body of URL and
// a slice of URLs found on that page.
Fetch(url string) (body string, urls []string, err error)
}
// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
// Each call is the sole sender on ch: it sends this page's body, then
// forwards every body produced beneath it, then closes ch so the
// receiver's range loop terminates.
func Crawl(url string, depth int, fetcher Fetcher, ch chan string, c *Cache) {
// ch is closed on every return path, including the early exits below.
defer close(ch)
if depth <= 0 {
return
}
// NOTE(review): Cached followed by Add is check-then-act, not atomic —
// two goroutines racing on the same URL could both fetch it. Worst
// case is duplicate work, not corruption.
if c.Cached(url) {
fmt.Println("* Cache Hit:", url)
return
}
fmt.Println("* Cache Miss:", url)
c.Add(url)
body, urls, err := fetcher.Fetch(url)
if err != nil {
fmt.Println(err)
return
}
// Publish this page's body before any child results.
ch <- body
// One dedicated channel per child so each child's output can be
// forwarded in link order; children crawl concurrently.
result := make([]chan string, len(urls))
for i, url := range urls {
result[i] = make(chan string)
go Crawl(url, depth-1, fetcher, result[i], c)
}
// Drain children sequentially; each inner range ends when that child's
// deferred close fires.
for i := range result {
for s := range result[i] {
ch <- s
}
}
}
func main() {
ch := make(chan string)
go Crawl("http://golang.org/", 4, fetcher, ch,
&Cache{cache: map[string]bool{}})
for body := range ch {
fmt.Println("Body:", body)
}
}
// fakeFetcher is a Fetcher that returns canned results keyed by URL.
type fakeFetcher map[string]*fakeResult
// fakeResult holds the canned body and outgoing links for one URL.
type fakeResult struct {
body string
urls []string
}
// Fetch returns the canned body and links for url, or an error when url
// has no canned entry.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	res, found := f[url]
	if !found {
		return "", nil, fmt.Errorf("- Not found: %s", url)
	}
	return res.body, res.urls, nil
}
// fetcher is a populated fakeFetcher.
// The canned pages form a small cyclic link graph rooted at
// http://golang.org/. Note http://golang.org/cmd/ is linked but has no
// entry, so fetching it exercises the error path.
var fetcher = fakeFetcher{
"http://golang.org/": &fakeResult{
"The Go Programming Language",
[]string{
"http://golang.org/pkg/",
"http://golang.org/cmd/",
},
},
"http://golang.org/pkg/": &fakeResult{
"Packages",
[]string{
"http://golang.org/",
"http://golang.org/cmd/",
"http://golang.org/pkg/fmt/",
"http://golang.org/pkg/os/",
},
},
"http://golang.org/pkg/fmt/": &fakeResult{
"Package fmt",
[]string{
"http://golang.org/",
"http://golang.org/pkg/",
},
},
"http://golang.org/pkg/os/": &fakeResult{
"Package os",
[]string{
"http://golang.org/",
"http://golang.org/pkg/",
},
},
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment