Last active
June 8, 2017 02:40
-
-
Save ronaldsuwandi/9e6174b75d13270d0735ddc86ff024e6 to your computer and use it in GitHub Desktop.
Go Tour: Web Crawler (channel approach) + Equivalent Binary Trees
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import "fmt" | |
import "golang.org/x/tour/tree" | |
func WalkRecursive(t *tree.Tree, ch chan int) { | |
if t.Left != nil { | |
WalkRecursive(t.Left, ch) | |
} | |
ch <- t.Value | |
if t.Right != nil { | |
WalkRecursive(t.Right, ch) | |
} | |
} | |
// Walk walks the tree t sending all values | |
// from the tree to the channel ch. | |
func Walk(t *tree.Tree, ch chan int) { | |
WalkRecursive(t, ch) | |
close(ch) | |
} | |
// Same determines whether the trees | |
// t1 and t2 contain the same values. | |
func Same(t1, t2 *tree.Tree) bool { | |
c1 := make(chan int) | |
c2 := make(chan int) | |
go Walk(t1, c1) | |
go Walk(t2, c2) | |
for { | |
v1, ok1 := <-c1 | |
v2, ok2 := <-c2 | |
if v1 != v2 { | |
return false | |
} | |
// not ok1 and not ok2 (because both channels are closed) | |
if !ok1 && !ok2 { | |
return true | |
} | |
} | |
} | |
func main() { | |
fmt.Println(Same(tree.New(1), tree.New(1))) | |
fmt.Println(Same(tree.New(1), tree.New(10))) | |
fmt.Println(Same(tree.New(12), tree.New(1))) | |
fmt.Println(Same(tree.New(12), tree.New(12))) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"sync" | |
) | |
// Cache is a mutex-guarded set of URLs that have already been seen.
// Safe for concurrent use by multiple goroutines.
type Cache struct {
	cache map[string]bool
	mux   sync.Mutex
}

// Add marks url as seen.
func (c *Cache) Add(url string) {
	c.mux.Lock()
	defer c.mux.Unlock()
	c.cache[url] = true
}

// Cached reports whether url has been seen.
func (c *Cache) Cached(url string) bool {
	c.mux.Lock()
	defer c.mux.Unlock()
	return c.cache[url]
}

// Visit marks url as seen and reports whether it had already been
// seen. Unlike calling Cached followed by Add, this is a single
// atomic check-and-set, so concurrent callers cannot both observe
// "not seen" for the same URL.
func (c *Cache) Visit(url string) bool {
	c.mux.Lock()
	defer c.mux.Unlock()
	seen := c.cache[url]
	c.cache[url] = true
	return seen
}
// Fetcher abstracts page retrieval so Crawl can be exercised with a
// canned implementation (see fakeFetcher in this file).
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}
// Crawl uses fetcher to recursively crawl | |
// pages starting with url, to a maximum of depth. | |
func Crawl(url string, depth int, fetcher Fetcher, wg *sync.WaitGroup, cache *Cache) { | |
defer wg.Done() | |
if depth <= 0 { | |
return | |
} | |
if cache.Cached(url) { | |
fmt.Println("* Cache Hit:", url) | |
return | |
} | |
fmt.Println("* Cache Miss:", url) | |
cache.Add(url) | |
body, urls, err := fetcher.Fetch(url) | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
fmt.Printf("found: %s %q\n", url, body) | |
for _, u := range urls { | |
wg.Add(1) | |
go Crawl(u, depth-1, fetcher, wg, cache) | |
} | |
return | |
} | |
func main() { | |
wg := &sync.WaitGroup{} | |
cache := &Cache{cache: map[string]bool{}} | |
wg.Add(1) | |
go Crawl("http://golang.org/", 4, fetcher, wg, cache) | |
wg.Wait() | |
} | |
// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

// fakeResult is one canned page: its body text and outgoing links.
type fakeResult struct {
	body string
	urls []string
}

// Fetch looks url up in the canned result set, failing when absent.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	res, ok := f[url]
	if !ok {
		return "", nil, fmt.Errorf("not found: %s", url)
	}
	return res.body, res.urls, nil
}
// fetcher is a populated fakeFetcher. | |
var fetcher = fakeFetcher{ | |
"http://golang.org/": &fakeResult{ | |
"The Go Programming Language", | |
[]string{ | |
"http://golang.org/pkg/", | |
"http://golang.org/cmd/", | |
}, | |
}, | |
"http://golang.org/pkg/": &fakeResult{ | |
"Packages", | |
[]string{ | |
"http://golang.org/", | |
"http://golang.org/cmd/", | |
"http://golang.org/pkg/fmt/", | |
"http://golang.org/pkg/os/", | |
}, | |
}, | |
"http://golang.org/pkg/fmt/": &fakeResult{ | |
"Package fmt", | |
[]string{ | |
"http://golang.org/", | |
"http://golang.org/pkg/", | |
}, | |
}, | |
"http://golang.org/pkg/os/": &fakeResult{ | |
"Package os", | |
[]string{ | |
"http://golang.org/", | |
"http://golang.org/pkg/", | |
}, | |
}, | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"sync" | |
) | |
// Cache is a mutex-protected set of URLs that have already been
// crawled; safe for use from multiple goroutines.
type Cache struct {
	cache map[string]bool
	mux   sync.Mutex
}

// Add records url as seen.
func (c *Cache) Add(url string) {
	c.mux.Lock()
	defer c.mux.Unlock()
	c.cache[url] = true
}

// Cached reports whether url has previously been recorded via Add.
func (c *Cache) Cached(url string) bool {
	c.mux.Lock()
	defer c.mux.Unlock()
	return c.cache[url]
}
// Fetcher abstracts page retrieval so Crawl can be exercised with a
// canned implementation (see fakeFetcher in this file).
type Fetcher interface {
	// Fetch returns the body of URL and
	// a slice of URLs found on that page.
	Fetch(url string) (body string, urls []string, err error)
}
// Crawl uses fetcher to recursively crawl | |
// pages starting with url, to a maximum of depth. | |
func Crawl(url string, depth int, fetcher Fetcher, ch chan string, c *Cache) { | |
defer close(ch) | |
if depth <= 0 { | |
return | |
} | |
if c.Cached(url) { | |
fmt.Println("* Cache Hit:", url) | |
return | |
} | |
fmt.Println("* Cache Miss:", url) | |
c.Add(url) | |
body, urls, err := fetcher.Fetch(url) | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
ch <- body | |
result := make([]chan string, len(urls)) | |
for i, url := range urls { | |
result[i] = make(chan string) | |
go Crawl(url, depth-1, fetcher, result[i], c) | |
} | |
for i := range result { | |
for s := range result[i] { | |
ch <- s | |
} | |
} | |
} | |
func main() { | |
ch := make(chan string) | |
go Crawl("http://golang.org/", 4, fetcher, ch, | |
&Cache{cache: map[string]bool{}}) | |
for body := range ch { | |
fmt.Println("Body:", body) | |
} | |
} | |
// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

// fakeResult is one canned page: its body text and outgoing links.
type fakeResult struct {
	body string
	urls []string
}

// Fetch looks url up in the canned result set, failing when absent.
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
	res, found := f[url]
	if !found {
		return "", nil, fmt.Errorf("- Not found: %s", url)
	}
	return res.body, res.urls, nil
}
// fetcher is a populated fakeFetcher. | |
var fetcher = fakeFetcher{ | |
"http://golang.org/": &fakeResult{ | |
"The Go Programming Language", | |
[]string{ | |
"http://golang.org/pkg/", | |
"http://golang.org/cmd/", | |
}, | |
}, | |
"http://golang.org/pkg/": &fakeResult{ | |
"Packages", | |
[]string{ | |
"http://golang.org/", | |
"http://golang.org/cmd/", | |
"http://golang.org/pkg/fmt/", | |
"http://golang.org/pkg/os/", | |
}, | |
}, | |
"http://golang.org/pkg/fmt/": &fakeResult{ | |
"Package fmt", | |
[]string{ | |
"http://golang.org/", | |
"http://golang.org/pkg/", | |
}, | |
}, | |
"http://golang.org/pkg/os/": &fakeResult{ | |
"Package os", | |
[]string{ | |
"http://golang.org/", | |
"http://golang.org/pkg/", | |
}, | |
}, | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment