Last active
June 18, 2023 07:36
-
-
Save PratikDeoghare/fb2c015692b7e017b19510d24b33f973 to your computer and use it in GitHub Desktop.
Approximate Word Counter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "flag" | |
| "fmt" | |
| "io" | |
| "math/rand" | |
| "os" | |
| "regexp" | |
| "sort" | |
| ) | |
// re matches one maximal run of word characters (`\w`: ASCII letters,
// digits, and underscore). It is compiled once at package scope.
var re = regexp.MustCompile(`\w+`)

// count reports the number of non-overlapping matches of re in s, i.e. how
// many runs of word characters the buffer contains. A nil or empty buffer
// yields 0.
func count(s []byte) int {
	return len(re.FindAll(s, -1))
}
| func main() { | |
| filename := flag.String("f", "", "-f filename") | |
| flag.Parse() | |
| wc, err := avgWordCount(*filename, 100) | |
| if err != nil { | |
| panic(err) | |
| } | |
| fmt.Println(wc) | |
| } | |
| func avgWordCount(filename string, nSample int) (int, error) { | |
| wcc := 0 | |
| for i := 0; i < nSample; i++ { | |
| wc, err := countWords(filename) | |
| if err != nil { | |
| return 0, err | |
| } | |
| wcc += wc | |
| } | |
| return wcc / nSample, nil | |
| } | |
| func countWords(filename string) (int, error) { | |
| fileInfo, err := os.Stat(filename) | |
| if err != nil { | |
| return 0, err | |
| } | |
| size := fileInfo.Size() | |
| f, err := os.Open(filename) | |
| if err != nil { | |
| return 0, err | |
| } | |
| defer f.Close() | |
| bufSize := 128 | |
| nIter := 10 | |
| var offsets []int64 | |
| for i := 0; i < nIter; i++ { | |
| offsets = append(offsets, rand.Int63n(size)) | |
| } | |
| sort.Slice(offsets, func(i, j int) bool { | |
| return offsets[i] < offsets[j] | |
| }) | |
| wc := 0 | |
| bs := make([]byte, bufSize) | |
| for _, offset := range offsets { | |
| _, err := f.ReadAt(bs, offset) | |
| if err == io.EOF { | |
| // ignore | |
| } else if err != nil { | |
| return 0, err | |
| } | |
| wc += count(bs) | |
| } | |
| w := float64(size) * float64(wc) / (float64(bufSize) * float64(nIter)) | |
| return int(w), nil | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment