Last active
August 30, 2016 01:18
-
-
Save huydx/6a15f9a311c0ff3a2c1b1948b6c03582 to your computer and use it in GitHub Desktop.
Revisions
-
huydx revised this gist
Aug 30, 2016 . 1 changed file with 68 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -61,3 +61,71 @@ func main() { fmt.Println(match("^fo*bar$", "foo")) fmt.Println(match("^fo*bar$", "foooooooooooobar")) } //////////////////////// Some note about parsing technique Reference link: - http://math.hws.edu/javanotes/c9/s5.html - http://perl.plover.com/Regex/article.html - http://matt.might.net/articles/parsing-regex-with-recursive-descent/ - https://swtch.com/~rsc/regexp/regexp1.html - http://matt.might.net/articles/nonblocking-lexing-toolkit-based-on-regex-derivatives/ - http://genius.cat-v.org/brian-kernighan/articles/beautiful lexer : recognize token parser : from token to build structure tree 3 types of regex: - basic regular expression (BRE) - extended regular expression (ERE) - Perl compatible regular expression (PCRE) regex symbol has math equivalent meaning http://web.cse.ohio-state.edu/software/2231/web-sw2/extras/slides/27.Recursive-Descent-Parsing.pdf a recursive descent parser for regex. NOT all grammar suit for recursive descent - idea is to construct procedure for each kind of grammar - peek(), next(), eat(item) - LL(k) type : determistic by examining next k tokens Context free grammar - production rules describe "all" possible strings - production rules == simple replacement EBNF, ABNF: http://matt.might.net/articles/grammars-bnf-ebnf/ - NFA: - special kind of finite machine - include of: - set of symbols - set of states - next-state function - subset of states which accept state - initial state s0 - NFA represent: table or graph - epsilon transition: transition state on an empty string?? - DFA: - NO epsilon transition - Problem: convert regex string to a datastructure to easy manipulate and pattern matching - convert regex to NFA - using thompson algorithm - it's like arithmetic evaluation YACC %{ header } %union %token %type %% rules %% user setting http://loveruby.net/ja/rhg/book/yacc.html http://qiita.com/k0kubun/items/1b641dfd186fe46feb65 -
huydx created this gist
Aug 29, 2016 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,63 @@ package main import ( "fmt" ) func match(regex string, text string) bool { if string(regex[0]) == "^" { return matchhere(regex[1:], text) } for { if matchhere(regex, text) { return true } else { if end(text) { return false } text = text[1:] } } return false } func matchhere(regex string, text string) bool { fmt.Printf("function %s called params %s %s\n", "matchhere", regex, text) if end(regex) { return false } if string(regex[0]) == "$" && len(regex) == 1 { return len(text) == 0 } if len(regex) > 2 && string(regex[1]) == "*" { return matchstar(string(regex[0]), regex[2:], text) } if len(text) > 0 && (string(regex[0]) == "." || regex[0] == text[0]) { return matchhere(regex[1:], text[1:]) } return false } func end(text string) bool { return len(text) == 0 } func matchstar(c string, regex string, text string) bool { fmt.Printf("function %s called params %s %s %s\n", "matchstar", c, regex, text) for { fmt.Printf(" loop inside matchstar %s\n", text) if len(text) > 0 && string(text[0]) == c { text = text[1:] continue } return matchhere(regex, text) } } func main() { fmt.Println(match("^fo*$", "foo")) fmt.Println(match("^fo*bar$", "foo")) fmt.Println(match("^fo*bar$", "foooooooooooobar")) }