// NOTE(review): both module attributes reached this file as an empty `[]`
// (the angle-bracketed text was lost). `[<AutoOpen>]` is assumed for the
// top-level module — confirm against the project's history.
[<AutoOpen>]
module Krow.Regex.Evaluation

open Krow.Regex.Types
open System.Text

module Regex =

    /// Escapes `s` with `System.Text.RegularExpressions.Regex.Escape` and then
    /// additionally backslash-escapes every `]`.
    let escape s = (RegularExpressions.Regex.Escape s).Replace("]", "\]")

    /// Inverse of `escape`: turns `\]` back into `]` and then applies
    /// `System.Text.RegularExpressions.Regex.Unescape`.
    let unescape (s:string) = (RegularExpressions.Regex.Unescape (s.Replace("\]", "]")))

    // The helpers below are called unqualified from `evaluate`, so this module
    // has to be auto-opened.
    [<AutoOpen>]
    module private Helpers =

        /// Wraps sequences and alternations in a non-capturing group so that a
        /// quantifier appended afterwards applies to the whole expression.
        let groupable (regex:IRegex) =
            match regex with
            | :? Regex.Sequence
            | :? Regex.OneOf -> Regex.NonCapturing regex :> IRegex
            | _ -> regex

        /// Wraps alternations in a non-capturing group so that they can be
        /// concatenated with their neighbours inside a sequence.
        let listGroupable (regex:IRegex) =
            match regex with
            | :? Regex.OneOf -> Regex.NonCapturing regex :> IRegex
            | _ -> regex

        /// Renders the inside of a character set (the text between `[` and `]`).
        /// For `CharSet.OneOf`, only `/` is escaped here (as `\/`); every other
        /// character is emitted as given.
        let rec charsetContent charSet =
            match charSet with
            | CharSet.OneOf chars ->
                (new string(chars |> List.toArray)).Replace("/", @"\/")
            | CharSet.Range (first,last) ->
                $@"{first}-{last}"
            | CharSet.Multiple charSets ->
                // String.concat, unlike List.reduce, tolerates an empty list.
                charSets
                |> List.map charsetContent
                |> String.concat ""

        /// Renders the target of a group reference: the number for positional
        /// references, the escaped name for named ones.
        let referenceString reference =
            match reference with
            | Group.Positional number -> number |> string
            | Group.Named name -> name |> escape

    /// Renders an `IRegex` tree as a regular-expression pattern string.
    ///
    /// Raises `Failure "Not supported"` when `regex` is not one of the node
    /// types handled below.
    let rec evaluate (regex:IRegex) =
        match regex with
        | :? Regex.Literal as literal ->
            let (Regex.Literal text) = literal
            escape text

        | :? Regex.Raw as raw ->
            // Raw patterns are emitted verbatim, without escaping.
            let (Regex.Raw pattern) = raw
            pattern

        | :? Regex.OneOf as oneOf ->
            let (Regex.OneOf regexs) = oneOf
            regexs
            |> List.map evaluate
            |> String.concat "|"

        | :? Regex.Sequence as sequence ->
            let (Regex.Sequence regexs) = sequence
            regexs
            |> List.map (listGroupable >> evaluate)
            |> String.concat ""

        | :? Regex.NonCapturing as grouping ->
            let (Regex.NonCapturing inner) = grouping
            $@"(?:{evaluate inner})"

        | :? Mode.WithModes as withModes ->
            let (Mode.WithModes (modes, inner)) = withModes
            let modeChar = function
                | Mode.CaseInsensitive -> "i"
                | Mode.Multiline -> "m"
                | Mode.ExplicitCapture -> "n"
                | Mode.IgnoreUnescapedWhiteSpace -> "x"
            let modeList =
                modes
                |> List.map modeChar
                |> String.concat ""
            // The inner expression must be rendered with `evaluate`;
            // interpolating the node itself would emit its ToString() text.
            $@"(?{modeList}:{evaluate inner})"

        | :? Group.Reference as reference ->
            let target = reference |> referenceString
            match reference with
            | Group.Positional _ -> $@"\{target}"
            | Group.Named _ -> $@"\k<{target}>"

        | :? Look.Look as look ->
            match look with
            | Look.Ahead regex -> $@"(?={regex |> evaluate})"
            | Look.Behind regex -> $@"(?<={regex |> evaluate})"

        | :? Look.Negated as look ->
            let (Look.Negated look) = look
            match look with
            | Look.Ahead regex -> $@"(?!{regex |> evaluate})"
            | Look.Behind regex -> $@"(?<!{regex |> evaluate})"

        | :? Group.Group as group ->
            let (Group.Group(group,regex)) = group
            let regex = regex |> evaluate
            match group with
            // Capturing
            | Group.Capturing -> $@"({regex})"
            | Group.CapturingWithName name -> $@"(?<{name}>{regex})"
            // Non capturing
            | Group.NonBacktrackingGrouping -> $@"(?>{regex})"
            // Balancing
            | Group.UnCapturing reference ->
                $@"(?<-{reference |> referenceString}>{regex})"
            | Group.Balancing (newName, reference) ->
                $@"(?<{newName |> escape}-{reference |> referenceString}>{regex})"

        | :? SpecialChar.SpecialChar as special ->
            match special with
            | SpecialChar.WildCard -> @"."
            | SpecialChar.Bell -> @"\a"
            | SpecialChar.Backspace -> @"\b"
            | SpecialChar.Tab -> @"\t"
            | SpecialChar.VerticalTab -> @"\v"
            | SpecialChar.CarriageReturn -> @"\r"
            | SpecialChar.NewLine -> @"\n"
            | SpecialChar.Escaped -> @"\e"
            | SpecialChar.Octal oct -> $@"\{oct}"
            | SpecialChar.Hexadecimal hex -> $@"\x{hex}"
            | SpecialChar.ASCII ascii -> $@"\u{ascii}"

        | :? Anchor.Anchor as anchor ->
            match anchor with
            | Anchor.Start -> @"\A"
            | Anchor.StartOfLine -> @"^"
            | Anchor.End -> @"\z"
            | Anchor.EndOfLine -> @"$"
            | Anchor.Boundary -> @"\b"
            | Anchor.NotBoundary -> @"\B"
            | Anchor.AfterMatch -> @"\G"

        | :? CharSet.CharSet as charSet ->
            $"[{charsetContent charSet}]"

        | :? CharSet.Negated as negated ->
            let (CharSet.Negated charSet) = negated
            $"[^{charsetContent charSet}]"

        | :? CharClass.CharClass as charClass ->
            match charClass with
            | CharClass.InUnicodeBlock block -> $@"\p{{{block}}}"
            | CharClass.LetterOrDigit -> @"\w"
            | CharClass.WhitespaceChar -> @"\s"
            | CharClass.Digit -> @"\d"

        | :? CharClass.Negated as negated ->
            let (CharClass.Negated charClass) = negated
            match charClass with
            | CharClass.InUnicodeBlock block -> $@"\P{{{block}}}"
            | CharClass.LetterOrDigit -> @"\W"
            | CharClass.WhitespaceChar -> @"\S"
            | CharClass.Digit -> @"\D"

        | :? Quantity.Quantified as quantified ->
            match quantified with
            | Quantity.Greedy (regex,quantity) ->
                // Group composite expressions so the quantifier covers all of them.
                let regex = regex |> groupable |> evaluate
                match quantity with
                | Quantity.Exactly amount -> $@"{regex}{{{amount}}}"
                | Quantity.AtLeast amount ->
                    // Use the shorthand quantifiers where one exists.
                    if amount = 0 then $@"{regex}*"
                    else if amount = 1 then $@"{regex}+"
                    else $@"{regex}{{{amount},}}"
                | Quantity.Between (min,max) ->
                    if min = 0 && max = 1 then $@"{regex}?"
                    else $@"{regex}{{{min},{max}}}"
            | Quantity.Lazy (regex,quantity) ->
                // A lazy quantifier is the greedy form followed by `?`.
                let greedQuantified = Quantity.Greedy(regex,quantity) |> evaluate
                greedQuantified + "?"

        | :? Condition.Conditional as conditional ->
            let evaluateCondition = function
                | Condition.Regex regex -> regex |> evaluate
                | Condition.Reference reference -> reference |> referenceString
            $@"(?({conditional.If |> evaluateCondition}){conditional.Then |> evaluate}|{conditional.Else |> evaluate})"

        | _ -> failwith "Not supported"