Skip to content

Instantly share code, notes, and snippets.

@defparam
Created March 13, 2021 23:54
Show Gist options
  • Select an option

  • Save defparam/29da13704932787365d7f51bab287c22 to your computer and use it in GitHub Desktop.

Select an option

Save defparam/29da13704932787365d7f51bab287c22 to your computer and use it in GitHub Desktop.

Revisions

  1. defparam created this gist Mar 13, 2021.
    61 changes: 61 additions & 0 deletions fuzz.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,61 @@
    import sys, re
    import atheris
    from urllib.parse import urlparse

    # Our sketchy regex to be tested.
    #
    # Written as a raw bytes literal so that `\.` and `\?` reach the regex engine
    # verbatim instead of relying on Python passing unknown escape sequences
    # through (which triggers an invalid-escape warning on modern interpreters).
    # The compiled pattern is byte-for-byte the same as the non-raw spelling.
    #
    # NOTE(review): the example2 branch contains `\?` (a literal question mark)
    # where the other two branches use `?` (optional group). It is left untouched
    # on purpose: this regex is the subject under test, not something to repair.
    OurRegex = re.compile(
        rb"^(((([A-Za-z0-9.-]*\.)?example1\.com)|(([A-Za-z0-9.-]*\.)\?example2\.com)|(([A-Za-z0-9.-]*\.)?example3\.com)))"
    )

    # The allow list of domains the regex is trying to validate
    Allowlist = [b"example1.com", b"example2.com", b"example3.com"]

    # Our Fuzzing Harness
    def TestOneInput(data):
        """Fuzz harness: cross-check OurRegex against urllib's URL parsing.

        The first byte of ``data`` selects one of the Allowlist domains; the
        remaining bytes are prepended to that domain to form the candidate.
        Candidates accepted by OurRegex are parsed with ``urlparse`` (after
        prepending an ``https://`` scheme) and the parsed authority is searched
        for any Allowlist domain.

        Args:
            data: Raw bytes supplied by the fuzzer.

        Raises:
            RuntimeError: If OurRegex accepts the candidate but none of the
                Allowlist domains appears in the authority parsed by urllib.
        """
        # Arbitrary, but let's get a minimum of 5 bytes of fuzz data
        if len(data) < 5:
            return

        # We use the first byte as a selector of one of the allowed domains
        # and we append that domain to the rest of the fuzzer test data.
        #
        # Test will look something like this:
        #   <FUZZ DATA>example1.com, <FUZZ DATA>example2.com, <FUZZ DATA>example3.com
        test = data[1:] + Allowlist[data[0] % len(Allowlist)]

        # If the regex didn't validate it as trusted there is no point in
        # processing it through urllib, just return
        if not OurRegex.match(test):
            return

        # We have a trusted input, let's compare it to urllib.
        # urlparse decodes bytes input as ASCII, so non-ASCII bytes raise
        # UnicodeDecodeError (a ValueError subclass); it also raises ValueError
        # for some malformed authorities. Those inputs are not interesting here,
        # so skip them. Catching ValueError (rather than a bare except) keeps
        # KeyboardInterrupt/SystemExit from being silently swallowed.
        try:
            # urllib also requires a scheme, so we give it https
            UrllibResult = urlparse(b"https://" + test)
        except ValueError:
            return

        # At this point we have results from urllib.
        # Validate that our regex-trusted input contains at least one of the
        # trusted domains in its authority; if so, nothing to report.
        netloc = UrllibResult.netloc
        if any(domain in netloc for domain in Allowlist):
            return

        # If we got this far it means that we have an input deemed trusted by our
        # regex but urllib did not find any of the allowlist domains inside the
        # authority string of the parsed URL, raise an exception to the fuzzer
        print ("\n\n\n\n==================================================================")
        print ("(SEVERE): Found a potential bypass!")
        print ("\n Payload: %s"% (test))
        print ("Urllib Authority: %s\n"% (netloc))
        print ("Note: When parsing this input with urllib it appears that none ")
        print ("of the allow list domains were found in the authority!")
        print ("==================================================================\n\n")
        raise RuntimeError("Fuzzer found a discrepency")


    # Entry point: hand the command-line arguments and the harness callback to
    # atheris, then start the fuzzing run. Setup must be called before Fuzz.
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()