Skip to content

Instantly share code, notes, and snippets.

@sorz
Last active July 13, 2021 16:57
Show Gist options
  • Select an option

  • Save sorz/5577181 to your computer and use it in GitHub Desktop.

Select an option

Save sorz/5577181 to your computer and use it in GitHub Desktop.

Revisions

  1. @bluen bluen revised this gist Jan 14, 2014. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions gfwlist2regex.py
    Original file line number Diff line number Diff line change
    @@ -13,6 +13,7 @@ def convert_line(line):
    return line[1:-1]

    line = line.replace('*', '.+')
    line = line.replace('(', r'\(').replace(')', r'\)')
    if line.startswith('||'):
    return '^https?:\/\/%s.*' % line[2:]
    elif line.startswith('|'):
  2. BlueN created this gist May 14, 2013.
    47 changes: 47 additions & 0 deletions gfwlist2regex.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,47 @@
    #!/usr/bin/env python
    #encoding: utf-8
    import urllib2
    from base64 import b64decode


    # Location of the base64-encoded gfwlist that main() downloads.
    LIST_URL = 'https://autoproxy-gfwlist.googlecode.com/svn/trunk/gfwlist.txt'
    # Output file for regexes converted from ordinary rules.
    BLACK_FILE = 'gfw.url_regex.lst'
    # Output file for regexes converted from '@@'-prefixed rules.
    WHITE_FILE = 'cn.url_regex.lst'

    def convert_line(line):
        r"""Convert a single gfwlist rule into a URL regular expression.

        Conversion rules, applied in this order:

        * ``/regex/`` -- a rule wrapped in slashes is returned with the
          slashes removed and is otherwise left untouched.
        * ``*`` becomes ``.+`` and literal parentheses are backslash-escaped.
        * A leading ``||`` becomes ``^https?:\/\/``; a leading ``|`` becomes
          ``^``; otherwise the pattern starts with ``.*``.
        * A trailing ``|`` becomes ``$``; otherwise the pattern ends with
          ``.*``.

        :param line: one non-empty rule, without its line terminator and
            without any ``@@`` prefix.
        :returns: the regular expression as a string.
        :raises ValueError: if ``line`` is empty.
        """
        if not line:
            raise ValueError('cannot convert an empty rule')

        # A lone '/' is not a regex literal; require both delimiters.
        if len(line) >= 2 and line.startswith('/') and line.endswith('/'):
            return line[1:-1]

        line = line.replace('*', '.+')
        line = line.replace('(', r'\(').replace(')', r'\)')

        # Resolve the start anchor first, then look for an end anchor on what
        # is left, so a bare '|' or '||' is still treated as a start anchor.
        if line.startswith('||'):
            head = r'^https?:\/\/'
            line = line[2:]
        elif line.startswith('|'):
            head = '^'
            line = line[1:]
        else:
            head = '.*'

        # The anchor character itself must be removed: leaving it in would
        # emit 'foo|$', an alternation whose empty branch matches any URL.
        if line.endswith('|'):
            tail = '$'
            line = line[:-1]
        else:
            tail = '.*'

        return head + line + tail


    def convert(gfwlist):
        """Write the rules in ``gfwlist`` out as two regex list files.

        Each line of the decoded list is converted with ``convert_line``.
        Rules prefixed with ``@@`` go to ``WHITE_FILE`` (prefix removed), all
        other rules go to ``BLACK_FILE``, one regex per output line.  Blank
        lines and lines starting with ``!`` or ``[`` are skipped.

        :param gfwlist: the decoded list text, with rules separated by
            newlines (``\\n`` or ``\\r\\n``).
        """
        # 'with' closes (and therefore flushes) both files even if a rule
        # fails to convert part-way through.
        with open(BLACK_FILE, 'w') as black, open(WHITE_FILE, 'w') as white:
            for rule in gfwlist.split('\n'):
                # split('\n') already removed the newline; only strip a
                # carriage return left over from CRLF input.  Slicing off the
                # last character unconditionally would truncate LF-only rules.
                rule = rule.rstrip('\r')
                if not rule or rule[0] in '![':
                    continue

                if rule.startswith('@@'):
                    exception = rule[2:]
                    if not exception:
                        # A bare '@@' carries no pattern to convert.
                        continue
                    white.write(convert_line(exception) + '\n')
                else:
                    black.write(convert_line(rule) + '\n')


    def main():
        """Download the list at LIST_URL, base64-decode it and convert it."""
        response = urllib2.urlopen(LIST_URL)
        encoded = response.read()
        convert(b64decode(encoded))

    # Run the download-and-convert pipeline only when executed as a script.
    if __name__ == '__main__':
    main()