Skip to content

Instantly share code, notes, and snippets.

@pannal
Last active March 22, 2019 21:11
Show Gist options
  • Select an option

  • Save pannal/ff8066e272e2ecd42621894f6c843dce to your computer and use it in GitHub Desktop.

Select an option

Save pannal/ff8066e272e2ecd42621894f6c843dce to your computer and use it in GitHub Desktop.

Revisions

  1. pannal revised this gist Mar 22, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -29,7 +29,7 @@ def _log_level_string_to_int(log_level_string):
    map = "file://{}"; # default; set this via -w/--whitelist-path
    prefilter = true;
    action = "accept";
    filter = "email"; # use "email:domain" for --domain mode
    filter = "email"; # use "email:domain" for --use-domains mode
    }}""".format(WL_PATH_DEF), formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("message_class", help="What to classify the message as: 'ham' or 'spam'")
    parser.add_argument("--input", "-i", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Mail message input; read from stdin by default")
  2. pannal revised this gist Mar 22, 2019. 1 changed file with 11 additions and 7 deletions.
    18 changes: 11 additions & 7 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -29,10 +29,11 @@ def _log_level_string_to_int(log_level_string):
    map = "file://{}"; # default; set this via -w/--whitelist-path
    prefilter = true;
    action = "accept";
    filter = "email";
    filter = "email"; # use "email:domain" for --domain mode
    }}""".format(WL_PATH_DEF), formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("message_class", help="What to classify the message as: 'ham' or 'spam'")
    parser.add_argument("--input", "-i", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Mail message input; read from stdin by default")
    parser.add_argument("-d", "--use-domains", help="Use domains instead of the full e-mail addresses inside the whitelist", default=False, action="store_true")
    parser.add_argument("-w", "--whitelist-path", help="Path to whitelist file *NEEDS TO BE WRITABLE BY THE CURRENT USER*; Default: {}".format(WL_PATH_DEF), default=WL_PATH_DEF)
    parser.add_argument("-r", "--rspamc-path", help="Path to rspamc binary; Default: {}".format(RC_PATH_DEF), default=RC_PATH_DEF)
    parser.add_argument("--log-file", help="Path to log file; Default: stdout", default=None)
    @@ -97,21 +98,24 @@ def _log_level_string_to_int(log_level_string):
    whitelist_orig = f.read().split()
    whitelist = whitelist_orig[:]
    for omit, addr in list(set(from_lines)):
    if cls == "spam" and addr in whitelist:
    addr_or_domain = addr
    if args.use_domains:
    addr_or_domain = addr.split("@")[1]
    if cls == "spam" and addr_or_domain in whitelist:
    action = "remove"
    elif cls == "ham" and addr not in whitelist:
    elif cls == "ham" and addr_or_domain not in whitelist:
    action = "append"
    else:
    log.debug("whitelist: {} already marked as {}".format(addr, cls))
    log.debug("whitelist: {} already marked as {}".format(addr_or_domain, cls))
    continue

    log.info("{}: {}".format(cls, addr))
    getattr(whitelist, action)(addr)
    log.info("{}: {}".format(cls, addr_or_domain))
    getattr(whitelist, action)(addr_or_domain)

    if whitelist_orig != whitelist:
    f.truncate(0)
    f.seek(0)
    f.writelines("\n".join(whitelist)+"\n")
    f.writelines("\n".join(list(set(whitelist)))+"\n")

    # rspamc learn message
    ret = subprocess.check_output([args.rspamc_path, "learn_{}".format(cls)], input=what, universal_newlines=True)
  3. pannal revised this gist Mar 22, 2019. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -22,19 +22,18 @@ def _log_level_string_to_int(log_level_string):
    return log_level_int

    WL_PATH_DEF = "/etc/rspamd/local.d/whitelist.txt"
    MP_PATH_DEF = "/usr/local/bin/mailparser"
    RC_PATH_DEF = "/usr/bin/rspamc"

    parser = argparse.ArgumentParser(description="""Learn messages via rspamc and manage a sender whitelist. Depends on https://github.com/SpamScope/mail-parser.\n\nUse with local.d/multimap.conf:\nSENDER_FROM_WHITELIST {{
    type = "from";
    map = "file://{}"; # default; set this via -w/--whitelist-path
    prefilter = true;
    action = "accept";
    filter = "email";
    }}""".format(WL_PATH_DEF), formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("message_class", help="What to classify the message as: 'ham' or 'spam'")
    parser.add_argument("--input", "-i", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Mail message input; read from stdin by default")
    parser.add_argument("-w", "--whitelist-path", help="Path to whitelist file *NEEDS TO BE WRITABLE BY THE CURRENT USER*; Default: {}".format(WL_PATH_DEF), default=WL_PATH_DEF)
    parser.add_argument("-m", "--mailparser-path", help="Path to mailparser binary; Default: {}".format(MP_PATH_DEF), default=MP_PATH_DEF)
    parser.add_argument("-r", "--rspamc-path", help="Path to rspamc binary; Default: {}".format(RC_PATH_DEF), default=RC_PATH_DEF)
    parser.add_argument("--log-file", help="Path to log file; Default: stdout", default=None)
    parser.add_argument('--log-level',
  4. pannal revised this gist Mar 22, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -97,7 +97,7 @@ def _log_level_string_to_int(log_level_string):
    # this might be a tad naive
    whitelist_orig = f.read().split()
    whitelist = whitelist_orig[:]
    for omit, addr in from_lines:
    for omit, addr in list(set(from_lines)):
    if cls == "spam" and addr in whitelist:
    action = "remove"
    elif cls == "ham" and addr not in whitelist:
  5. pannal revised this gist Mar 22, 2019. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -83,7 +83,7 @@ def _log_level_string_to_int(log_level_string):

    # use mailparser to get sender addresses
    mail = mailparser.parse_from_string(what)
    from_lines = mail.from_
    from_lines = mail.from_[:]
    for k in ("return_path", "envelope_from", "sender", "x_mail_from"):
    val = getattr(mail, k)
    if not val:
    @@ -103,7 +103,7 @@ def _log_level_string_to_int(log_level_string):
    elif cls == "ham" and addr not in whitelist:
    action = "append"
    else:
    log.debug("whitelist: message already marked as {}".format(cls))
    log.debug("whitelist: {} already marked as {}".format(addr, cls))
    continue

    log.info("{}: {}".format(cls, addr))
  6. pannal revised this gist Mar 22, 2019. 1 changed file with 22 additions and 4 deletions.
    26 changes: 22 additions & 4 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -5,6 +5,8 @@
    import json
    import logging
    import argparse
    import email
    import mailparser

    _LOG_LEVEL_STRINGS = ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']

    @@ -19,7 +21,7 @@ def _log_level_string_to_int(log_level_string):

    return log_level_int

    WL_PATH_DEF = "/etc/rspamd/whitelist.txt"
    WL_PATH_DEF = "/etc/rspamd/local.d/whitelist.txt"
    MP_PATH_DEF = "/usr/local/bin/mailparser"
    RC_PATH_DEF = "/usr/bin/rspamc"

    @@ -41,6 +43,12 @@ def _log_level_string_to_int(log_level_string):
    type=_log_level_string_to_int,
    nargs='?',
    help='Set the logging output level. {0}; Default: INFO'.format(_LOG_LEVEL_STRINGS))
    parser.add_argument('--mailparse-log-level',
    default='WARNING',
    dest='mp_log_level',
    type=_log_level_string_to_int,
    nargs='?',
    help='Set the mailparse logging output level. {0}; Default: INFO'.format(_LOG_LEVEL_STRINGS))

    log = logging.getLogger()

    @@ -60,6 +68,7 @@ def _log_level_string_to_int(log_level_string):
    log.addHandler(consoleHandler)

    log.setLevel(args.log_level)
    logging.getLogger("mailparser").setLevel(args.mp_log_level)
    log.debug("Called rspamd_learn.py")

    # main
    @@ -72,8 +81,16 @@ def _log_level_string_to_int(log_level_string):
    if not what:
    raise ValueError("Either pass the to-be-processed message as stdin or via -i/--input")

    # use mailparser to get From
    from_lines = json.loads(subprocess.check_output([args.mailparser_path, "-k", "-m"], input=what, universal_newlines=True))
    # use mailparser to get sender addresses
    mail = mailparser.parse_from_string(what)
    from_lines = mail.from_
    for k in ("return_path", "envelope_from", "sender", "x_mail_from"):
    val = getattr(mail, k)
    if not val:
    continue
    if not isinstance(val, list):
    val = [val]
    from_lines += email.utils.getaddresses(val)

    # parse current whitelist and update it according to the current message
    with open(args.whitelist_path, "r+", encoding="utf-8") as f:
    @@ -102,4 +119,5 @@ def _log_level_string_to_int(log_level_string):
    log.debug("Rspamc result: {}".format(ret))

    except Exception as e:
    log.exception(e)
    log.exception(e)
    sys.exit(1)
  7. pannal revised this gist Mar 22, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -25,7 +25,7 @@ def _log_level_string_to_int(log_level_string):

    parser = argparse.ArgumentParser(description="""Learn messages via rspamc and manage a sender whitelist. Depends on https://github.com/SpamScope/mail-parser.\n\nUse with local.d/multimap.conf:\nSENDER_FROM_WHITELIST {{
    type = "from";
    map = "file://{}"; # default
    map = "file://{}"; # default; set this via -w/--whitelist-path
    prefilter = true;
    action = "accept";
    }}""".format(WL_PATH_DEF), formatter_class=argparse.RawDescriptionHelpFormatter)
  8. pannal revised this gist Mar 22, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -80,7 +80,7 @@ def _log_level_string_to_int(log_level_string):
    # this might be a tad naive
    whitelist_orig = f.read().split()
    whitelist = whitelist_orig[:]
    for ommit, addr in from_lines:
    for omit, addr in from_lines:
    if cls == "spam" and addr in whitelist:
    action = "remove"
    elif cls == "ham" and addr not in whitelist:
  9. pannal revised this gist Mar 22, 2019. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -23,12 +23,12 @@ def _log_level_string_to_int(log_level_string):
    MP_PATH_DEF = "/usr/local/bin/mailparser"
    RC_PATH_DEF = "/usr/bin/rspamc"

    parser = argparse.ArgumentParser(description="""Learn messages via rspamc and manage a sender whitelist. Depends on https://github.com/SpamScope/mail-parser.\n\nUse with local.d/multimap.conf:\nSENDER_FROM_WHITELIST {
    parser = argparse.ArgumentParser(description="""Learn messages via rspamc and manage a sender whitelist. Depends on https://github.com/SpamScope/mail-parser.\n\nUse with local.d/multimap.conf:\nSENDER_FROM_WHITELIST {{
    type = "from";
    map = "file:///etc/rspamd/whitelist.txt"; # default; set this via -w/--whitelist-path
    map = "file://{}"; # default
    prefilter = true;
    action = "accept";
    }""", formatter_class=argparse.RawDescriptionHelpFormatter)
    }}""".format(WL_PATH_DEF), formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("message_class", help="What to classify the message as: 'ham' or 'spam'")
    parser.add_argument("--input", "-i", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Mail message input; read from stdin by default")
    parser.add_argument("-w", "--whitelist-path", help="Path to whitelist file *NEEDS TO BE WRITABLE BY THE CURRENT USER*; Default: {}".format(WL_PATH_DEF), default=WL_PATH_DEF)
  10. pannal revised this gist Mar 22, 2019. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -75,8 +75,6 @@ def _log_level_string_to_int(log_level_string):
    # use mailparser to get From
    from_lines = json.loads(subprocess.check_output([args.mailparser_path, "-k", "-m"], input=what, universal_newlines=True))



    # parse current whitelist and update it according to the current message
    with open(args.whitelist_path, "r+", encoding="utf-8") as f:
    # this might be a tad naive
  11. pannal revised this gist Mar 22, 2019. 1 changed file with 5 additions and 3 deletions.
    8 changes: 5 additions & 3 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -46,7 +46,7 @@ def _log_level_string_to_int(log_level_string):

    if __name__ == "__main__":
    # parse argv
    args = parser.parse_args()
    args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])

    # set up logging
    formatter = logging.Formatter('%(asctime)s:%(levelname)s: %(message)s')
    @@ -65,15 +65,17 @@ def _log_level_string_to_int(log_level_string):
    # main
    try:
    cls = args.message_class
    if cls not in ("spam", "ham"):
    raise ValueError("First argument must be 'ham' or 'spam'")

    what = args.input.read()
    if not what:
    raise ValueError("Either pass the to-be-processed message as stdin or via -i/--input")

    # use mailparser to get From
    from_lines = json.loads(subprocess.check_output([args.mailparser_path, "-k", "-m"], input=what, universal_newlines=True))

    if cls not in ("spam", "ham"):
    raise ValueError("First argument must be 'ham' or 'spam'")


    # parse current whitelist and update it according to the current message
    with open(args.whitelist_path, "r+", encoding="utf-8") as f:
  12. pannal revised this gist Mar 22, 2019. 1 changed file with 6 additions and 1 deletion.
    7 changes: 6 additions & 1 deletion rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -23,7 +23,12 @@ def _log_level_string_to_int(log_level_string):
    MP_PATH_DEF = "/usr/local/bin/mailparser"
    RC_PATH_DEF = "/usr/bin/rspamc"

    parser = argparse.ArgumentParser(description="Learn messages via rspamc and manage a sender whitelist. Depends on https://github.com/SpamScope/mail-parser")
    parser = argparse.ArgumentParser(description="""Learn messages via rspamc and manage a sender whitelist. Depends on https://github.com/SpamScope/mail-parser.\n\nUse with local.d/multimap.conf:\nSENDER_FROM_WHITELIST {
    type = "from";
    map = "file:///etc/rspamd/whitelist.txt"; # default; set this via -w/--whitelist-path
    prefilter = true;
    action = "accept";
    }""", formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("message_class", help="What to classify the message as: 'ham' or 'spam'")
    parser.add_argument("--input", "-i", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Mail message input; read from stdin by default")
    parser.add_argument("-w", "--whitelist-path", help="Path to whitelist file *NEEDS TO BE WRITABLE BY THE CURRENT USER*; Default: {}".format(WL_PATH_DEF), default=WL_PATH_DEF)
  13. pannal revised this gist Mar 22, 2019. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -26,7 +26,7 @@ def _log_level_string_to_int(log_level_string):
    parser = argparse.ArgumentParser(description="Learn messages via rspamc and manage a sender whitelist. Depends on https://github.com/SpamScope/mail-parser")
    parser.add_argument("message_class", help="What to classify the message as: 'ham' or 'spam'")
    parser.add_argument("--input", "-i", nargs='?', type=argparse.FileType('r'), default=sys.stdin, help="Mail message input; read from stdin by default")
    parser.add_argument("-w", "--whitelist-path", help="Path to whitelist file; Default: {}".format(WL_PATH_DEF), default=WL_PATH_DEF)
    parser.add_argument("-w", "--whitelist-path", help="Path to whitelist file *NEEDS TO BE WRITABLE BY THE CURRENT USER*; Default: {}".format(WL_PATH_DEF), default=WL_PATH_DEF)
    parser.add_argument("-m", "--mailparser-path", help="Path to mailparser binary; Default: {}".format(MP_PATH_DEF), default=MP_PATH_DEF)
    parser.add_argument("-r", "--rspamc-path", help="Path to rspamc binary; Default: {}".format(RC_PATH_DEF), default=RC_PATH_DEF)
    parser.add_argument("--log-file", help="Path to log file; Default: stdout", default=None)
  14. pannal revised this gist Mar 22, 2019. 1 changed file with 6 additions and 1 deletion.
    7 changes: 6 additions & 1 deletion rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -40,8 +40,10 @@ def _log_level_string_to_int(log_level_string):
    log = logging.getLogger()

    if __name__ == "__main__":
    # set up logging
    # parse argv
    args = parser.parse_args()

    # set up logging
    formatter = logging.Formatter('%(asctime)s:%(levelname)s: %(message)s')
    if args.log_file:
    fileHandler = logging.FileHandler("{}".format(args.log_file))
    @@ -62,12 +64,15 @@ def _log_level_string_to_int(log_level_string):
    if not what:
    raise ValueError("Either pass the to-be-processed message as stdin or via -i/--input")

    # use mailparser to get From
    from_lines = json.loads(subprocess.check_output([args.mailparser_path, "-k", "-m"], input=what, universal_newlines=True))

    if cls not in ("spam", "ham"):
    raise ValueError("First argument must be 'ham' or 'spam'")

    # parse current whitelist and update it according to the current message
    with open(args.whitelist_path, "r+", encoding="utf-8") as f:
    # this might be a tad naive
    whitelist_orig = f.read().split()
    whitelist = whitelist_orig[:]
    for ommit, addr in from_lines:
  15. pannal revised this gist Mar 22, 2019. No changes.
  16. pannal revised this gist Mar 22, 2019. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -76,6 +76,7 @@ def _log_level_string_to_int(log_level_string):
    elif cls == "ham" and addr not in whitelist:
    action = "append"
    else:
    log.debug("whitelist: message already marked as {}".format(cls))
    continue

    log.info("{}: {}".format(cls, addr))
  17. pannal created this gist Mar 22, 2019.
    94 changes: 94 additions & 0 deletions rspamd_learn.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,94 @@
    #!/usr/bin/python3

    import sys
    import subprocess
    import json
    import logging
    import argparse

    # Level names accepted by the --log-level option, most to least severe.
    _LOG_LEVEL_STRINGS = ['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG']


    def _log_level_string_to_int(log_level_string):
        """Convert a log level name into the numeric ``logging`` level.

        Intended as an argparse ``type=`` callable. Matching ignores case and
        surrounding whitespace, so ``"info"`` and ``"INFO"`` are equivalent.

        Args:
            log_level_string: One of the names in ``_LOG_LEVEL_STRINGS``.

        Returns:
            The integer level constant of the same name from ``logging``.

        Raises:
            argparse.ArgumentTypeError: If the name is not an accepted level,
                or ``logging`` does not expose an integer under that name.
        """
        candidate = str(log_level_string).strip().upper()
        if candidate not in _LOG_LEVEL_STRINGS:
            message = 'invalid choice: {0} (choose from {1})'.format(log_level_string, _LOG_LEVEL_STRINGS)
            raise argparse.ArgumentTypeError(message)

        log_level_int = getattr(logging, candidate, None)
        # Explicit check instead of assert: asserts are stripped under -O.
        if not isinstance(log_level_int, int):
            raise argparse.ArgumentTypeError(
                'logging has no integer level named {0}'.format(candidate))

        return log_level_int

    # Default locations; each one can be overridden on the command line.
    WL_PATH_DEF = "/etc/rspamd/whitelist.txt"
    MP_PATH_DEF = "/usr/local/bin/mailparser"
    RC_PATH_DEF = "/usr/bin/rspamc"

    parser = argparse.ArgumentParser(description="Learn messages via rspamc and manage a sender whitelist. Depends on https://github.com/SpamScope/mail-parser")
    parser.add_argument("message_class", help="What to classify the message as: 'ham' or 'spam'")
    # nargs='?' allows a bare -i/--input; const keeps that case on stdin
    # instead of producing None.
    parser.add_argument("--input", "-i", nargs='?', type=argparse.FileType('r'), const=sys.stdin, default=sys.stdin, help="Mail message input; read from stdin by default")
    parser.add_argument("-w", "--whitelist-path", help="Path to whitelist file; Default: {}".format(WL_PATH_DEF), default=WL_PATH_DEF)
    parser.add_argument("-m", "--mailparser-path", help="Path to mailparser binary; Default: {}".format(MP_PATH_DEF), default=MP_PATH_DEF)
    parser.add_argument("-r", "--rspamc-path", help="Path to rspamc binary; Default: {}".format(RC_PATH_DEF), default=RC_PATH_DEF)
    parser.add_argument("--log-file", help="Path to log file; Default: stdout", default=None)
    # The string default is converted by the type callable; const covers a
    # bare --log-level, which would otherwise yield None.
    parser.add_argument('--log-level',
                        default='INFO',
                        const=logging.INFO,
                        dest='log_level',
                        type=_log_level_string_to_int,
                        nargs='?',
                        help='Set the logging output level. {0}; Default: INFO'.format(_LOG_LEVEL_STRINGS))

    # Root logger; its handler and level are set in the __main__ section.
    log = logging.getLogger()

    if __name__ == "__main__":
        # parse argv
        args = parser.parse_args()

        # set up logging: a file handler when --log-file is given, otherwise
        # a StreamHandler on its default stream
        formatter = logging.Formatter('%(asctime)s:%(levelname)s: %(message)s')
        if args.log_file:
            handler = logging.FileHandler("{}".format(args.log_file))
        else:
            handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        log.addHandler(handler)

        log.setLevel(args.log_level)
        log.debug("Called rspamd_learn.py")

        # main
        try:
            cls = args.message_class
            # Validate the class first so no subprocess runs for a bad call.
            if cls not in ("spam", "ham"):
                raise ValueError("First argument must be 'ham' or 'spam'")

            what = args.input.read()
            if not what:
                raise ValueError("Either pass the to-be-processed message as stdin or via -i/--input")

            # The mailparser output is parsed as JSON and iterated below as
            # two-element entries; the second element is used as the address.
            # NOTE(review): the first element is presumably the display name -
            # confirm against the mailparser CLI output for "-k -m".
            from_lines = json.loads(subprocess.check_output([args.mailparser_path, "-k", "-m"], input=what, universal_newlines=True))

            # Update the whitelist: spam removes a listed sender, ham adds an
            # unlisted one; anything else is left untouched.
            with open(args.whitelist_path, "r+", encoding="utf-8") as f:
                whitelist_orig = f.read().split()
                whitelist = whitelist_orig[:]
                for _name, addr in from_lines:
                    if cls == "spam" and addr in whitelist:
                        action = "remove"
                    elif cls == "ham" and addr not in whitelist:
                        action = "append"
                    else:
                        continue

                    log.info("{}: {}".format(cls, addr))
                    getattr(whitelist, action)(addr)

                # Rewrite the file only when the list actually changed.
                if whitelist_orig != whitelist:
                    f.seek(0)
                    f.truncate()
                    f.write("\n".join(whitelist) + "\n")

            # rspamc learn message
            ret = subprocess.check_output([args.rspamc_path, "learn_{}".format(cls)], input=what, universal_newlines=True)
            log.debug("Rspamc result: {}".format(ret))

        except Exception as e:
            log.exception(e)
            # Exit non-zero so the caller can tell that learning failed.
            sys.exit(1)