Skip to content

Instantly share code, notes, and snippets.

@amn41
Created July 31, 2017 14:20
Show Gist options
  • Select an option

  • Save amn41/0a3556e79d177ad5e0ce5fd84afe9f59 to your computer and use it in GitHub Desktop.

Select an option

Save amn41/0a3556e79d177ad5e0ce5fd84afe9f59 to your computer and use it in GitHub Desktop.

Revisions

  1. amn41 created this gist Jul 31, 2017.
    71 changes: 71 additions & 0 deletions process_logs.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,71 @@
    from __future__ import unicode_literals
    from __future__ import print_function
    from __future__ import division
    from __future__ import absolute_import
    from builtins import str as text

    import argparse
    import io
    import json

    from rasa_nlu.converters import load_data
    from rasa_nlu.model import Metadata, Interpreter

    def create_argparser():
        """Build the command-line parser for the log-processing script.

        Returns:
            argparse.ArgumentParser: parser exposing ``model_dir`` (optional),
            ``log_file`` and ``out_file`` (both required).
        """
        parser = argparse.ArgumentParser(
            description='Process logs from Rasa NLU server. If a model dir is specified, '
                        'load that model and re-do the predictions. Sort by intent confidence, '
                        'and output the data in the rasa json format for training data'
        )
        parser.add_argument('-m', '--model_dir', default=None,
                            help='dir containing model (optional)')
        # The log file is read line by line as JSON, so describe it as such
        # instead of as "training data". It is required because the script
        # cannot do anything useful without it.
        parser.add_argument('-l', '--log_file', required=True,
                            help='file containing the Rasa NLU server logs '
                                 '(one JSON object per line)')
        # Required for the same reason: without it the script would only fail
        # late with an opaque error when trying to open ``None``.
        parser.add_argument('-o', '--out_file', required=True,
                            help='file where to save the logs in rasa format')
        return parser


    def process_logs(model_dir, log_file, out_file):
        """Convert Rasa NLU server logs into a rasa-format training data file.

        Each non-blank line of ``log_file`` is parsed as one JSON object. The
        code below reads the prediction from the ``"user_input"`` key of each
        entry (``text``, ``intent``, ``entities``, ``intent_ranking``).

        If ``model_dir`` is given, the model is loaded, logged texts that are
        already part of its training data are dropped, and the remaining texts
        are parsed again with that model. Otherwise the logged predictions are
        used as they are.

        Predictions without an ``intent_ranking`` are discarded; the rest are
        sorted by ascending intent confidence and written to ``out_file`` as
        ``{"rasa_nlu_data": {"common_examples": [...]}}``.

        Args:
            model_dir: directory of a trained model, or ``None`` to reuse the
                logged predictions.
            log_file: path of the log file (one JSON object per line).
            out_file: path of the JSON file to write.
        """
        # Use a context manager so the log file is closed, and skip blank
        # lines (e.g. a trailing newline) which are not valid JSON.
        with io.open(log_file) as log_handle:
            logged_predictions = [
                json.loads(line) for line in log_handle if line.strip()
            ]

        if model_dir is not None:
            # Only this branch needs the config class, so import it locally.
            from rasa_nlu.config import RasaNLUConfig

            # load model & its training data
            metadata = Metadata.load(model_dir)
            interpreter = Interpreter.load(metadata, RasaNLUConfig())
            # NOTE(review): assumes the interpreter exposes the training data
            # path under config["training_data"] and that training examples
            # are subscriptable by 'text' - confirm against the installed
            # rasa_nlu version.
            training_data = load_data(interpreter.config["training_data"]).training_examples

            # The rest of this function reads the text from "user_input", so
            # look there first; fall back to a top-level "text" key.
            logged_texts = set()
            for entry in logged_predictions:
                logged_text = (entry.get("user_input") or {}).get("text", entry.get("text"))
                if logged_text is not None:
                    logged_texts.add(logged_text)

            # dedupe & create test set
            train_texts = set(t['text'] for t in training_data)
            test_texts = logged_texts.difference(train_texts)

            # predict on test set; wrap each result like a log entry so both
            # branches produce the same shape for the code below
            predictions = [{"user_input": interpreter.parse(t)} for t in test_texts]
        else:
            predictions = logged_predictions

        # Keep only entries that carry a prediction with an intent ranking;
        # entries without "user_input" are skipped instead of crashing.
        predictions = [
            p for p in predictions
            if (p.get("user_input") or {}).get("intent_ranking") is not None
        ]
        # least confident predictions first
        predictions.sort(key=lambda p: p["user_input"]["intent"]["confidence"])

        preds = [
            {
                "intent": p["user_input"]["intent"]["name"],
                "entities": p["user_input"]["entities"],
                "text": p["user_input"]["text"]
            }
            for p in predictions
        ]
        data = {"rasa_nlu_data": {"common_examples": preds}}

        # persist
        with io.open(out_file, "w") as f:
            f.write(text(json.dumps(data, indent=2)))


    if __name__ == "__main__":
        # Script entry point: read the command-line options and hand them
        # straight to process_logs.
        cli_args = create_argparser().parse_args()
        process_logs(
            cli_args.model_dir,
            cli_args.log_file,
            cli_args.out_file,
        )