"""Toy sentiment demo built on NLTK's Naive Bayes classifier.

Trains on the small labelled corpus in ``train_samples``, then prints the
predicted label and the per-label probabilities for every sentence in
``test_samples``, followed by the classifier's most informative features.
"""
import re

from nltk import NaiveBayesClassifier
# Not used below any more; kept in case other parts of the file rely on it.
from nltk.probability import DictionaryProbDist

# Labelled training sentences: text -> 'pos' / 'neg'.
train_samples = {
    'I hate you and you are a bad person': 'neg',
    'I love you and you are a good person': 'pos',
    'I fail at everything and I want to kill people': 'neg',
    'I win at everything and I want to love people': 'pos',
    'sad are things are heppening. fml': 'neg',
    'good are things are heppening. gbu': 'pos',
    'I am so poor': 'neg',
    'I am so rich': 'pos',
    'I hate you mommy ! You are my terrible person': 'neg',
    'I love you mommy ! You are my amazing person': 'pos',
    'I want to kill butterflies since they make me sad': 'neg',
    'I want to chase butterflies since they make me happy': 'pos',
    'I want to hurt bunnies': 'neg',
    'I want to hug bunnies': 'pos',
    'You make me frown': 'neg',
    'You make me smile': 'pos',
}

# Unlabelled sentences to classify.
test_samples = [
    'You are a terrible person and everything you do is bad',
    'I love you all and you make me happy',
    'I frown whenever I see you in a poor state of mind',
    'Finally getting rich from my ideas. They make me smile.',
    'My mommy is poor',
    'I love butterflies. Yay for happy',
    'Everything is fail today and I hate stuff',
]

# A word is a run of ASCII letters/digits, optionally with one inner
# apostrophe ("don't"). Matching is done on lower-cased text.
_WORD_RE = re.compile(r"[a-z0-9]+(?:'[a-z0-9]+)?")


def gen_bow(s):
    """Return a bag-of-words feature dict for the sentence *s*.

    The sentence is lower-cased and split into words, dropping punctuation
    and whitespace, so that 'Smile.' and 'smile' yield the same feature.
    Each distinct word maps to ``True``; an empty or punctuation-only
    sentence yields an empty dict.
    """
    return {word: True for word in _WORD_RE.findall(s.lower())}


# Estimate the label and feature distributions from the training data
# instead of maintaining a hand-written feature table.
train_set = [(gen_bow(text), label) for text, label in train_samples.items()]
classifier = NaiveBayesClassifier.train(train_set)

for sample in test_samples:
    dist = classifier.prob_classify(gen_bow(sample))
    # Spell out the probability of each label; the distribution object's
    # own repr does not show them.
    probs = ", ".join(
        "%s=%.3f" % (label, dist.prob(label))
        for label in sorted(dist.samples())
    )
    # A single parenthesised argument prints the same on Python 2 and 3.
    print("%s | %s | %s" % (sample, dist.max(), probs))

classifier.show_most_informative_features()