Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created October 10, 2012 18:31
Show Gist options
  • Select an option

  • Save marcelcaraciolo/3867521 to your computer and use it in GitHub Desktop.

Select an option

Save marcelcaraciolo/3867521 to your computer and use it in GitHub Desktop.

Revisions

  1. marcelcaraciolo created this gist Oct 10, 2012.
    156 changes: 156 additions & 0 deletions friends_recommender.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,156 @@
    #-*-coding: utf-8 -*-

    '''
    This module represents the recommender system for recommending
    new friends based on 'mutual friends'.
    '''
    __author__ = 'Marcel Caraciolo <caraciol@gmail.com>'

    from mrjob.job import MRJob

    def combinations(iterable, r):
    """
    Implementation of itertools combinations method. Re-implemented here because
    of import issues in Amazon Elastic MapReduce. Was just easier to do this than
    bootstrap.
    More info here: http://docs.python.org/library/itertools.html#itertools.combinations
    Input/Output:
    combinations('ABCD', 2) --> AB AC AD BC BD CD
    combinations(range(4), 3) --> 012 013 023 123
    """
    pool = tuple(iterable)
    n = len(pool)
    if r > n:
    return
    indices = range(r)
    yield tuple(pool[i] for i in indices)
    while True:
    for i in reversed(range(r)):
    if indices[i] != i + n - r:
    break
    else:
    return
    indices[i] += 1
    for j in range(i + 1, r):
    indices[j] = indices[j - 1] + 1
    yield tuple(pool[i] for i in indices)


    TOP_N = 10


    class FriendsRecommender(MRJob):

    def steps(self):
    return [self.mr(self.map_input, self.reverse_index),
    self.mr(None, self.count_number_of_friends),
    self.mr(None, self.top_recommendations)]

    def map_input(self, key, line):
    '''
    Compute the reversed index for each connection
    between the source.
    Input (source -> {“friend1”, “friend2”, “friend3”}):
    marcel,jonas,maria,jose,amanda
    Output {[friend1], source, 1,
    {“friend1”, “friend2”, “friend3”}};
    {[friend2], source, 1,
    {“friend1”, “friend2”, “friend3”}};
    '''
    input = line.split(';')
    user_id, item_ids = input[0], input[1:]
    for i in range(len(item_ids)):
    f1 = item_ids[i]
    yield f1, (user_id, 1, item_ids)

    def reverse_index(self, key, values):
    '''
    Compute the cartesian product between among all
    sources that shares common friends.
    Input (friend1 -> (source, 1,
    {“friend1”, “friend2”, “friend3”}};
    marcel, jonas,maria,jose,amanda
    Output {(jonas, maria), 1}, {(maria, jose), 1},
    {(jonas,amanda), 1}, {(maria, amanda),1},
    {(jose, jonas), 1}
    '''
    final_list = []
    for value in values:
    final_list.append(value)

    for item1, item2 in combinations(final_list, 2):
    f1, count1, f1_friends = item1
    f2, count2, f2_friends = item2
    if f1 not in f2_friends:
    yield (f2, f1), (count1)
    if f2 not in f1_friends:
    yield (f1, f2), (count2)

    def count_number_of_friends(self, key, values):
    '''
    Count the number of mutual friends.
    Input {(source, recommended), [count1, count2, ... count n]}
    {(jonas, maria), [1,1,1]},
    {(jose, jonas), [1,1]},
    {(maria, amanda), [1]},
    {(maria,jose), [1]},
    {(jonas,amanda), [1, 1, 1]}
    Output ({friend1, friend2} numberOfMutualFriends):
    (jonas, marcel ) 1
    (fabiola, marcel) 1
    (marcel, patricia) 1
    (marcel, paula) 1
    (carol, marcel) 2
    '''

    user_id, possible_friend = key

    yield (user_id), \
    (possible_friend, sum(values))

    def top_recommendations(self, key, values):
    '''
    Get the TOP N recommendations for user.
    Input ({user_id, [(numberOfMutualFriends, friend),
    (numberOfMutualFriends2, friend)]}):
    [{marcel, [(patricia, 1), (paula, 1)]},
    {carol, [(marcel, 2)]},
    {fabiola, [(marcel,1)]},
    {jonas, [(marcel,1)]}]
    Output ({user_id, [(numberOfMutualFriends, friend),
    (numberOfMutualFriends2, friend)]}):
    Ex: Get the top 3 suggestions.
    [{marcel, [(patricia, 1), (paula, 1)]},
    {carol, [(marcel, 2)]},
    {fabiola, [(marcel,1)]},
    {jonas, [(marcel,1)]}]
    '''
    recommendations = []
    for idx, (item, score) in enumerate(values):
    recommendations.append((item, score))

    yield key, sorted(recommendations, key=lambda k: -k[1])[:TOP_N]


    if __name__ == '__main__':
    FriendsRecommender.run()