oyasai8910 · July 17, 2017 13:09
diff --git a/Recomendation.py b/Recomendation.py
 #!/usr/bin/env python

 from __future__ import print_function
 from pyspark import SparkContext
 from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
 from pyspark.mllib.evaluation import RegressionMetrics
 import math

 if __name__ == "__main__":
    sc = SparkContext(appName="PythonCollaborativeFilteringExample")
    # $example on$
    # Load and parse the data
    data = sc.textFile("preference_not0.csv")

    f = open("outputRecord.txt", "w")
    
    # Splitting the data
    ratings = data.map(lambda l: l.split(','))\
        .map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))

    train, validation, test, adjustment = ratings.randomSplit([6, 2, 2, 0]) # splitting data into testing data, validation data and training data

    train.cache() # caching data for quick optimization
    validation.cache()
    test.cache()
    
    validationForPredict = validation.map(lambda x: (x[0], x[1]))
    actualReformatted = validation.map(lambda x: ((x[0], x[1]), x[2]))

    # Build the recommendation model using Alternating Least Squares
    #rank = 10
    #numIterations = 10
    #model = ALS.train(train, rank, numIterations)

    iterations = [5, 7, 10]
    regularizationParameter = 0.1
    ranks = [10, 12, 15]
    RMSEs = [0, 0, 0, 0, 0, 0, 0, 0, 0]
    err = 0
    tolerance = 0.03

    minRMSE = float('inf')
    bestIteration = -1
    bestRank = -1
    ptr1 = "output \n"

    #validating hyper-parameters
    for rank in ranks:
        for iteration in iterations:
            model = ALS.trainImplicit(train, rank, iteration, lambda_=regularizationParameter)
            predictedRatings = model.predictAll(validationForPredict)
            predictedReformatted = predictedRatings.map(lambda x: ((x[0], x[1]), x[2]))

            predictionAndObservations = (predictedReformatted.join(actualReformatted).map(lambda x: x[1]))
            
            metrics = RegressionMetrics(predictionAndObservations)
            RMSE = metrics.rootMeanSquaredError
            RMSEs[err] = RMSE
            err += 1
            
            #print ("For rank %s and iteration %s, the RMSE is %s") % (rank, iteration, RMSE)
            ptr1 = ptr1 +  "For rank " + str(rank) +  " and iterations " + str(iteration) + " the RMSE is " + str(RMSE) + " \n"
            if RMSE < minRMSE:
                minRMSE = RMSE
                bestIteration = iteration
                bestRank = rank

    #print ("The best model was trained with rank %s and iteration %s") % (bestRank, bestIteration)
    ptr2 = "The best model was trained with rank " + str(bestRank) + " and iteration " + str(bestIteration) + " \n"

    bestModel = ALS.trainImplicit(train, bestRank, iterations=bestIteration, lambda_=regularizationParameter)

    testForPredicting = test.map(lambda x: (x[0], x[1]))
    testReformatted = test.map(lambda x: ((x[0], x[1]), x[2]))

    predictedTest = bestModel.predictAll(testForPredicting)
    predictedTestReformatted = predictedTest.map(lambda x: ((x[0], x[1]), x[2]))

    predictionAndObservationTest = (predictedTestReformatted.join(testReformatted). map(lambda x: x[1]))

    metrics = RegressionMetrics(predictionAndObservationTest)
    testRMSE = metrics.rootMeanSquaredError

    print ("The Model had a RMSE on the test set of " + str(testRMSE))
    ptr3 = "The Model had a RMSE on the test set of " + str(testRMSE)

    f.write(ptr1 + ptr2 + ptr3)

    # Evaluate the model on training data
    #testdata = test.map(lambda p: (p[0], p[1]))
    #predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
    
    # Keep the it if there is already a data, if not put in the data camputed by ALS
    #ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)

    #RMSE = math.sqrt(ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean())
    #print("Root Mean Squared Error = " + str(RMSE))

    # Save and load model
    bestModel.save(sc, "target/tmp/myCollaborativeFilter")
    sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
    # $example off$

    f.close()
	#!/usr/bin/env python

	from __future__ import print_function
	from pyspark import SparkContext
	from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating
	from pyspark.mllib.evaluation import RegressionMetrics
	import math

	if __name__ == "__main__":
	sc = SparkContext(appName="PythonCollaborativeFilteringExample")
	# $example on$
	# Load and parse the data
	data = sc.textFile("preference_not0.csv")

	f = open("outputRecord.txt", "w")

	# Splitting the data
	ratings = data.map(lambda l: l.split(','))\
	.map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2])))

	train, validation, test, adjustment = ratings.randomSplit([6, 2, 2, 0]) # splitting data into testing data, validation data and training data

	train.cache() # caching data for quick optimization
	validation.cache()
	test.cache()

	validationForPredict = validation.map(lambda x: (x[0], x[1]))
	actualReformatted = validation.map(lambda x: ((x[0], x[1]), x[2]))

	# Build the recommendation model using Alternating Least Squares
	#rank = 10
	#numIterations = 10
	#model = ALS.train(train, rank, numIterations)

	iterations = [5, 7, 10]
	regularizationParameter = 0.1
	ranks = [10, 12, 15]
	RMSEs = [0, 0, 0, 0, 0, 0, 0, 0, 0]
	err = 0
	tolerance = 0.03

	minRMSE = float('inf')
	bestIteration = -1
	bestRank = -1
	ptr1 = "output \n"

	#validating hyper-parameters
	for rank in ranks:
	for iteration in iterations:
	model = ALS.trainImplicit(train, rank, iteration, lambda_=regularizationParameter)
	predictedRatings = model.predictAll(validationForPredict)
	predictedReformatted = predictedRatings.map(lambda x: ((x[0], x[1]), x[2]))

	predictionAndObservations = (predictedReformatted.join(actualReformatted).map(lambda x: x[1]))

	metrics = RegressionMetrics(predictionAndObservations)
	RMSE = metrics.rootMeanSquaredError
	RMSEs[err] = RMSE
	err += 1

	#print ("For rank %s and iteration %s, the RMSE is %s") % (rank, iteration, RMSE)
	ptr1 = ptr1 + "For rank " + str(rank) + " and iterations " + str(iteration) + " the RMSE is " + str(RMSE) + " \n"
	if RMSE < minRMSE:
	minRMSE = RMSE
	bestIteration = iteration
	bestRank = rank

	#print ("The best model was trained with rank %s and iteration %s") % (bestRank, bestIteration)
	ptr2 = "The best model was trained with rank " + str(bestRank) + " and iteration " + str(bestIteration) + " \n"

	bestModel = ALS.trainImplicit(train, bestRank, iterations=bestIteration, lambda_=regularizationParameter)

	testForPredicting = test.map(lambda x: (x[0], x[1]))
	testReformatted = test.map(lambda x: ((x[0], x[1]), x[2]))

	predictedTest = bestModel.predictAll(testForPredicting)
	predictedTestReformatted = predictedTest.map(lambda x: ((x[0], x[1]), x[2]))

	predictionAndObservationTest = (predictedTestReformatted.join(testReformatted). map(lambda x: x[1]))

	metrics = RegressionMetrics(predictionAndObservationTest)
	testRMSE = metrics.rootMeanSquaredError

	print ("The Model had a RMSE on the test set of " + str(testRMSE))
	ptr3 = "The Model had a RMSE on the test set of " + str(testRMSE)

	f.write(ptr1 + ptr2 + ptr3)

	# Evaluate the model on training data
	#testdata = test.map(lambda p: (p[0], p[1]))
	#predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))

	# Keep the it if there is already a data, if not put in the data camputed by ALS
	#ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)

	#RMSE = math.sqrt(ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean())
	#print("Root Mean Squared Error = " + str(RMSE))

	# Save and load model
	bestModel.save(sc, "target/tmp/myCollaborativeFilter")
	sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter")
	# $example off$

	f.close()
No results found