Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save kristiyanto/40c886df823e01ed2f038cb8a3c7c9bd to your computer and use it in GitHub Desktop.

Select an option

Save kristiyanto/40c886df823e01ed2f038cb8a3c7c9bd to your computer and use it in GitHub Desktop.
from pyspark.ml import Pipeline
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.feature import IndexToString, StringIndexer
# Model: ALS collaborative filtering over the indexed user/context columns,
# using the 'count' column as the rating signal.
als = ALS(
    userCol='user_index',
    itemCol='context_index',
    ratingCol='count',
    implicitPrefs=True,
    nonnegative=True,
    maxIter=5,
    regParam=0.01,
    coldStartStrategy='drop',
    seed=777,
)

# Evaluators: both compare the 'prediction' column against 'count' and differ
# only in the metric they report.
rmse = RegressionEvaluator(
    labelCol='count',
    predictionCol='prediction',
    metricName='rmse',
)
mae = RegressionEvaluator(
    labelCol='count',
    predictionCol='prediction',
    metricName='mae',
)
# Convert IDs into Index
# Each StringIndexer maps a raw identifier column to a numeric index column;
# handleInvalid='keep' routes values not seen during fit into an extra
# bucket instead of raising.
userid_to_index = StringIndexer(
    inputCol='user_id',
    outputCol='user_index',
    handleInvalid='keep',
)
context_to_index = StringIndexer(
    inputCol='context',
    outputCol='context_index',
    handleInvalid='keep',
)

# Fit both indexers together, then apply them to the same feature frame.
id_to_index = Pipeline(
    stages=[userid_to_index, context_to_index],
).fit(features)
features_indexed = id_to_index.transform(features)

# Lookup tables pairing each original identifier with its assigned index.
user_mapping = features_indexed.select('user_id', 'user_index').distinct()
context_mapping = (
    features_indexed.select('context', 'context_index').distinct()
)
# Training and evaluation
# Seed the 80/20 split so the train/test partition, and therefore the
# reported metrics, are reproducible across runs. 777 mirrors the seed
# already passed to the ALS estimator.
training, test = features_indexed.randomSplit([0.8, 0.2], seed=777)

# Fit the recommender on the training portion and score the held-out rows.
model = als.fit(training)
predictions = model.transform(test)

# NOTE(review): the ALS estimator is configured with implicitPrefs=True, in
# which case predictions presumably represent preference strength rather
# than raw counts -- confirm that RMSE/MAE against 'count' is the intended
# metric.
rmse_value = rmse.evaluate(predictions)
mae_value = mae.evaluate(predictions)
print('RMSE: ', rmse_value, 'MAE: ', mae_value)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment