Skip to content

Commit

Permalink
Add file
Browse files Browse the repository at this point in the history
  • Loading branch information
susanli2016 committed May 7, 2018
1 parent 0037881 commit 5d75027
Showing 1 changed file with 292 additions and 0 deletions.
292 changes: 292 additions & 0 deletions Recommender system.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n",
" |-- movieId: integer (nullable = true)\n",
" |-- rating: double (nullable = true)\n",
" |-- userId: integer (nullable = true)\n",
"\n"
]
}
],
"source": [
"from pyspark.sql import SparkSession\n",
"from pyspark.ml.recommendation import ALS\n",
"from pyspark.ml.evaluation import RegressionEvaluator\n",
"\n",
"spark = SparkSession.builder.appName('recommender').getOrCreate()\n",
"df = spark.read.csv('movielens_ratings.csv', inferSchema= True, header = True)\n",
"df.printSchema()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+------+------+\n",
"|movieId|rating|userId|\n",
"+-------+------+------+\n",
"| 2| 3.0| 0|\n",
"| 3| 1.0| 0|\n",
"| 5| 2.0| 0|\n",
"+-------+------+------+\n",
"only showing top 3 rows\n",
"\n"
]
}
],
"source": [
"df.show(3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+------------------+------------------+------------------+\n",
"|summary| movieId| rating| userId|\n",
"+-------+------------------+------------------+------------------+\n",
"| count| 1501| 1501| 1501|\n",
"| mean| 49.40572951365756|1.7741505662891406|14.383744170552964|\n",
"| stddev|28.937034065088994| 1.187276166124803| 8.591040424293272|\n",
"| min| 0| 1.0| 0|\n",
"| max| 99| 5.0| 29|\n",
"+-------+------------------+------------------+------------------+\n",
"\n"
]
}
],
"source": [
"df.describe().show()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"train, test = df.randomSplit([0.8, 0.2])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"als = ALS(maxIter=5, regParam=0.01, userCol='userId', itemCol='movieId', ratingCol='rating')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+-------+------+------+-----------+\n",
"|movieId|rating|userId| prediction|\n",
"+-------+------+------+-----------+\n",
"| 31| 1.0| 26| -2.5238004|\n",
"| 31| 1.0| 27|-0.59501255|\n",
"| 31| 1.0| 4| 3.137197|\n",
"| 85| 1.0| 28| -0.1683234|\n",
"| 85| 1.0| 13| 2.2037606|\n",
"| 85| 5.0| 8| 4.343044|\n",
"| 85| 1.0| 29| 1.5260103|\n",
"| 65| 1.0| 28| 3.4493313|\n",
"| 53| 3.0| 13| 2.631197|\n",
"| 53| 1.0| 25| -2.3101962|\n",
"| 78| 1.0| 13| 0.54879403|\n",
"| 78| 1.0| 11| 0.4418241|\n",
"| 81| 5.0| 28| 0.8307642|\n",
"| 81| 1.0| 1| -1.0092545|\n",
"| 81| 1.0| 6| 2.4090357|\n",
"| 81| 1.0| 19| 0.13363218|\n",
"| 81| 1.0| 15| 0.5015665|\n",
"| 28| 1.0| 23| -0.2624761|\n",
"| 28| 1.0| 2| 1.4344041|\n",
"| 76| 1.0| 1| 1.9119977|\n",
"+-------+------+------+-----------+\n",
"only showing top 20 rows\n",
"\n"
]
}
],
"source": [
"model = als.fit(train)\n",
"predictions = model.transform(test)\n",
"predictions.show()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE: 1.8124486699552562\n"
]
}
],
"source": [
"evaluator = RegressionEvaluator(metricName = 'rmse', labelCol = 'rating', predictionCol = 'prediction')\n",
"rmse = evaluator.evaluate(predictions)\n",
"print('RMSE:', rmse)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+------+-------+\n",
"|userId|movieId|\n",
"+------+-------+\n",
"| 12| 4|\n",
"| 12| 18|\n",
"| 12| 22|\n",
"| 12| 35|\n",
"| 12| 38|\n",
"| 12| 41|\n",
"| 12| 45|\n",
"| 12| 63|\n",
"| 12| 79|\n",
"| 12| 83|\n",
"| 12| 95|\n",
"| 12| 96|\n",
"+------+-------+\n",
"\n"
]
}
],
"source": [
"this_user = test.filter(test['userId'] == 12).select('userId', 'movieId')\n",
"this_user.show()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+------+-------+----------+\n",
"|userId|movieId|prediction|\n",
"+------+-------+----------+\n",
"| 12| 22| 1.6517887|\n",
"| 12| 96| 0.1308065|\n",
"| 12| 41| 1.4067035|\n",
"| 12| 35| 0.7640405|\n",
"| 12| 4|-1.1053085|\n",
"| 12| 63| 3.851338|\n",
"| 12| 45|0.70455414|\n",
"| 12| 38| 2.8361285|\n",
"| 12| 95| 0.9426958|\n",
"| 12| 83| 0.6145076|\n",
"| 12| 79| 1.3491223|\n",
"| 12| 18| -0.656619|\n",
"+------+-------+----------+\n",
"\n"
]
}
],
"source": [
"recommendation_this_user = model.transform(this_user)\n",
"recommendation_this_user.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+------+-------+----------+\n",
"|userId|movieId|prediction|\n",
"+------+-------+----------+\n",
"| 12| 63| 3.851338|\n",
"| 12| 38| 2.8361285|\n",
"| 12| 22| 1.6517887|\n",
"| 12| 41| 1.4067035|\n",
"| 12| 79| 1.3491223|\n",
"| 12| 95| 0.9426958|\n",
"| 12| 35| 0.7640405|\n",
"| 12| 45|0.70455414|\n",
"| 12| 83| 0.6145076|\n",
"| 12| 96| 0.1308065|\n",
"| 12| 18| -0.656619|\n",
"| 12| 4|-1.1053085|\n",
"+------+-------+----------+\n",
"\n"
]
}
],
"source": [
"recommendation_this_user.orderBy('prediction', ascending=False).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 5d75027

Please sign in to comment.