Skip to content

Commit

Permalink
Add file
Browse files Browse the repository at this point in the history
  • Loading branch information
susanli2016 committed May 7, 2018
1 parent 2ed697a commit fb8ec27
Showing 1 changed file with 214 additions and 0 deletions.
214 changes: 214 additions & 0 deletions Feature Importance.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"root\n",
" |-- A: integer (nullable = true)\n",
" |-- B: integer (nullable = true)\n",
" |-- C: double (nullable = true)\n",
" |-- D: integer (nullable = true)\n",
" |-- Spoiled: double (nullable = true)\n",
"\n"
]
}
],
"source": [
"from pyspark.sql import SparkSession\n",
"\n",
"# Reuse an active Spark session if one exists; otherwise start one for this notebook.\n",
"spark = (\n",
"    SparkSession.builder\n",
"    .appName('dog_food_tree')\n",
"    .getOrCreate()\n",
")\n",
"\n",
"# header=True takes column names from the first row; inferSchema=True lets Spark pick the column types.\n",
"df = spark.read.csv('dog_food.csv', header=True, inferSchema=True)\n",
"df.printSchema()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---+---+----+---+-------+\n",
"| A| B| C| D|Spoiled|\n",
"+---+---+----+---+-------+\n",
"| 4| 2|12.0| 3| 1.0|\n",
"| 5| 6|12.0| 7| 1.0|\n",
"+---+---+----+---+-------+\n",
"only showing top 2 rows\n",
"\n"
]
}
],
"source": [
"df.show(2)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.ml.feature import VectorAssembler\n",
"\n",
"# Pack the four predictor columns into one vector column named 'features'.\n",
"assembler = VectorAssembler(\n",
"    inputCols=['A', 'B', 'C', 'D'],\n",
"    outputCol='features',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"output = assembler.transform(df)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---+---+----+---+-------+------------------+\n",
"| A| B| C| D|Spoiled| features|\n",
"+---+---+----+---+-------+------------------+\n",
"| 4| 2|12.0| 3| 1.0|[4.0,2.0,12.0,3.0]|\n",
"| 5| 6|12.0| 7| 1.0|[5.0,6.0,12.0,7.0]|\n",
"| 6| 2|13.0| 6| 1.0|[6.0,2.0,13.0,6.0]|\n",
"+---+---+----+---+-------+------------------+\n",
"only showing top 3 rows\n",
"\n"
]
}
],
"source": [
"output.show(3)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from pyspark.ml.classification import RandomForestClassifier\n",
"\n",
"# Random forests draw bootstrap samples and feature subsets at random, so pin the seed:\n",
"# without it the fitted trees, and the feature importances read from them, can change between runs.\n",
"rf = RandomForestClassifier(labelCol='Spoiled', featuresCol='features', seed=42)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the assembled feature vector and the 'Spoiled' label column.\n",
"final_df = output.select(\n",
"    'features',\n",
"    'Spoiled',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+------------------+-------+\n",
"| features|Spoiled|\n",
"+------------------+-------+\n",
"|[4.0,2.0,12.0,3.0]| 1.0|\n",
"|[5.0,6.0,12.0,7.0]| 1.0|\n",
"|[6.0,2.0,13.0,6.0]| 1.0|\n",
"+------------------+-------+\n",
"only showing top 3 rows\n",
"\n"
]
}
],
"source": [
"final_df.show(3)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"rf_model = rf.fit(final_df)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Row(features=DenseVector([4.0, 2.0, 12.0, 3.0]), Spoiled=1.0)]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"final_df.take(1)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"SparseVector(4, {0: 0.0212, 1: 0.0166, 2: 0.9373, 3: 0.025})"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rf_model.featureImportances"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit fb8ec27

Please sign in to comment.