{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# K Nearest Neighbors"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import Libraries"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
" \n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.metrics import confusion_matrix, classification_report, accuracy_score"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"data/loans.csv\").round(1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Income | \n",
" Credit_Score | \n",
" Status | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 86499.0 | \n",
" 575.7 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 106113.7 | \n",
" 588.0 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 100279.1 | \n",
" 575.9 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 113616.9 | \n",
" 559.0 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 135667.3 | \n",
" 727.1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Income Credit_Score Status\n",
"0 86499.0 575.7 0\n",
"1 106113.7 588.0 1\n",
"2 100279.1 575.9 0\n",
"3 113616.9 559.0 1\n",
"4 135667.3 727.1 1"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run Classifier"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# extract X,y from pandas\n",
"X = df.drop(['Status'],axis=1)\n",
"y = df['Status']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# set classifiers for k =3,5\n",
"knn3 = KNeighborsClassifier(n_neighbors=3).fit(X,y)\n",
"knn5 = KNeighborsClassifier(n_neighbors=5).fit(X,y)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Evaluate Classifier"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.928\n"
]
}
],
"source": [
"y_pred = knn3.predict(X)\n",
"print(accuracy_score(y,y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[474 26]\n",
" [ 46 454]]\n"
]
}
],
"source": [
"print(confusion_matrix(y,y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" 0 0.91 0.95 0.93 500\n",
" 1 0.95 0.91 0.93 500\n",
"\n",
" accuracy 0.93 1000\n",
" macro avg 0.93 0.93 0.93 1000\n",
"weighted avg 0.93 0.93 0.93 1000\n",
"\n"
]
}
],
"source": [
"print(classification_report(y,y_pred))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predictions"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# define new points for which we want prediction\n",
"new_points =[[82000,530],[123000,510],[90000,670],[99000,610]]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"y_pred_3 = knn3.predict(new_points)\n",
"y_pred_5 = knn5.predict(new_points)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0 1 0 1]\n",
"[0 1 0 0]\n"
]
}
],
"source": [
"# print predictions\n",
"print(y_pred_3)\n",
"print(y_pred_5)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Point1 | \n",
" Point2 | \n",
" Point3 | \n",
" Point4 | \n",
"
\n",
" \n",
" \n",
" \n",
" KNN=3 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" KNN=5 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Point1 Point2 Point3 Point4\n",
"KNN=3 0 1 0 1\n",
"KNN=5 0 1 0 0"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = {'KNN=3': y_pred_3, 'KNN=5': y_pred_5}\n",
"pd.DataFrame.from_dict(data, orient='index',\n",
" columns=['Point1','Point2','Point3','Point4'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.11"
}
},
"nbformat": 4,
"nbformat_minor": 4
}