{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# K Nearest Neighbors\n",
    "\n",
    "Fits k-nearest-neighbour classifiers with k = 3 and k = 5 on `data/loans.csv` (features `Income` and `Credit_Score`, label `Status`), scores the k = 3 model on the data it was fitted on, and compares both models' predictions for a few new points."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.metrics import confusion_matrix, classification_report, accuracy_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the loans table and round every numeric column to one decimal place.\n",
    "df = (\n",
    "    pd.read_csv(\"data/loans.csv\")\n",
    "    .round(1)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IncomeCredit_ScoreStatus
086499.0575.70
1106113.7588.01
2100279.1575.90
3113616.9559.01
4135667.3727.11
\n", "
" ], "text/plain": [ " Income Credit_Score Status\n", "0 86499.0 575.7 0\n", "1 106113.7 588.0 1\n", "2 100279.1 575.9 0\n", "3 113616.9 559.0 1\n", "4 135667.3 727.1 1" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run Classifier" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# extract X,y from pandas\n", "X = df.drop(['Status'],axis=1)\n", "y = df['Status']" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# set classifiers for k =3,5\n", "knn3 = KNeighborsClassifier(n_neighbors=3).fit(X,y)\n", "knn5 = KNeighborsClassifier(n_neighbors=5).fit(X,y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Evaluate Classifier" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.928\n" ] } ], "source": [ "y_pred = knn3.predict(X)\n", "print(accuracy_score(y,y_pred))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[474 26]\n", " [ 46 454]]\n" ] } ], "source": [ "print(confusion_matrix(y,y_pred))" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.91 0.95 0.93 500\n", " 1 0.95 0.91 0.93 500\n", "\n", " accuracy 0.93 1000\n", " macro avg 0.93 0.93 0.93 1000\n", "weighted avg 0.93 0.93 0.93 1000\n", "\n" ] } ], "source": [ "print(classification_report(y,y_pred))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Predictions" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# define new points for which we want prediction\n", "new_points =[[82000,530],[123000,510],[90000,670],[99000,610]]" 
]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Classify the new points with each fitted model (k = 3 first, then k = 5).\n",
    "y_pred_3, y_pred_5 = (model.predict(new_points) for model in (knn3, knn5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 1 0 1]\n",
      "[0 1 0 0]\n"
     ]
    }
   ],
   "source": [
    "# Show both prediction vectors, one per line: k = 3 first, k = 5 second.\n",
    "print(y_pred_3, y_pred_5, sep='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Point1Point2Point3Point4
KNN=30101
KNN=50100
\n", "
" ], "text/plain": [ " Point1 Point2 Point3 Point4\n", "KNN=3 0 1 0 1\n", "KNN=5 0 1 0 0" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = {'KNN=3': y_pred_3, 'KNN=5': y_pred_5}\n", "pd.DataFrame.from_dict(data, orient='index',\n", " columns=['Point1','Point2','Point3','Point4'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" } }, "nbformat": 4, "nbformat_minor": 4 }