K Nearest Neighbors

Import Libraries

import pandas as pd
import numpy as np
 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

Load Data

# Read the loan records, then round every numeric column to one decimal place.
df = pd.read_csv("data/loans.csv")
df = df.round(1)
# Preview the first five rows.
df.head()
Income Credit_Score Status
0 86499.0 575.7 0
1 106113.7 588.0 1
2 100279.1 575.9 0
3 113616.9 559.0 1
4 135667.3 727.1 1

Run Classifier

# Separate the target column from the feature columns.
y = df['Status']
X = df.drop(columns=['Status'])
# Fit one classifier per neighbourhood size (k = 3 and k = 5) on the full data.
# NOTE(review): the features are used unscaled; with the default distance
# metric the much larger Income values presumably dominate Credit_Score.
# Consider standardizing -- confirm first, as it would change the results below.
knn3, knn5 = (KNeighborsClassifier(n_neighbors=k).fit(X, y) for k in (3, 5))

Evaluate Classifier

# Predict on the same rows the k=3 model was fitted on, so the score printed
# here is training (resubstitution) accuracy, not a held-out estimate.
y_pred = knn3.predict(X)
print(accuracy_score(y_true=y, y_pred=y_pred))
0.928
# Rows are the true classes, columns are the predicted classes.
print(confusion_matrix(y_true=y, y_pred=y_pred))
[[474  26]
 [ 46 454]]
# Per-class precision, recall, F1 and support for the k=3 predictions.
print(classification_report(y_true=y, y_pred=y_pred))
              precision    recall  f1-score   support

           0       0.91      0.95      0.93       500
           1       0.95      0.91      0.93       500

    accuracy                           0.93      1000
   macro avg       0.93      0.93      0.93      1000
weighted avg       0.93      0.93      0.93      1000

Predictions

# Define the new points to classify, one [Income, Credit_Score] pair each.
new_points = [[82000, 530], [123000, 510], [90000, 670], [99000, 610]]
# The models were fitted on a DataFrame, so hand them the new points with the
# same column labels: a bare list triggers scikit-learn's "X does not have
# valid feature names" warning and would hide a column-order mismatch.
new_X = pd.DataFrame(new_points, columns=X.columns)
y_pred_3 = knn3.predict(new_X)
y_pred_5 = knn5.predict(new_X)
# Print the predicted Status for each point: k=3 first, then k=5.
print(y_pred_3)
print(y_pred_5)
[0 1 0 1]
[0 1 0 0]
# Tabulate the predictions: one row per model, one column per new point.
data = {'KNN=3': y_pred_3, 'KNN=5': y_pred_5}
# Derive the column labels from the number of predictions instead of
# hard-coding four names, so the table still builds if new_points changes size.
point_labels = ['Point{}'.format(i) for i in range(1, len(y_pred_3) + 1)]
pd.DataFrame.from_dict(data, orient='index', columns=point_labels)
Point1 Point2 Point3 Point4
KNN=3 0 1 0 1
KNN=5 0 1 0 0