# Machine Learning – Tutorial 19

### Final thoughts on K Nearest Neighbors

https://pythonprogramming.net/final-thoughts-knn-machine-learning-tutorial/

```python
# imports
import numpy as np
from math import sqrt
import warnings
from collections import Counter
import pandas as pd
import random

# define function
def K_nearest_neighbours(data, predict, k=3):
    """Classify ``predict`` by majority vote among its k nearest neighbours.

    Args:
        data: Mapping of group label -> list of feature vectors belonging
            to that group (the training points).
        predict: Feature vector to classify; must have the same length as
            the vectors stored in ``data``.
        k: Number of nearest training points that take part in the vote.

    Returns:
        A ``(vote_result, confidence)`` tuple: the winning group label and
        the fraction of the k votes that the winning group received.

    Raises:
        ValueError: If there is nothing to vote with (``data`` holds no
            points, or ``k`` is smaller than 1).
    """
    if len(data) >= k:
        warnings.warn('K is set to value less than total voting groups!')

    # Euclidean distance from every training point to the query point,
    # stored together with the label of the group the point belongs to.
    distances = []
    for group in data:
        for features in data[group]:
            euclidean_distance = np.linalg.norm(np.array(features) - np.array(predict))
            distances.append([euclidean_distance, group])

    # Keep only the labels of the k closest points.
    votes = [pair[1] for pair in sorted(distances)[:k]]
    if not votes:
        raise ValueError('No neighbours available to vote: data is empty or k < 1')

    # The most common label wins; confidence is its share of the k votes.
    vote_result, winning_votes = Counter(votes).most_common(1)[0]
    confidence = winning_votes / k
    #print(vote_result, confidence)
    return vote_result, confidence
# Dataset location.
# NOTE(review): the original snippet used `df` without ever loading it; the
# file name below is the one used by the linked tutorial - confirm the path.
DATA_FILE = 'breast-cancer-wisconsin.data.txt'

# import data (done once: the cleaned data does not change between runs)
df = pd.read_csv(DATA_FILE)
# there are gaps in the data denoted by '?' - these need to be converted to
# -99999 so the algorithm treats them as outliers
df.replace('?', -99999, inplace=True)
# drop any useless data - in this case the ID
# (keyword form: pandas 2.x no longer accepts the axis as a positional argument)
df.drop(columns=['id'], inplace=True)
#print(df)
# convert everything in the list to a number
full_data = df.astype(float).values.tolist()
#print(full_data[:5]) # print first 5 rows

test_size = 0.2
accuracies = []

# Repeat the shuffle / split / evaluate cycle 25 times and average the results.
for run in range(25):
    random.shuffle(full_data)  # shuffles in place, so no reassignment is needed

    # Explicit split index: the first part is training data, the last
    # test_size fraction is test data (also correct when that fraction
    # rounds down to zero rows).
    split = len(full_data) - int(test_size * len(full_data))
    train_data = full_data[:split]
    test_data = full_data[split:]

    # Group the rows by the value in their last column (2 or 4), keeping
    # only the remaining columns as the feature vector.
    train_set = {2: [], 4: []}
    test_set = {2: [], 4: []}
    for row in train_data:
        train_set[row[-1]].append(row[:-1])  # -1 gives the last column
    for row in test_data:
        test_set[row[-1]].append(row[:-1])  # -1 gives the last column

    correct = 0
    total = 0

    for group in test_set:
        for data in test_set[group]:
            vote, confidence = K_nearest_neighbours(train_set, data, k=5)
            if group == vote:
                correct += 1
            total += 1

    #print('Accuracy', correct/total)
    accuracies.append(correct / total)

# Mean accuracy over all runs, as a percentage.
print((sum(accuracies)/len(accuracies)*100))

```