Applying K Nearest Neighbors to Data
https://pythonprogramming.net/k-nearest-neighbors-application-machine-learning-tutorial/
Data links:-
# import libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing, neighbors
# cross_validation is deprecated; train_test_split now lives in model_selection
from sklearn.model_selection import train_test_split

df = pd.read_csv('breast-cancer-wisconsin.data')

# there are gaps in the data denoted by '?' - these need to be converted to -99999
# so the algorithm treats them as outliers
df.replace('?', -99999, inplace=True)

# drop any useless data - in this case the ID column
df.drop('id', axis=1, inplace=True)
#print(df)

# define X & y (X for features, y for labels)
# X is everything except 'class'
# In the data file I had a space after 'class' which caused errors
X = np.array(df.drop(['class'], axis=1))
y = np.array(df['class'])

# split the data into train and test datasets using train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# define the classifier (clf)
clf = neighbors.KNeighborsClassifier()

# fit (train) the classifier
clf.fit(X_train, y_train)

# score the classifier on the held-out test data
accuracy = clf.score(X_test, y_test)
print(accuracy)

# important: predict() expects a 2D array, so double brackets are needed
# rather than reshaping the array
example_measures = np.array([[4,2,1,1,1,2,3,2,1],[4,2,1,2,2,2,3,2,1]])
prediction = clf.predict(example_measures)
print(prediction)
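A side note on the 2D-array requirement: if you only have one new sample to classify, an alternative to the double brackets is to reshape a flat array to shape (1, n_features). A minimal sketch, assuming the trained clf from above and a made-up sample:

# one hypothetical sample with 9 feature values, matching the dataset's feature columns
single_measure = np.array([4, 2, 1, 1, 1, 2, 3, 2, 1])

# reshape the flat (9,) array to (1, 9) so predict() accepts it as a single sample
single_measure = single_measure.reshape(1, -1)

print(clf.predict(single_measure))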
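Also note that train_test_split shuffles the data randomly, so the printed accuracy will vary a little from run to run. One way to get a more stable estimate is to repeat the split/fit/score cycle and average the results; this is only a sketch and assumes X and y have already been built as above:

accuracies = []
for _ in range(25):
    # new random 80/20 split each iteration
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    clf = neighbors.KNeighborsClassifier()
    clf.fit(X_train, y_train)
    accuracies.append(clf.score(X_test, y_test))

# average accuracy over all runs
print(np.mean(accuracies))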