"""Support Vector Machine Intro and Application.

Tutorial source:
https://pythonprogramming.net/support-vector-machine-intro-machine-learning-tutorial/

Loads ``breast-cancer-wisconsin.data``, trains a support vector classifier
on 80% of the rows and prints the score obtained on the remaining 20%.
"""

# import libs
import numpy as np
from sklearn import preprocessing, neighbors, svm
# cross_validation is deprecated and train_test_split moved into model_selection
from sklearn.model_selection import train_test_split
import pandas as pd

df = pd.read_csv('breast-cancer-wisconsin.data')

# There are gaps in the data denoted by '?' - these need to be converted to
# -99999 so the algorithm treats them as outliers.
df.replace('?', -99999, inplace=True)

# Drop any useless data - in this case the ID.
# `axis` is passed by keyword: the positional form (`df.drop('id', 1)`) was
# deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
df.drop('id', axis=1, inplace=True)
# print(df)

# Define X & y (X for features; y for labels).
# X is everything except 'class'.
# In the datafile I had a space after 'class' which caused errors.
X = np.array(df.drop(['class'], axis=1))
y = np.array(df['class'])

# Split the data into train and test datasets using train_test_split.
# No random_state is given, so the split (and the printed accuracy) varies
# from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Define classifier (clf).
clf = svm.SVC()  # swapped out K Nearest neighbors

# Fit the classifier.
clf.fit(X_train, y_train)

accuracy = clf.score(X_test, y_test)
print(accuracy)

# Important: the array needs to be 2D, so double brackets are needed rather
# than reshaping the array.
# example_measures = np.array([[4,2,1,1,1,2,3,2,1],[4,2,1,2,2,2,3,2,1]])
# prediction = clf.predict(example_measures)
# print(prediction)