Machine Learning – Tutorial 20

Support Vector Machine Intro and Application

# import libs
import numpy as np
from sklearn import preprocessing, neighbors, svm
# cross_validation is deprecated; train_test_split has moved into model_selection
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the dataset into a DataFrame.
# NOTE(review): the path is empty in this copy of the script, so read_csv
# will fail until it is set to the actual data file.
df = pd.read_csv('')
# There are gaps in the data denoted by '?' - convert them to -99999 so the
# algorithm treats them as outliers.
df.replace('?', -99999, inplace=True)
# Drop any useless data - in this case the ID.
# NOTE(review): the column name 'id' is assumed - confirm against the data
# file's header. errors='ignore' keeps the script running if it is absent.
df.drop(columns=['id'], errors='ignore', inplace=True)

# Define X & y (X for features; y for labels).
# X is everything except 'class'.
# In the data file a space after 'class' in the header caused errors here.
# axis is passed by keyword: pandas 2.0 no longer accepts it positionally.
X = np.array(df.drop(['class'], axis=1))
y = np.array(df['class'])

# Split the data into train and test datasets using train_test_split
# (20% of the rows are held out for testing).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Define the classifier (clf).
clf = svm.SVC()  # swapped out K Nearest Neighbors
# Fit the classifier on the training split; score() needs a fitted model.
clf.fit(X_train, y_train)

# Evaluate on the held-out test split and report the result.
accuracy = clf.score(X_test, y_test)
print(accuracy)


# Important: the array passed to predict needs to be 2D, so use double brackets rather than reshaping the array
#example_measures = np.array([[4,2,1,1,1,2,3,2,1],[4,2,1,2,2,2,3,2,1]])
#prediction = clf.predict(example_measures)

Leave a Reply