Support Vector Machines

Machine Learning – Tutorial 28

Visualization and Predicting with our Custom SVM

https://pythonprogramming.net/predictions-svm-machine-learning-tutorial/

 

import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
style.use('ggplot')

# build SVM class

class Support_Vector_Machine:
    # __init__ runs whenever an object is created from the class
    # self gives each method access to the instance's shared attributes, so it appears in every method definition
    def __init__(self, visualisation=True):
        # sets visualisation to whatever the user specifies (defaults to True)
        self.visualisation = visualisation
        # defines colours for the two states 1 & -1
        self.colors = {1:'r', -1:'b'}
        # sets some standards for the graphs
        if self.visualisation:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1,1,1)
    # train
    def fit(self, data):
        # set up access to the data that's passed when the function is called
        self.data = data
        # { ||w||: [w,b] }
        opt_dict = {}
        # sign combinations to apply to w so every quadrant gets checked
        transforms = [[1,1],
                      [-1,1],
                      [-1,-1],
                      [1,-1]]
        # finding values to work with for our ranges.
        all_data = [] # set up a placeholder for the values
        # for loop to step through data and append it to all_data (list of values)
        for yi in self.data:
            for featureset in self.data[yi]:
                for feature in featureset:
                    all_data.append(feature)
        # next define the max and min value in list
        self.max_feature_value = max(all_data)
        self.min_feature_value = min(all_data)
        # free up memory once we've got the values
        all_data=None
        # define step sizes for the optimisation, from big through to small
        step_sizes = [self.max_feature_value * 0.1,
                      self.max_feature_value * 0.01,
                      # starts getting very high cost after this.
                      self.max_feature_value * 0.001]

        # searching b is extremely expensive, so use a wider range and coarser steps than for w
        b_range_multiple = 5
        b_multiple = 5
        # starting value for both elements of vector w
        latest_optimum = self.max_feature_value*10

        ## Begin the stepping process
        for step in step_sizes:
            w = np.array([latest_optimum,latest_optimum])
            # we can step down like this because the problem is convex
            optimized = False
            while not optimized:
                # we're not optimising b as much as w (not needed)
                for b in np.arange(-1*(self.max_feature_value*b_range_multiple),
                                   self.max_feature_value*b_range_multiple,
                                   step*b_multiple):
                    for transformation in transforms:
                        w_t = w*transformation
                        found_option = True
                        # weakest link in the SVM fundamentally
                        # SMO attempts to fix this a bit
                        # yi(xi.w+b) >= 1
                        #
                        # #### add a break here later..
                        for i in self.data:
                            for xi in self.data[i]:
                                yi=i
                                if not yi*(np.dot(w_t,xi)+b) >= 1:
                                    found_option = False

                        if found_option:
                            opt_dict[np.linalg.norm(w_t)] = [w_t,b]

                if w[0]<0:
                    optimized = True
                    print('optimised a step')
                else:
                    w = w - step

            # out of the while loop: take a list of the magnitudes and sort them
            norms = sorted([n for n in opt_dict]) # sorting lowest to highest
            #||w|| : [w,b]
            opt_choice = opt_dict[norms[0]] # smallest magnitude
            self.w = opt_choice[0] # sets w to first element in the smallest mag
            self.b = opt_choice[1] # sets b to second element in the smallest mag
            latest_optimum = opt_choice[0][0]+step*2  # resetting the opt to the latest

    def predict(self,features):
        # sign( x.w+b )
        classification = np.sign(np.dot(np.array(features),self.w)+self.b)
        if classification !=0 and self.visualisation:
            self.ax.scatter(features[0], features[1], s=100, marker='*', c=self.colors[classification])

        return classification

    def visualise(self):
        # scatter the known featuresets with a one-line nested comprehension
        # (note: this relies on the global data_dict rather than self.data)
        [[self.ax.scatter(x[0],x[1],s=100,color=self.colors[i]) for x in data_dict[i]] for i in data_dict]
        # hyperplane = x.w+b
        def hyperplane(x,w,b,v):
            # v = w.x+b; solving w[0]*x + w[1]*y + b = v for y gives the
            # y value of the line at a given x
            return (-w[0]*x-b+v) / w[1]

        datarange = (self.min_feature_value*0.9,self.max_feature_value*1.1) # gives space on the graph
        hyp_x_min = datarange[0]
        hyp_x_max = datarange[1]

        # w.x + b = 1
        # pos sv hyperplane
        psv1 = hyperplane(hyp_x_min, self.w, self.b, 1) # define the ys
        psv2 = hyperplane(hyp_x_max, self.w, self.b, 1) # define the ys
        self.ax.plot([hyp_x_min,hyp_x_max], [psv1,psv2], "k") # plot [xs], [ys]; 'k' = black line, 'g--' = green dashed
        # w.x + b = -1
        # negative sv hyperplane
        nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1)
        nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1)
        self.ax.plot([hyp_x_min,hyp_x_max], [nsv1,nsv2], "k")

        # w.x + b = 0
        # decision
        db1 = hyperplane(hyp_x_min, self.w, self.b, 0)
        db2 = hyperplane(hyp_x_max, self.w, self.b, 0)
        self.ax.plot([hyp_x_min,hyp_x_max], [db1,db2], "g--")

        plt.show()

# define data dictionary
data_dict = {-1:np.array([[1,7],
                          [2,8],
                          [3,8],]),

             1:np.array([[5,1],
                         [6,-1],
                         [7,3],])}

svm = Support_Vector_Machine()
svm.fit(data=data_dict)
predict_us = [[0,10],
              [1,3],
              [3,4],
              [3,5],
              [5,5],
              [5,6],
              [6,-5],
              [5,8]]

for p in predict_us:
    svm.predict(p)

svm.visualise()
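
As a quick sanity check (a minimal sketch reusing the svm object fitted above), every training point should satisfy the constraint yi(xi.w+b) >= 1 that fit() optimises against:

# sanity check: print yi*(xi.w + b) for every training point
# each value should come out >= 1, with the support vectors closest to 1
for yi in data_dict:
    for xi in data_dict[yi]:
        print(xi, ':', yi * (np.dot(svm.w, xi) + svm.b))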

Machine Learning – Tutorial 27

Support Vector Machine Optimization in Python part 2

https://pythonprogramming.net/svm-optimization-python-2-machine-learning-tutorial/

 

import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
style.use('ggplot')

# build SVM class

class Support_Vector_Machine:
    # __init__ runs whenever an object is created from the class
    # self gives each method access to the instance's shared attributes, so it appears in every method definition
    def __init__(self, visualisation=True):
        # sets visualisation to whatever the user specifies (defaults to True)
        self.visualisation = visualisation
        # defines colours for the two states 1 & -1
        self.colors = {1:'r', -1:'b'}
        # sets some standards for the graphs
        if self.visualisation:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1,1,1)
    # train
    def fit(self, data):
        # set up access to the data that's passed when the function is called
        self.data = data
        # { ||w||: [w,b] }
        opt_dict = {}
        # sign combinations to apply to w so every quadrant gets checked
        transforms = [[1,1],
                      [-1,1],
                      [-1,-1],
                      [1,-1]]
        # finding values to work with for our ranges.
        all_data = [] # set up a placeholder for the values
        # for loop to step through data and append it to all_data (list of values)
        for yi in self.data:
            for featureset in self.data[yi]:
                for feature in featureset:
                    all_data.append(feature)
        # next define the max and min value in list
        self.max_feature_value = max(all_data)
        self.min_feature_value = min(all_data)
        # free up memory once we've got the values
        all_data=None
        # define step sizes for the optimisation, from big through to small
        step_sizes = [self.max_feature_value * 0.1,
                      self.max_feature_value * 0.01,
                      # starts getting very high cost after this.
                      self.max_feature_value * 0.001]

        # searching b is extremely expensive, so use a wider range and coarser steps than for w
        b_range_multiple = 5
        b_multiple = 5
        # starting value for both elements of vector w
        latest_optimum = self.max_feature_value*10

        ## Begin the stepping process
        for step in step_sizes:
            w = np.array([latest_optimum,latest_optimum])
            # we can step down like this because the problem is convex
            optimized = False
            while not optimized:
                # we're not optimising b as much as w (not needed)
                for b in np.arange(-1*(self.max_feature_value*b_range_multiple),
                                   self.max_feature_value*b_range_multiple,
                                   step*b_multiple):
                    for transformation in transforms:
                        w_t = w*transformation
                        found_option = True
                        # weakest link in the SVM fundamentally
                        # SMO attempts to fix this a bit
                        # yi(xi.w+b) >= 1
                        # 
                        # #### add a break here later..
                        for i in self.data:
                            for xi in self.data[i]:
                                yi=i
                                if not yi*(np.dot(w_t,xi)+b) >= 1:
                                    found_option = False
                                    
                        if found_option:
                            opt_dict[np.linalg.norm(w_t)] = [w_t,b]
                                
                if w[0]<0:
                    optimized = True
                    print('optimised a step')
                else:
                    w = w - step

            # out of the while loop: take a list of the magnitudes and sort them
            norms = sorted([n for n in opt_dict]) # sorting lowest to highest
            #||w|| : [w,b]
            opt_choice = opt_dict[norms[0]] # smallest magnitude
            self.w = opt_choice[0] # sets w to first element in the smallest mag
            self.b = opt_choice[1] # sets b to second element in the smallest mag
            latest_optimum = opt_choice[0][0]+step*2  # resetting the opt to the latest               

    def predict(self,features):
        # sign( x.w+b )
        classification = np.sign(np.dot(np.array(features),self.w)+self.b)

        return classification

# define data dictionary
data_dict = {-1:np.array([[1,7],
                          [2,8],
                          [3,8],]),

             1:np.array([[5,1],
                         [6,-1],
                         [7,3],])}
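
The class isn't exercised at this stage of the tutorial, but a minimal usage sketch (mirroring the calls added in Tutorial 28) would be:

# fit the SVM on the data dictionary, then classify a new point
svm = Support_Vector_Machine()
svm.fit(data=data_dict)
print(svm.w, svm.b)         # the optimised weight vector and bias
print(svm.predict([3,4]))   # returns 1.0 or -1.0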

Machine Learning – Tutorial 26

Support Vector Machine Optimization in Python

https://pythonprogramming.net/svm-optimization-python-machine-learning-tutorial/


import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
style.use('ggplot')

# build SVM class

class Support_Vector_Machine:
    # __init__ runs whenever an object is created from the class
    # self gives each method access to the instance's shared attributes, so it appears in every method definition
    def __init__(self, visualisation=True):
        # sets visualisation to whatever the user specifies (defaults to True)
        self.visualisation = visualisation
        # defines colours for the two states 1 & -1
        self.colors = {1:'r', -1:'b'}
        # sets some standards for the graphs
        if self.visualisation:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1,1,1)
    # train
    def fit(self, data):
        # set up access to the data that's passed when the function is called
        self.data = data
        # { ||w||: [w,b] }
        opt_dict = {}
        # sign combinations to apply to w so every quadrant gets checked
        transforms = [[1,1],
                      [-1,1],
                      [-1,-1],
                      [1,-1]]
        # finding values to work with for our ranges.
        all_data = [] # set up a placeholder for the values
        # for loop to step through data and append it to all_data (list of values)
        for yi in self.data:
            for featureset in self.data[yi]:
                for feature in featureset:
                    all_data.append(feature)
        # next define the max and min value in list
        self.max_feature_value = max(all_data)
        self.min_feature_value = min(all_data)
        # free up memory once we've got the values
        all_data=None        
        # define step sizes for the optimisation, from big through to small
        step_sizes = [self.max_feature_value * 0.1,
                      self.max_feature_value * 0.01,
                      # starts getting very high cost after this.
                      self.max_feature_value * 0.001]      

        # searching b is extremely expensive, so use a wider range and coarser steps than for w
        b_range_multiple = 5
        b_multiple = 5
        # starting value for both elements of vector w
        latest_optimum = self.max_feature_value*10

        ## Begin the stepping process
        for step in step_sizes:
            w = np.array([latest_optimum,latest_optimum])
            # we can step down like this because the problem is convex
            optimized = False
            while not optimized:
                # placeholder - the optimisation loop is filled in during Tutorial 27;
                # as written, calling fit() would spin here forever
                pass
        
    def predict(self,features):
        # sign( x.w+b )
        classification = np.sign(np.dot(np.array(features),self.w)+self.b)

        return classification

# define data dictionary
data_dict = {-1:np.array([[1,7],
                          [2,8],
                          [3,8],]),

             1:np.array([[5,1],
                         [6,-1],
                         [7,3],])}

Machine Learning – Tutorial 25

Beginning SVM from Scratch in Python

https://pythonprogramming.net/svm-in-python-machine-learning-tutorial/

import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
style.use('ggplot')

# build SVM class

class Support_Vector_Machine:
    # __init__ runs whenever an object is created from the class
    # self gives each method access to the instance's shared attributes, so it appears in every method definition
    def __init__(self, visualisation=True):
        # sets visualisation to whatever the user specifies (defaults to True)
        self.visualisation = visualisation
        # defines colours for the two states 1 & -1
        self.colors = {1:'r', -1:'b'}
        # sets some standards for the graphs
        if self.visualisation:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1,1,1)
    # train - just a stub for now; the optimisation is built up in Tutorials 26 and 27
    def fit(self, data):
        pass

    def predict(self,features):
        # sign( x.w+b )
        # note: self.w and self.b are only set once fit() is implemented,
        # so calling predict at this stage raises an AttributeError
        classification = np.sign(np.dot(np.array(features),self.w)+self.b)

        return classification

# define data dictionary
data_dict = {-1:np.array([[1,7],
                          [2,8],
                          [3,8],]),

             1:np.array([[5,1],
                         [6,-1],
                         [7,3],])}

Machine Learning – Tutorial 20

Support Vector Machine Intro and Application

https://pythonprogramming.net/support-vector-machine-intro-machine-learning-tutorial/

# import libs
import numpy as np
from sklearn import preprocessing, neighbors, svm
# cross_validation is deprecated; train_test_split moved into model_selection
from sklearn.model_selection import train_test_split
import pandas as pd

df = pd.read_csv('breast-cancer-wisconsin.data')
# gaps in the data are denoted by '?' - convert these to -99999 so the algorithm treats them as outliers
df.replace('?',-99999,inplace=True)
# drop any useless data - in this case the ID
df.drop('id', axis=1, inplace=True) # pass axis explicitly; positional axis was removed in pandas 2.x
#print(df)

# define X & y (X for features; y for labels)
# X is everything except 'class'
# In the datafile I had a space after 'class' which caused errors
X = np.array(df.drop(['class'], axis=1))
y = np.array(df['class'])

# split the data into train and test datasets using train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
#define classifier (clf)
clf = svm.SVC() # swapped out K Nearest Neighbors for a support vector classifier
# fit the classifier
clf.fit(X_train, y_train)

accuracy = clf.score(X_test, y_test)

print(accuracy)

# important: predict expects a 2D array, so double brackets are needed (rather than reshaping the array)
#example_measures = np.array([[4,2,1,1,1,2,3,2,1],[4,2,1,2,2,2,3,2,1]])
#prediction = clf.predict(example_measures)
#print(prediction)
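
If you only have a single sample to classify, reshape works just as well as the double brackets (a minimal sketch; the feature values below are purely illustrative):

# classify one sample: reshape(1, -1) turns a flat array into 1 row x 9 features
example = np.array([4,2,1,1,1,2,3,2,1]).reshape(1, -1)
print(clf.predict(example))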