Perceptron Model (PLA)

In this note I use the Homework 1 dataset from the Coursera course Machine Learning Foundations.

First I import the data with the pandas.read_csv function and split the dataset with train_test_split, which shuffles the data and splits it into train / test sets.
Then I implement two perceptron models. In the first, I use sklearn.linear_model.Perceptron to train a perceptron and evaluate its accuracy. In the second, I implement the perceptron from scratch, using the hypothesis y = sign(wᵀx), and evaluate its accuracy.
Finally, I measure the average number of updates until convergence, along with the wall-clock time, to check the performance of the hand-written perceptron model.

Import Data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
column_names = ['x1', 'x2', 'x3', 'x4', 'y']
data = pd.read_csv('hw1_15_train.dat', sep=r"\s+", header=None, names=column_names)
data.head()

x1 x2 x3 x4 y
0 0.97681 0.107230 0.64385 0.29556 1
1 0.67194 0.241800 0.83075 0.42741 1
2 0.20619 0.233210 0.81004 0.98691 1
3 0.51583 0.055814 0.92274 0.75797 1
4 0.70893 0.108360 0.33951 0.77058 1

Note: if we don't use sep=r"\s+", pandas assumes comma-separated input, so each whitespace/tab-delimited row lands in a single column:

data2 = pd.read_csv('hw1_15_train.dat', names=column_names)
data2.head()

x1 x2 x3 x4 y
0 0.97681 0.10723 0.64385 0.29556\t1 NaN NaN NaN NaN
1 0.67194 0.2418 0.83075 0.42741\t1 NaN NaN NaN NaN
2 0.20619 0.23321 0.81004 0.98691\t1 NaN NaN NaN NaN
3 0.51583 0.055814 0.92274 0.75797\t1 NaN NaN NaN NaN
4 0.70893 0.10836 0.33951 0.77058\t1 NaN NaN NaN NaN

Perceptron 1 - sklearn.linear_model

#split data
from sklearn.model_selection import train_test_split

X = data[['x1', 'x2', 'x3', 'x4']]
y = data['y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=101)
X_train.head()

x1 x2 x3 x4
206 0.83858 0.81299 0.95404 0.624720
318 0.33281 0.83714 0.21334 0.275350
202 0.54582 0.79468 0.18279 0.048142
314 0.76590 0.28888 0.61728 0.165700
20 0.71073 0.29015 0.15557 0.705880
X_test.head()

x1 x2 x3 x4
38 0.91059 0.18045 0.089421 0.59719
387 0.50468 0.99699 0.751360 0.51681
270 0.51718 0.67211 0.708280 0.31218
181 0.32174 0.85545 0.713780 0.91737
195 0.61368 0.29695 0.357480 0.84100
len(y_train)
240
from sklearn.linear_model import Perceptron
pct = Perceptron(max_iter=100, eta0=0.001, random_state=0)  # max epochs, learning rate, seed used to shuffle the training data
pct.fit(X_train, y_train)

y_pred = pct.predict(X_test)
pct.score(X_train, y_train)
from sklearn.metrics import accuracy_score
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
Accuracy: 1.00
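For reference, the fitted model exposes the learned hyperplane sign(wᵀx + b), which corresponds to the weight vector and the x0 column we build by hand in the next section. A minimal sketch (coef_, intercept_, and score are standard sklearn attributes; the print labels are mine):

pct_w = pct.coef_[0]       # one weight per feature x1..x4
pct_b = pct.intercept_[0]  # bias term, the manual model's w0
print("w =", pct_w, "b =", pct_b)
print("test accuracy: %.2f" % pct.score(X_test, y_test))  # same value as accuracy_score above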

Perceptron 2 - Manual Implementation

Here we construct the model as a Perceptron class, so that it has the same form as the sklearn.linear_model perceptron. When the Perceptron class is instantiated, the constructor reads the dataset, splitting fields on whitespace. The learning rule is the standard PLA update: whenever sign(wᵀx_n) ≠ y_n, set w ← w + η · y_n · x_n. Because the data might not be linearly separable, Perceptron.train() takes a random seed, a learning rate, and a maximum number of iterations, to avoid looping forever.

from sklearn.utils import shuffle
from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import random


class Perceptron(object):
    def __init__(self, dataset):

        self.data = pd.read_csv(dataset, sep=r"\s+", header=None)
        self.num_of_datas = self.data.shape[0]
        self.num_of_features = self.data.shape[1] - 1
        self.w = np.zeros(self.num_of_features + 1)
        self.t = 0

    def train(self, random_seed=-1, learning_speed=1, max_iteration=10000):

        X = self.data[self.data.columns[0:self.num_of_features]]
        y = self.data[self.num_of_features]

        if random_seed > -1:
            # a fixed, pre-determined random cycle: shuffle once, then always
            # visit the examples in that order
            X_train, y_train = shuffle(X, y, random_state=random_seed)
        else:
            X_train = X
            y_train = y
        # Note: sklearn's Perceptron handles the bias itself, so there we
        # wouldn't have to insert an x0 = 1 column; here we must.
        # DataFrame.insert(loc, column, value, allow_duplicates=False)
        X_train = X_train.copy()  # work on a copy so self.data is never mutated
        X_train.insert(0, "x0", np.ones(len(y_train)), False)

        def sign_positive(num):
            # sign(0) counts as -1, so a score of 0 on a +1 example is a mistake
            if num > 0:
                return 1
            else:
                return -1

        w = np.zeros(self.num_of_features + 1)
        for t in range(0, max_iteration):
            for n in range(0, len(X_train)):
                if sign_positive(np.dot(w, X_train.iloc[n])) != y_train.iloc[n]:
                    # correct the first mistake in this pass, then restart the cycle
                    w = w + learning_speed * y_train.iloc[n] * X_train.iloc[n]
                    # print("iteration:", t, "index:", n, "mistake on:", X_train.iloc[n].name)
                    break
                elif n == len(X_train) - 1:
                    # a full pass with no mistakes: converged after t updates
                    # print("no mistake for iteration:", t)
                    self.w = w
                    self.t = t
                    return
        self.w = w
        self.t = t
        return

# if __name__ == '__main__':

Q1. Implement PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm. Run the algorithm on the data set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average number of updates before the algorithm halts?

pct = Perceptron('hw1_15_train.dat')
tick1 = datetime.now()
_sum = 0
repeat = 2000
for i in range(repeat):
    pct.train(random.randint(1, 60000))
    _sum += pct.t
    # print(pct.t)
tick2 = datetime.now()
tdiff = tick2 - tick1
print("average number of updates:", _sum / repeat, "over", repeat, "runs in", tdiff.total_seconds(), "secs.")
average number of updates: 65.738 over 2000 runs in 404.713126 secs.
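Most of the ~400 seconds above is spent in row-wise pandas indexing (every X_train.iloc[n] builds a new Series). As a rough sketch, the same cyclic PLA on plain NumPy arrays runs much faster; pla_numpy and its arguments are my own names, not part of the class above, and it assumes X and y were converted with .to_numpy() and that y uses +/-1 labels:

def pla_numpy(X, y, learning_speed=1.0, max_iteration=10000, seed=0):
    rng = np.random.default_rng(seed)
    order = rng.permutation(len(y))                   # fixed, pre-determined random cycle
    Xs = np.hstack([np.ones((len(y), 1)), X[order]])  # prepend the x0 = 1 column
    ys = y[order]
    w = np.zeros(Xs.shape[1])
    for t in range(max_iteration):
        preds = np.where(Xs @ w > 0, 1, -1)           # sign with sign(0) = -1, as in sign_positive
        mistakes = preds != ys
        if not mistakes.any():
            return w, t                               # full pass with no mistakes: converged
        n = int(np.argmax(mistakes))                  # first misclassified example in the cycle
        w = w + learning_speed * ys[n] * Xs[n]
    return w, max_iteration

# usage sketch:
# X = data[['x1', 'x2', 'x3', 'x4']].to_numpy(); y = data['y'].to_numpy()
# w, t = pla_numpy(X, y, seed=1)

For a given visiting order this makes exactly the same sequence of updates as Perceptron.train(); note, however, that the shuffle here uses NumPy's generator, so identical seeds won't reproduce identical cycles across the two versions.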

Q2. Implement PLA by visiting examples in fixed, pre-determined random cycles throughout the algorithm with learning speed = 0.5. Run the algorithm on the data set. Please repeat your experiment for 2000 times, each with a different random seed. What is the average number of updates before the algorithm halts?

pct = Perceptron('hw1_15_train.dat')
tick1 = datetime.now()
_sum = 0
repeat = 2000
for i in range(repeat):
    pct.train(random.randint(1, 60000), 0.5)
    _sum += pct.t
    # print(pct.t)
tick2 = datetime.now()
tdiff = tick2 - tick1
print("average number of updates:", _sum / repeat, "over", repeat, "runs in", tdiff.total_seconds(), "secs.")
average number of updates: 64.804 over 2000 runs in 417.867396 secs.
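Note that halving the learning rate leaves the average essentially unchanged. This is expected: since w starts at the zero vector, every intermediate weight vector under learning rate η is exactly η times the one obtained with learning rate 1, and sign(wᵀx) is invariant to positive scaling, so for a given visiting order the PLA makes exactly the same sequence of updates for any η > 0. The small gap between 65.738 and 64.804 comes only from the two experiments drawing different random seeds.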