I was practicing vectorising backprop for a basic NN, and I tried modifying some code for binary classification that was originally written for multi-class classification. Code:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss
from tqdm import tqdm_notebook
import seaborn as sns
import imageio
import time
from IPython.display import HTML
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_blobs
data, labels_orig = make_blobs(n_samples=1000, centers=4, n_features=2, random_state=0)
labels = np.mod(labels_orig, 2)  # collapse the 4 blob labels into 2 classes
X_train, X_val, Y_train, Y_val = train_test_split(data, labels, stratify=labels, random_state=0)
Y_train = Y_train.reshape(-1,1)
W1 = np.random.randn(2,2)
W2 = np.random.randn(2,1)
class FF_MultiClass_InputWeightVectorisedEx:
    def __init__(self, W1, W2):
        self.W1 = W1.copy()  # (2, 2)
        self.W2 = W2.copy()  # (2, 1)
        self.B1 = np.zeros((1,2))
        self.B2 = np.zeros((1,1))

    def sigmoid(self, X):
        return 1.0/(1.0 + np.exp(-X))

    def softmax(self, X):
        exps = np.exp(X)
        return exps / np.sum(exps, axis=1).reshape(-1,1)

    def forward_pass(self, X):
        self.A1 = np.matmul(X, self.W1) + self.B1         # (N, 2) * (2, 2) -> (N, 2)
        self.H1 = self.sigmoid(self.A1)                   # (N, 2)
        self.A2 = np.matmul(self.H1, self.W2) + self.B2   # (N, 2) * (2, 1) -> (N, 1)
        self.H2 = self.softmax(self.A2)                   # (N, 1)
        return self.H2

    def grad_sigmoid(self, X):
        return X*(1-X)

    def grad(self, X, Y):
        self.forward_pass(X)
        m = X.shape[0]
        self.dA2 = self.H2 - Y                                         # (N, 1) - (N, 1) -> (N, 1)
        self.dW2 = np.matmul(self.H1.T, self.dA2)                      # (2, N) * (N, 1) -> (2, 1)
        self.dB2 = np.sum(self.dA2, axis=0).reshape(1, -1)             # (N, 1) -> (1, 1)
        self.dH1 = np.matmul(self.dA2, self.W2.T)                      # (N, 1) * (1, 2) -> (N, 2)
        self.dA1 = np.multiply(self.dH1, self.grad_sigmoid(self.H1))   # (N, 2) .* (N, 2) -> (N, 2)
        self.dW1 = np.matmul(X.T, self.dA1)                            # (2, N) * (N, 2) -> (2, 2)
        self.dB1 = np.sum(self.dA1, axis=0).reshape(1, -1)             # (N, 2) -> (1, 2)

    def fit(self, X, Y, epochs=1, learning_rate=1, display_loss=False):
        if display_loss:
            loss = {}
        for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            self.grad(X, Y)  # X -> (N, 2), Y -> (N, 1)
            m = X.shape[0]
            self.W2 -= learning_rate * (self.dW2/m)
            self.B2 -= learning_rate * (self.dB2/m)
            self.W1 -= learning_rate * (self.dW1/m)
            self.B1 -= learning_rate * (self.dB1/m)
            if display_loss:
                Y_pred = self.predict(X)
                loss[i] = log_loss(Y, Y_pred)
        if display_loss:
            plt.plot(loss.values())
            plt.xlabel('Epochs')
            plt.ylabel('Log Loss')
            plt.show()

    def predict(self, X):
        Y_pred = self.forward_pass(X)
        return np.array(Y_pred).squeeze()
models_init = [FF_MultiClass_InputWeightVectorisedEx(W1, W2)]
models = []
for idx, model in enumerate(models_init, start=1):
    tic = time.time()
    ffsn_multi_specific = FF_MultiClass_InputWeightVectorisedEx(W1, W2)
    ffsn_multi_specific.fit(X_train, Y_train, epochs=2000, learning_rate=.5, display_loss=True)
    models.append(ffsn_multi_specific)
    toc = time.time()
    print("Time taken by model {}: {}".format(idx, toc-tic))
The error I'm getting comes from the display-loss part of the fit method in the class above:
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
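For context, this is roughly how I checked what predict actually hands to log_loss (a quick sanity-check sketch of my own; probe is just a throwaway name, not part of the original comparison code):

probe = FF_MultiClass_InputWeightVectorisedEx(W1, W2)
probe.fit(X_train, Y_train, epochs=100, learning_rate=.5)  # display_loss off, so no log_loss call yet
Y_pred = probe.predict(X_train)
print(np.isnan(Y_pred).any(), np.isinf(Y_pred).any())      # does the output contain NaN/inf?
print(Y_pred.min(), Y_pred.max())                          # range of the values fed to log_loss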
The original multi-class classification display-loss code was:
if display_loss:
    Y_pred = self.predict(X)
    loss[i] = log_loss(np.argmax(Y, axis=1), Y_pred)
My display-loss code (based on the multi-class one) is:
if display_loss:
    Y_pred = self.predict(X)
    loss[i] = log_loss(Y, Y_pred)
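For comparison, these are the shapes I believe each version feeds to log_loss (my own notes, based on the (N, 1) Y_train reshape above and the original code's (N, 4) one-hot Y):

# multi-class original: Y is one-hot encoded with shape (N, 4)
#   log_loss(np.argmax(Y, axis=1), Y_pred)   # integer labels (N,), predicted probabilities (N, 4)
# my binary version: Y is an (N, 1) column of 0/1
#   log_loss(Y, Y_pred)                      # labels (N, 1), predicted probabilities (N,) after squeeze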
As I am new to NNs in general, can anyone tell me what caused this error and how it can be fixed? Edit: The loop at the bottom may look odd. Originally this code compared the running time of several NN implementations, but I was only interested in the one shown above, so I modified the loop with as few changes as possible.