Welcome Guest ( Log In | Register )

Outline · [ Standard ] · Linear+

Python Need help with my neural network project, 6 hidden layer NN refuses to learn

views
     
TSZenithSkirmisher
post Aug 16 2022, 05:02 PM, updated 4y ago

Getting Started
**
Junior Member
261 posts

Joined: Oct 2018
Hello, I know this might not be the appropriate place to ask this but can someone help me with my neural network code, the outputs are either all the same for all samples or all will converge to the same number when num of outputs=1.

I think there's something wrong with the backprop but I cant figure out what, can anyone advise ?

This is the link to the jupyternotebook: https://drive.google.com/file/d/1W5pJt4mbwn...iew?usp=sharing
weisinx7
post Aug 24 2022, 02:14 PM

Getting Started
**
Junior Member
213 posts

Joined: Oct 2019
Hi, could you reshare the gdrive link? Looks like i can't see anything from it.

Typically the problem will be under fitting, but you may use larger batch size (probably 32 or larger) to let it converge faster.
TSZenithSkirmisher
post Aug 24 2022, 02:38 PM

Getting Started
**
Junior Member
261 posts

Joined: Oct 2018
import numpy as np
from scipy.special import expit as sigmoid
from scipy.special import softmax as sm
import pandas as pd
import math
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from math import sqrt


class NeuralNet:
def __init__(self, num_features, num_hidden1,num_hidden2,num_hidden3,num_hidden4,num_hidden5,num_hidden6 ,alpha, max_epochs, num_output=1):
super().__init__()
self.num_features=num_features # number of input nodes (features)
self.num_hidden1=num_hidden1 # number of hidden nodes for 1st hidden layer
self.num_hidden2=num_hidden2 # number of hidden nodes for 2nd hidden layer
self.num_hidden3=num_hidden3 # number of hidden nodes for 3rd hidden layer
self.num_hidden4=num_hidden4 # number of hidden nodes for 4th hidden layer
self.num_hidden5=num_hidden5 # number of hidden nodes for 5th hidden layer
self.num_hidden6=num_hidden6 # number of hidden nodes for 6th hidden layer
self.alpha=alpha # learning rate
self.max_epochs=max_epochs # maximum number of epochs
self.num_output=num_output # number of output nodes
self.loss = [] #list to store losses per 100 epochs
self.Weights_Input_to_H1=np.random.randn(self.num_hidden1, self.num_features)*(2/sqrt(self.num_features))
self.Bias_Input_to_H1=np.zeros([self.num_hidden1,1])
self.Weights_H1_to_H2=np.random.randn(self.num_hidden2, self.num_hidden1)*(2/sqrt(self.num_hidden1))
self.Bias_H1_to_H2=np.zeros([self.num_hidden2,1])
self.Weights_H2_to_H3=np.random.randn(self.num_hidden3, self.num_hidden2)*(2/sqrt(self.num_hidden2))
self.Bias_H2_to_H3=np.zeros([self.num_hidden3,1])
self.Weights_H3_to_H4=np.random.randn(self.num_hidden4, self.num_hidden3)*(2/sqrt(self.num_hidden3))
self.Bias_H3_to_H4=np.zeros([self.num_hidden4,1])
self.Weights_H4_to_H5=np.random.randn(self.num_hidden5, self.num_hidden4)*(2/sqrt(self.num_hidden4))
self.Bias_H4_to_H5=np.zeros([self.num_hidden5,1])
self.Weights_H5_to_H6=np.random.randn(self.num_hidden6, self.num_hidden5)*(2/sqrt(self.num_hidden5))
self.Bias_H5_to_H6=np.zeros([self.num_hidden6,1])
self.Weights_H6_to_output=np.random.randn(self.num_output, self.num_hidden6)*(2/sqrt(self.num_hidden6))
self.Bias_H6_to_output=np.zeros([self.num_output,1])
self.dWeights_Input_to_H1=np.zeros([self.num_hidden1, self.num_features])
self.dBias_Input_to_H1=np.zeros([self.num_hidden1,1])
self.dWeights_H1_to_H2=np.zeros([self.num_hidden2, self.num_hidden1])
self.dBias_H1_to_H2=np.zeros([self.num_hidden2,1])
self.dWeights_H2_to_H3=np.zeros([self.num_hidden3, self.num_hidden2])
self.dBias_H2_to_H3=np.zeros([self.num_hidden3,1])
self.dWeights_H3_to_H4=np.zeros([self.num_hidden4, self.num_hidden3])
self.dBias_H3_to_H4=np.zeros([self.num_hidden4,1])
self.dWeights_H4_to_H5=np.zeros([self.num_hidden5, self.num_hidden4])
self.dBias_H4_to_H5=np.zeros([self.num_hidden5,1])
self.dWeights_H5_to_H6=np.zeros([self.num_hidden6, self.num_hidden5])
self.dBias_H5_to_H6=np.zeros([self.num_hidden6,1])
self.dWeights_H6_to_output=np.zeros([self.num_output, self.num_hidden6])
self.dBias_H6_to_output=np.zeros([self.num_output,1])




def relU(self,X):
return np.maximum(X, 0)

def deriv(self,X):
X[X>=0]=1
X[X<0]=0

return X



def Softmax_grad(self,X):
return(sm(X,axis=1)*(1-sm(X,axis=1)))

def sigmoid(self,X):
return 1/(1+np.exp(-X))

def softmax(self,x):
e=np.exp(x)
for i in range(e.shape[1]):
e[:,i]=e[:,i]/np.sum(e[:,i])
return e

def derivtanh(self,X):
return 1-((np.tanh(X))**2)







# TODO: complete implementation for forward pass
def forward(self, X):
self.z1=np.dot((self.Weights_Input_to_H1),(X))+self.Bias_Input_to_H1
self.a1=self.relU(self.z1)
self.z2=np.dot((self.Weights_H1_to_H2),(self.a1))+self.Bias_H1_to_H2
self.a2=self.relU(self.z2)
self.z3=np.dot((self.Weights_H2_to_H3),(self.a2))+self.Bias_H2_to_H3
self.a3=self.relU(self.z3)
self.z4=np.dot((self.Weights_H3_to_H4),(self.a3))+self.Bias_H3_to_H4
self.a4=self.relU(self.z4)
self.z5=np.dot((self.Weights_H4_to_H5),(self.a4))+self.Bias_H4_to_H5
self.a5=self.relU(self.z5)
self.z6=np.dot((self.Weights_H5_to_H6),(self.a5))+self.Bias_H5_to_H6
self.a6=self.relU(self.z6)
self.z7=np.dot((self.Weights_H6_to_output),(self.a6))+self.Bias_H6_to_output
self.a7=self.softmax((self.z7))
return self.a7




# TODO: complete implementation for backpropagation
# the following Numpy functions may be useful: np.dot, np.sum, np.tanh, numpy.ndarray.T
def backprop(self, X, t):

self.dWeights_Input_to_H1=np.zeros([self.num_hidden1, self.num_features])
self.dBias_Input_to_H1=np.zeros([self.num_hidden1,1])
self.dWeights_H1_to_H2=np.zeros([self.num_hidden2, self.num_hidden1])
self.dBias_H1_to_H2=np.zeros([self.num_hidden2,1])
self.dWeights_H2_to_H3=np.zeros([self.num_hidden3, self.num_hidden2])
self.dBias_H2_to_H3=np.zeros([self.num_hidden3,1])
self.dWeights_H3_to_H4=np.zeros([self.num_hidden4, self.num_hidden3])
self.dBias_H3_to_H4=np.zeros([self.num_hidden4,1])
self.dWeights_H4_to_H5=np.zeros([self.num_hidden5, self.num_hidden4])
self.dBias_H4_to_H5=np.zeros([self.num_hidden5,1])
self.dWeights_H5_to_H6=np.zeros([self.num_hidden6, self.num_hidden5])
self.dBias_H5_to_H6=np.zeros([self.num_hidden6,1])
self.dWeights_H6_to_output=np.zeros([self.num_output, self.num_hidden6])
self.dBias_H6_to_output=np.zeros([self.num_output,1])
self.dz7=(self.a7.reshape(self.num_output,-1)-t.reshape(self.num_output,-1))
self.dBias_H6_to_output=np.sum(self.dz7,axis=1,keepdims=True)/(X.shape[1])
self.dWeights_H6_to_output=np.dot((self.dz7),self.a6.T)/(X.shape[1])
self.dz6=(np.dot(self.Weights_H6_to_output.T,self.dz7)) * (self.deriv(self.z6))
self.dBias_H5_to_H6=np.sum(self.dz6,axis=1,keepdims=True)/(X.shape[1])
self.dWeights_H5_to_H6=np.dot((self.dz6),(self.a5.T))/(X.shape[1])
self.dz5=(np.dot(self.Weights_H5_to_H6.T,self.dz6)) * (self.deriv(self.z5))
self.dBias_H4_to_H5=np.sum(self.dz5,axis=1,keepdims=True)/(X.shape[1])
self.dWeights_H4_to_H5=np.dot((self.dz5),(self.a4.T))/(X.shape[1])
self.dz4=(np.dot(self.Weights_H4_to_H5.T,self.dz5)) * (self.deriv(self.z4))
self.dBias_H3_to_H4=np.sum(self.dz4,axis=1,keepdims=True)/(X.shape[1])
self.dWeights_H3_to_H4=np.dot((self.dz4),(self.a3.T))/(X.shape[1])
self.dz3=(np.dot(self.Weights_H3_to_H4.T,self.dz4)) * (self.deriv(self.z3))
self.dBias_H2_to_H3=np.sum(self.dz3,axis=1,keepdims=True)/(X.shape[1])
self.dWeights_H2_to_H3=np.dot((self.dz3),(self.a2.T))/(X.shape[1])
self.dz2=(np.dot(self.Weights_H2_to_H3.T,self.dz3)) * (self.deriv(self.z2))
self.dBias_H1_to_H2=np.sum(self.dz2,axis=1,keepdims=True)/(X.shape[1])
self.dWeights_H1_to_H2=np.dot((self.dz2),(self.a1.T))/(X.shape[1])
self.dz1=(np.dot(self.Weights_H1_to_H2.T,self.dz2)) * (self.deriv(self.z1))
self.dBias_Input_to_H1=np.sum(self.dz1,axis=1,keepdims=True)/(X.shape[1])
self.dWeights_Input_to_H1=np.dot((self.dz1),X.T)/(X.shape[1])
















#TODO: complete implementation for fitting data, and change the existing code if needed
def fit(self, x_train_data, y_train_data,x_dev_data,y_dev_data):



for step in range(self.max_epochs):
self.forward(x_train_data)
self.backprop(x_train_data, y_train_data)
#self.MSE=np.mean((self.dz7/2)**2)
self.CCloss=np.mean(log_loss(y_true=y_train_data,y_pred=self.a7,normalize=True))
self.Bias_H6_to_output=self.Bias_H6_to_output-((self.alpha)*(self.dBias_H6_to_output))
self.Weights_H6_to_output=self.Weights_H6_to_output-((self.alpha)*(self.dWeights_H6_to_output))
self.Bias_H5_to_H6=self.Bias_H5_to_H6-((self.alpha)*(self.dBias_H5_to_H6))
self.Weights_H5_to_H6=self.Weights_H5_to_H6-((self.alpha)*(self.dWeights_H5_to_H6))
self.Bias_H4_to_H5=self.Bias_H4_to_H5-((self.alpha)*(self.dBias_H4_to_H5))
self.Weights_H4_to_H5=self.Weights_H4_to_H5-((self.alpha)*(self.dWeights_H4_to_H5))
self.Bias_H3_to_H4=self.Bias_H3_to_H4-((self.alpha)*(self.dBias_H3_to_H4))
self.Weights_H3_to_H4=self.Weights_H3_to_H4-((self.alpha)*(self.dWeights_H3_to_H4))
self.Bias_H2_to_H3=self.Bias_H2_to_H3-((self.alpha)*(self.dBias_H2_to_H3))
self.Weights_H2_to_H3=self.Weights_H2_to_H3-((self.alpha)*(self.dWeights_H2_to_H3))
self.Bias_H1_to_H2=self.Bias_H1_to_H2-((self.alpha)*(self.dBias_H1_to_H2))
self.Weights_H1_to_H2=self.Weights_H1_to_H2-((self.alpha)*(self.dWeights_H1_to_H2))
self.Bias_Input_to_H1=self.Bias_Input_to_H1-((self.alpha)*(self.dBias_Input_to_H1))
self.Weights_Input_to_H1=self.Weights_Input_to_H1-((self.alpha)*(self.dWeights_Input_to_H1))


if step % 100 == 0:
print(f'step: {step}, loss: {self.CCloss:3.150f}')
print(accuracy_score(np.argmax(y_train_data,axis=0),np.argmax(self.a7,axis=0)))
print(accuracy_score(np.argmax(y_dev_data,axis=0),np.argmax(self.forward(x_dev_data),axis=0)))
self.loss.append(self.CCloss)








def predict(self,X,y=None):
self.forward(X)
if(self.num_output>1):
self.a7=np.argmax(self.a7, axis=1)
lin_out1=self.a7
#y_hat=np.where(lin_out1>0.5,1,0)



return lin_out1
TSZenithSkirmisher
post Aug 24 2022, 02:39 PM

Getting Started
**
Junior Member
261 posts

Joined: Oct 2018
QUOTE(weisinx7 @ Aug 24 2022, 02:14 PM)
Hi, could you reshare the gdrive link? Looks like i can't see anything from it.

Typically the problem will be under fitting, but you may use larger batch size (probably 32 or larger) to let it converge faster.
*
weisinx7
post Aug 24 2022, 05:01 PM

Getting Started
**
Junior Member
213 posts

Joined: Oct 2019
QUOTE(ZenithSkirmisher @ Aug 24 2022, 02:39 PM)

*
I think it's better if you can attach the file or share it somewhere else, since that will enable me to run it from my end too. Also probably you can send me the training and testing data too?
TSZenithSkirmisher
post Aug 24 2022, 05:03 PM

Getting Started
**
Junior Member
261 posts

Joined: Oct 2018
QUOTE(weisinx7 @ Aug 24 2022, 05:01 PM)
I think it's better if you can attach the file or share it somewhere else, since that will enable me to run it from my end too. Also probably you can send me the training and testing data too?
*
Run on iris

transpose the data first

 

Change to:
| Lo-Fi Version
0.0145sec    0.65    5 queries    GZIP Disabled
Time is now: 25th December 2025 - 01:08 AM