
I have a simple feedforward neural network with 2 input neurons (and 1 bias neuron), 4 hidden neurons (and 1 bias neuron), and one output neuron. The feedforward mechanism seems to be working fine, but I have trouble fully understanding how to implement the backpropagation algorithm.

There are 3 classes:

  • Neural::Net: builds the network and feeds input values forward (no backpropagation for the moment)
  • Neural::Neuron: holds the characteristics of a neuron (index, output, weight, etc.)
  • Neural::Connection: a structure-like class that randomizes the weights and holds the output, delta weight, etc.

To make things clear: I'm taking a calculus class, so I understand a few of the notions involved. This is quite advanced for me, but I still want to make it work.

The transfer function is a logistic function. The weights of the synapses are "attached" to the neuron outputting the value.
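
For reference, the logistic transfer I'm using and its derivative (the o*(1 - o) factor that backpropagation relies on) can be written as a small standalone sketch:

#include <cmath>

// Logistic (sigmoid) transfer: squashes the weighted sum into (0, 1).
double transfer(double weightedSum) {
    return 1.0 / (1.0 + std::exp(-weightedSum));
}

// Derivative of the logistic, expressed through the neuron's output o:
// d/dx sigma(x) = o * (1 - o).
double transferDerivative(double output) {
    return output * (1.0 - output);
}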

This is my attempt at a backpropagation function:

void Net::backPropagate(const vector<double>& targetVals) {
    Layer& outputLayer = myLayers.back();
    assert(targetVals.size() == outputLayer.size());
    cout << "good2" << endl;

    // Starting with the output layer
    for (unsigned int i = 0; i < outputLayer.size(); ++i) { // Traversing output layer
        double output = outputLayer[i].getOutput();
        cout << "good3" << endl;
        double error = output * (1 - output) * (pow(targetVals[i] - output, 2));
        cout << "good4" << endl;
        outputLayer[i].setError(error); // Calculating error
        double newWeight = outputLayer[i].getWeight();
        newWeight += (error * outputLayer[i].getOutput());
        outputLayer[i].setWeight(newWeight); // Setting new weight
        cout << "good5" << endl;
    }

    for (unsigned int i = myLayers.size() - 2; i > 0; --i) { // Traversing hidden layers all the way to input layer
        Layer& currentLayer = myLayers[i];
        Layer& nextLayer = myLayers[i + 1];
        for (unsigned int j = 0; j < currentLayer.size(); ++j) { // Traversing current layer
            const double& output = currentLayer[j].getOutput();
            double subSum = 0.0; // Initializing subsum
            for (unsigned int k = 0; k < nextLayer.size(); ++k) { // Traversing next layer
                subSum += pow(nextLayer[k].getError() * currentLayer[j].getWeight(), 2); // Getting their backpropagated error and weight
            }
            double error = output * (1 - output) * (subSum);
            currentLayer[j].setError(error);
            double newWeight = currentLayer[j].getWeight();
            newWeight += error * output;
            currentLayer[j].setWeight(newWeight);
        }
    }
}
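
For comparison, my understanding of the standard formulas for a sigmoid unit with squared-error loss is sketched below; the learning rate eta and the per-connection weights w_hk are assumptions of the sketch, since my classes currently store only one weight per neuron:

#include <cstddef>
#include <vector>

// Standard backprop deltas for sigmoid units with squared-error loss (sketch):
//   output layer:  delta_o = (target - out_o) * out_o * (1 - out_o)
//   hidden layer:  delta_h = out_h * (1 - out_h) * sum_k( delta_k * w_hk )
//   weight update: w_hk   += eta * delta_k * out_h
// 'eta' and the per-connection weights w_hk are assumptions for this sketch;
// my classes currently keep only one weight per neuron.
double outputDelta(double target, double out) {
    return (target - out) * out * (1.0 - out);
}

double hiddenDelta(double out,
                   const std::vector<double>& nextLayerDeltas,
                   const std::vector<double>& outgoingWeights) {
    double sum = 0.0;
    for (std::size_t k = 0; k < nextLayerDeltas.size(); ++k)
        sum += nextLayerDeltas[k] * outgoingWeights[k];
    return out * (1.0 - out) * sum;
}

In particular, the (target - output) difference is not squared in the textbook version.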

I tried to train my network to:

  • Input {1,1} -> Output {0}
  • Input {0,0} -> Output {1}

But the outputs for both are very close to 1 (~0.998) no matter how many times I train it, so obviously something is wrong.

Here is the full code:

// STL_Practice.cpp : Defines the entry point for the console application.
//

#include <iostream>
#include <cassert>
#include <cstdlib>
#include <vector>
#include <time.h>

#include "ConsoleColor.hpp"

using namespace std;

namespace Neural {

    class Neuron;
    typedef vector<Neuron> Layer;

    // ******************** Class: Connection ******************** //
    class Connection {
    public:
        Connection();
        void setOutput(const double& outputVal) { myOutputVal = outputVal; }
        void setWeight(const double& weight) { myDeltaWeight = myWeight - weight; myWeight = weight; }
        double getOutput(void) const { return myOutputVal; }
        double getWeight(void) const { return myWeight; }
    private:
        static double randomizeWeight(void) { return rand() / double(RAND_MAX); }
        double myOutputVal;
        double myWeight;
        double myDeltaWeight;
    };

    Connection::Connection() {
        myOutputVal = 0;
        myWeight = Connection::randomizeWeight();
        myDeltaWeight = myWeight;
        cout << "Weight: " << myWeight << endl;
    }

    // ******************** Class: Neuron ************************ //
    class Neuron {
    public:
        Neuron();
        void setIndex(const unsigned int& index) { myIndex = index; }
        void setOutput(const double& output) { myConnection.setOutput(output); }
        void setWeight(const double& weight) { myConnection.setWeight(weight); }
        void setError(const double& error) { myError = error; }
        unsigned int getIndex(void) const { return myIndex; }
        double getOutput(void) const { return myConnection.getOutput(); }
        double getWeight(void) const { return myConnection.getWeight(); }
        double getError(void) const { return myError; }
        void feedForward(const Layer& prevLayer);
        void printOutput(void) const;
    private:
        inline static double transfer(const double& weightedSum);
        Connection myConnection;
        unsigned int myIndex;
        double myError;
    };

    Neuron::Neuron() : myIndex(0), myConnection() { }

    double Neuron::transfer(const double& weightedSum) { return 1 / double((1 + exp(-weightedSum))); }

    void Neuron::printOutput(void) const { cout << "Neuron " << myIndex << ':' << myConnection.getOutput() << endl; }

    void Neuron::feedForward(const Layer& prevLayer) {
        // Weight sum of the previous layer's output values
        double weightedSum = 0;
        for (unsigned int i = 0; i < prevLayer.size(); ++i) {
            weightedSum += prevLayer[i].getOutput() * myConnection.getWeight();
            cout << "Neuron " << i << " from prevLayer has output: " << prevLayer[i].getOutput() << endl;
            cout << "Weighted sum: " << weightedSum << endl;
        }
        // Transfer function
        myConnection.setOutput(Neuron::transfer(weightedSum));
        cout << "Transfer: " << myConnection.getOutput() << endl;
    }

    // ******************** Class: Net *************************** //
    class Net {
    public:
        Net(const vector<unsigned int>& topology);
        void setTarget(const vector<double>& targetVals);
        void feedForward(const vector<double>& inputVals);
        void backPropagate(const vector<double>& targetVals);
        void printOutput(void) const;
    private:
        vector<Layer> myLayers;
    };

    Net::Net(const vector<unsigned int>& topology) {
        assert(topology.size() > 0);
        for (unsigned int i = 0; i < topology.size(); ++i) { // Creating the layers
            myLayers.push_back(Layer(((i + 1) == topology.size()) ? topology[i] : topology[i] + 1)); // +1 is for bias neuron
            // Setting each neurons index inside layer
            for (unsigned int j = 0; j < myLayers[i].size(); ++j) { myLayers[i][j].setIndex(j); }
            // Console log
            cout << red;
            if (i == 0) {
                cout << "Input layer (" << myLayers[i].size() << " neurons including bias neuron) created." << endl;
                myLayers[i].back().setOutput(1);
            } else if (i < topology.size() - 1) {
                cout << "Hidden layer " << i << " (" << myLayers[i].size() << " neurons including bias neuron) created." << endl;
                myLayers[i].back().setOutput(1);
            } else {
                cout << "Output layer (" << myLayers[i].size() << " neurons) created." << endl;
            }
            cout << white;
        }
    }

    void Net::feedForward(const vector<double>& inputVals) {
        assert(myLayers[0].size() - 1 == inputVals.size());
        for (unsigned int i = 0; i < inputVals.size(); ++i) { // Setting input vals to input layer
            cout << yellow << "Setting input vals...";
            myLayers[0][i].setOutput(inputVals[i]); // myLayers[0] is the input layer
            cout << "myLayer[0][" << i << "].getOutput()==" << myLayers[0][i].getOutput() << white << endl;
        }
        for (unsigned int i = 1; i < myLayers.size() - 1; ++i) { // Updating hidden layers
            for (unsigned int j = 0; j < myLayers[i].size() - 1; ++j) { // - 1 because bias neurons do not have input
                cout << "myLayers[" << i << "].size()==" << myLayers[i].size() << endl;
                cout << green << "Updating neuron " << j << " inside layer " << i << white << endl;
                myLayers[i][j].feedForward(myLayers[i - 1]); // Updating the neurons output based on the neurons of the previous layer
            }
        }
        for (unsigned int i = 0; i < myLayers.back().size(); ++i) { // Updating output layer
            cout << green << "Updating output neuron " << i << ": " << white << endl;
            const Layer& prevLayer = myLayers[myLayers.size() - 2];
            myLayers.back()[i].feedForward(prevLayer); // Updating the neurons output based on the neurons of the previous layer
        }
    }

    void Net::printOutput(void) const {
        for (unsigned int i = 0; i < myLayers.back().size(); ++i) {
            cout << blue;
            myLayers.back()[i].printOutput();
            cout << white;
        }
    }

    void Net::backPropagate(const vector<double>& targetVals) {
        Layer& outputLayer = myLayers.back();
        assert(targetVals.size() == outputLayer.size());
        cout << "good2" << endl;
        // Starting with the output layer
        for (unsigned int i = 0; i < outputLayer.size(); ++i) { // Traversing output layer
            double output = outputLayer[i].getOutput();
            cout << "good3" << endl;
            double error = output * (1 - output) * (pow(targetVals[i] - output, 2));
            cout << "good4" << endl;
            outputLayer[i].setError(error); // Calculating error
            double newWeight = outputLayer[i].getWeight();
            newWeight += (error * outputLayer[i].getOutput());
            outputLayer[i].setWeight(newWeight); // Setting new weight
            cout << "good5" << endl;
        }
        for (unsigned int i = myLayers.size() - 2; i > 0; --i) { // Traversing hidden layers all the way to input layer
            Layer& currentLayer = myLayers[i];
            Layer& nextLayer = myLayers[i + 1];
            for (unsigned int j = 0; j < currentLayer.size(); ++j) { // Traversing current layer
                const double& output = currentLayer[j].getOutput();
                double subSum = 0.0; // Initializing subsum
                for (unsigned int k = 0; k < nextLayer.size(); ++k) { // Traversing next layer
                    subSum += pow(nextLayer[k].getError() * currentLayer[j].getWeight(), 2); // Getting their backpropagated error and weight
                }
                double error = output * (1 - output) * (subSum);
                currentLayer[j].setError(error);
                double newWeight = currentLayer[j].getWeight();
                newWeight += error * output;
                currentLayer[j].setWeight(newWeight);
            }
        }
    }
}

int main(int argc, char* argv[]) {
    srand(time(NULL));

    vector<unsigned int> myTopology;
    myTopology.push_back(2);
    myTopology.push_back(4);
    myTopology.push_back(1);

    cout << myTopology.size() << endl << endl; // myTopology == {3, 4, 2 ,1}

    Neural::Net myNet(myTopology);

    for (unsigned int i = 0; i < 50; ++i) {
        myNet.feedForward({1, 1});
        myNet.backPropagate({0});
    }
    for (unsigned int i = 0; i < 50; ++i) {
        myNet.feedForward({0, 0});
        myNet.backPropagate({1});
    }

    cout << "Feeding 0,0" << endl;
    myNet.feedForward({0, 0});
    myNet.printOutput();

    cout << "Feeding 1,1" << endl;
    myNet.feedForward({1, 1});
    myNet.printOutput();

    return 0;
}

2 Answers

You could try training until the error of the network is 0%, but that would likely take too long or be impossible. It's more common to train down to a minimum error such as 0.01 (1%) and to threshold the outputs, e.g. treating > 0.9 as 1 and < 0.1 as 0.

To calculate the error of the network (here with a single output neuron), you add Sum(Math.Abs(idealOutput - a.Value)) to a list for each training input, then average the list to get the overall error.

My implementation in C# is:

int epoch = 0;
double error = 1.0;
Network = network;

while (error > minError && epoch < int.MaxValue)
{
    var errors = new List<double>();
    for (int i = 0; i < inputs.Count; i++)
    {
        Algorithm(inputs[i], ideals[i]);
        int n = 0;
        errors.Add(Network.Layers[Network.Layers.Count - 1].Neurons
            .Sum(a => Math.Abs(ideals[i][n++] - a.Value)));
    }
    error = errors.Average();
    Console.WriteLine("Epoch: #{0} --- Error: {1}", epoch, error);
    epoch++;
}
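
Roughly the same loop translated to C++ as a sketch; the trainOnce callback is an assumption that stands in for one feedForward + backPropagate pass followed by reading the output layer, which your Net class would need to expose:

#include <cmath>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

// One training pass: given an input and its target, run the network and
// return the output-layer values. This callback is an assumption standing in
// for feedForward + backPropagate on your Net class.
using TrainOnce = std::function<std::vector<double>(const std::vector<double>&,
                                                    const std::vector<double>&)>;

double trainUntil(double minError,
                  const std::vector<std::vector<double>>& inputs,
                  const std::vector<std::vector<double>>& ideals,
                  const TrainOnce& trainOnce) {
    double error = 1.0;
    int epoch = 0;
    while (error > minError) {
        double sum = 0.0;
        for (std::size_t i = 0; i < inputs.size(); ++i) {
            std::vector<double> outputs = trainOnce(inputs[i], ideals[i]);
            for (std::size_t j = 0; j < outputs.size(); ++j)
                sum += std::abs(ideals[i][j] - outputs[j]);   // |ideal - actual| per output
        }
        error = sum / inputs.size();   // average absolute error over the training set
        std::cout << "Epoch: #" << epoch << " --- Error: " << error << std::endl;
        ++epoch;
    }
    return error;
}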

Use an evolutionary algorithm instead of backpropagation to train the weights.

This should help.
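
As a rough illustration of the idea (a sketch, not code that plugs into your classes), a minimal (1+1)-style hill climber over a flat vector of weights could look like this; the evaluate callback, where lower is better, is assumed to be something like your network's average error over the training set, and getting the weights in and out of the network as one vector is also assumed:

#include <cstdlib>
#include <functional>
#include <vector>

// Minimal (1+1) evolutionary/hill-climbing sketch: mutate the weight vector,
// keep the mutant only if it scores better. 'evaluate' (lower is better) is an
// assumed fitness function, e.g. the network's average error on the training set.
std::vector<double> evolveWeights(std::vector<double> weights,
                                  const std::function<double(const std::vector<double>&)>& evaluate,
                                  int generations, double stepSize) {
    double bestScore = evaluate(weights);
    for (int g = 0; g < generations; ++g) {
        std::vector<double> candidate = weights;
        for (double& w : candidate) {
            // Perturb each weight by a small uniform random amount.
            w += stepSize * (2.0 * rand() / double(RAND_MAX) - 1.0);
        }
        double score = evaluate(candidate);
        if (score < bestScore) {            // keep the mutant only if it improves
            weights = candidate;
            bestScore = score;
        }
    }
    return weights;
}

The appeal is that you only need forward passes and a fitness score, no gradients at all.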

1 Comment

I am planning on trying an evolutionary algorithm later. For now, I want to be able to understand backpropagation fully and implement it correctly. Thank you for the link.
