void Ann<K>::updateError(double **input, const size_t &input_rows, const size_t &input_cols,
                         double **weights, const size_t &weights_rows, const size_t &weights_cols,
                         double **bias, const size_t &bias_rows, const size_t &bias_cols,
                         double **target_output, const size_t &target_output_rows, const size_t &target_output_cols,
                         unsigned int *target_classes, const size_t &target_classes_rows) {
    double **output = feedForward(input, input_rows, input_cols,
                                  weights, weights_rows, weights_cols,
                                  bias, bias_rows, bias_cols,
                                  target_output_cols);

    // Mean squared error over the whole output matrix
    double **subtracted = MatrixOps::subtract(target_output, output, target_output_rows, target_output_cols);
    double **squared = MatrixOps::hadamardMultiply(subtracted, subtracted, target_output_rows, target_output_cols);
    training_error.push_back(MatrixOps::sum(squared, target_output_rows, target_output_cols) /
                             (target_output_rows * target_output_cols));
    MatrixOps::deleteMatrix(subtracted, target_output_rows);
    MatrixOps::deleteMatrix(squared, target_output_rows);

    // Classification error: the counter tallies mismatches, so name it accordingly
    unsigned int *classes = MatrixOps::matrixToClass(output, target_output_rows, target_output_cols);
    unsigned int incorrect_classes = 0;
    for (size_t r = 0; r < target_classes_rows; r++) {
        if (classes[r] != target_classes[r]) {
            incorrect_classes++;
        }
    }
    training_classification_error.push_back((double)incorrect_classes / target_classes_rows);

    MatrixOps::deleteMatrix(output, target_output_rows);
    delete[] classes;
}
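For reference, the two quantities updateError records, written out. This just restates what the code above computes: T is target_output, O the feed-forward output, R x C its dimensions, y_r the target class for row r, and ŷ_r the class picked by matrixToClass.

\[
E_{\text{mse}} = \frac{1}{R\,C}\sum_{r=1}^{R}\sum_{c=1}^{C}\bigl(T_{rc}-O_{rc}\bigr)^{2},
\qquad
E_{\text{class}} = \frac{1}{R}\sum_{r=1}^{R}\mathbf{1}\!\left[\hat{y}_r \neq y_r\right]
\]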
int trainNet(NeuralNetwork* net, Sample* samples, unsigned int numberOfSamples,
             unsigned int epochs, unsigned int batchSize, double learningRate) {
    unsigned int epoch = 0, startIndex, i, currentBatchSize;

    if (!samples || epochs < 1 || batchSize < 1 || batchSize > numberOfSamples ||
        learningRate <= 0.0 || !isValidNet(net)) {
        return -2;
    }

    while (epoch < epochs) {
        shuffleSamples(samples, numberOfSamples);
        startIndex = 0;
        // while there are still mini-batches
        while (startIndex < numberOfSamples) {
            // the final batch may be smaller than batchSize; integer min avoids
            // the double round-trip that fmin() would introduce here
            currentBatchSize = (numberOfSamples - startIndex < batchSize)
                                   ? numberOfSamples - startIndex
                                   : batchSize;
            printf("epoch: %u, currentBatchSize: %u\n", epoch, currentBatchSize);
            initializeDeltas(net);
            for (i = 0; i < currentBatchSize; i++) {
                // feed forward, then accumulate this sample's gradient
                feedForward(net, samples[startIndex + i].inputs);
                updateDeltas(net, samples[startIndex + i].outputs);
            }
            // average the accumulated gradient over the batch
            updateWeights(net, learningRate / (double)currentBatchSize);
            startIndex += currentBatchSize;
        }
        epoch++;
    }
    return 0;
}
void trainNetwork(DATASET *trainData, float *weightsFromInputToHidden, float *weightsFromHiddenToOutput,
                  float *biasesOfHidden, float *biasesOfOutput,
                  int INPUTLAYERNODES, int HIDDENLAYERNODES, int OUTPUTLAYERNODES) {
    for (int epoch = 0; epoch < EPOCHS; epoch++) {
        float error = 0;
        for (int train = 0; train < DATATOREAD; train++) {
            float input[INPUTLAYERNODES];
            float hidden[HIDDENLAYERNODES];
            float output[OUTPUTLAYERNODES];
            float target[OUTPUTLAYERNODES];

            // one-hot encode the class label stored in value[7]
            for (int j = 0; j < OUTPUTLAYERNODES; j++) {
                target[j] = (trainData[train].value[7] == j) ? 1 : 0;
            }

            feedForward(input, hidden, output, trainData, train,
                        weightsFromInputToHidden, weightsFromHiddenToOutput,
                        biasesOfHidden, biasesOfOutput,
                        INPUTLAYERNODES, HIDDENLAYERNODES, OUTPUTLAYERNODES);

            // accumulate squared error over the epoch
            for (int i = 0; i < OUTPUTLAYERNODES; i++) {
                error += pow(target[i] - sigmoid(output[i]), 2);
            }

            float deltaHidden[HIDDENLAYERNODES];
            float deltaOutput[OUTPUTLAYERNODES];
            backPropagateError(input, hidden, output, target, deltaHidden, deltaOutput,
                               weightsFromHiddenToOutput, HIDDENLAYERNODES, OUTPUTLAYERNODES);
            updateWeights(input, hidden, deltaHidden, deltaOutput,
                          weightsFromInputToHidden, weightsFromHiddenToOutput,
                          biasesOfHidden, biasesOfOutput,
                          INPUTLAYERNODES, HIDDENLAYERNODES, OUTPUTLAYERNODES);
        }
        printf("EPOCH %i | error = %f\n", epoch, error);
        //int q = evaulate(trainData, weightsFromInputToHidden, weightsFromHiddenToOutput, biasesOfHidden, biasesOfOutput, INPUTLAYERNODES, HIDDENLAYERNODES, OUTPUTLAYERNODES);
        printf("Epoch %i: %i\n", epoch, DATATOREAD);
    }
}
float MultilayerNN::trainOne(vector<float> tuple) {
    float error;

    // Set up topography & randomize weights if first run
    if (!topoSet) { setTopo(tuple); }

    // Set previous weights if size = 0
    //if (previousWeights.size() == 0) previousWeights = weights;

    // Set input nodes to training tuple values
    for (size_t i = 0; i < inputNodes.size(); i++) {
        inputNodes.at(i) = tuple.at(i);
    }

    // FIRST PASS: Feed forward through network
    feedForward();

    // SECOND PASS: Calculate error, propagate deltas back through network given desired output
    error = addErrorForIteration(tuple.back());
    backProp(tuple.back());

    // THIRD PASS: Update weights
    updateWeights();

    return error;
}
/**
Trains the neural network with the backpropagation algorithm. The primary steps are,
for each training pattern: feed forward, then propagate the error backward, repeating
until the MSE becomes suitably small.
*/
void CNeuralNet::train(std::vector<std::vector<double>> inputs,
                       std::vector<std::vector<double>> outputs, uint trainingSetSize) {
    while (m_MSE > m_mse_cutoff) {
        for (uint n = 0; n < trainingSetSize; ++n) {
            // Feed forward and update each node's outputs
            feedForward(inputs[n]);
            // Propagate backwards
            propagateErrorBackward(outputs[n]);
            // Update the MSE (note this is measured on the current pattern only)
            m_MSE = meanSquaredError(outputs[n]);
            //cout << m_MSE << endl;
            // Once the cutoff value is met the network is considered trained
            if (m_MSE < m_mse_cutoff) { break; }
        }
    }
    //std::cout << "Training complete" << std::endl;
}
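For reference, the per-pattern error the cutoff is compared against is conventionally the mean of the squared output errors; the exact normalization inside meanSquaredError is not shown in this snippet, so this is an assumption:

\[
\text{MSE} = \frac{1}{N}\sum_{i=1}^{N}\bigl(t_i - o_i\bigr)^{2}
\]

with t the desired output vector, o the network's output, and N the number of output nodes.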
double NeuralNetwork::computeCost(const arma::mat& input, const arma::mat& output,
                                  const std::vector<arma::mat>& theta) {
    arma::mat h = feedForward(input, theta);
    arma::mat h_1 = 1 - h;
    unsigned int m = input.n_rows;

    // Cross-entropy cost, averaged over the m training examples
    double cost = (-1.0 / m) * arma::accu(output % logarithm(h) + (1 - output) % logarithm(h_1));
    // Add the regularization term
    return cost + (m_regFactor / (2.0 * m)) * computeRegTerm(theta);
}
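Written out, computeCost is the regularized cross-entropy cost. Here h is the feed-forward hypothesis, y the target matrix, m the number of examples, λ = m_regFactor, and ⊙ the elementwise product that Armadillo's % operator performs; R(θ) stands for computeRegTerm(theta), which is not shown here but is conventionally the sum of squared weights excluding bias terms.

\[
J(\theta) = -\frac{1}{m}\sum\Bigl[\,y \odot \log h + (1-y)\odot\log(1-h)\Bigr]
          + \frac{\lambda}{2m}\,R(\theta)
\]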
arma::field<arma::Cube<double>> VanillaFeedForward::feedForward(
        const arma::field<arma::Cube<double>>& xs) {
    mxs = xs;  // store the inputs (presumably for use in the backward pass)
    arma::field<arma::Cube<double>> ys(xs.size());
    for (unsigned int i = 0; i < xs.size(); ++i) {
        ys[i] = feedForward(xs[i]);
    }
    return ys;
}
// get the classification accuracy of the provided data set
// PRECONDITIONS:
//  * each individual input MUST be composed of INPUT_SIZE values
//  * inputs and labels MUST BOTH have 'size' number of elements
float getTestAccuracy(NeuralNetwork* nn, float inputs[][INPUT_SIZE], int labels[], int size) {
    int incorrectPatterns = 0;
    for (int i = 0; i < size; i++) {
        feedForward(nn, inputs[i]);
        int guess = guessClassification(nn->output);
        if (guess != labels[i]) incorrectPatterns++;
    }
    return 1.0 - (float)incorrectPatterns / (float)size;
}
float MultilayerNN::testOne(vector<float> tuple) {
    // Set input nodes to testing tuple values
    for (size_t i = 0; i < inputNodes.size(); i++) {
        inputNodes.at(i) = tuple.at(i);
    }
    // Run tuple through net
    feedForward();
    // Return squared error
    return addErrorForIteration(tuple.back());
}
double backPropagate(double *tg, BPNETWORK *nw) {
    int i, j, k;
    double sumError = 0, error, dw, sum = 0;

    feedForward(nw);

    /* Find the delta for the output layer */
    for (i = 0; i < nw->layer[nw->nLayer-1].nNeural; i++) {
        error = tg[i] - nw->layer[nw->nLayer-1].p[i].x;
        /* delta = f'(x) * e = [x*(1-x)] * (target - output) */
        nw->layer[nw->nLayer-1].p[i].delta =
            nw->layer[nw->nLayer-1].p[i].x * (1 - nw->layer[nw->nLayer-1].p[i].x) * error;
        sumError = sumError + (error * error);
    }

    /* Find the deltas for the hidden layers */
    for (i = nw->nLayer-2; i >= 0; i--) {
        /* For each layer, visit every neuron in it */
        for (j = 0; j < nw->layer[i].nNeural; j++) {
            sum = 0;
            /* Weighted sum of the deltas in the next layer */
            for (k = 0; k < nw->layer[i+1].nNeural; k++)
                sum = sum + nw->layer[i+1].p[k].delta * nw->layer[i].p[j].w[k];
            nw->layer[i].p[j].delta = nw->layer[i].p[j].x * (1 - nw->layer[i].p[j].x) * sum;
        }
    }

    /* Update weights */
    int temp;
    for (i = 0; i < nw->nLayer-1; i++) {
        /* skip the last unit (likely a bias) in hidden layers, but not in the output layer */
        temp = nw->layer[i+1].nNeural - 1;
        if (i+1 == nw->nLayer-1)
            temp = nw->layer[i+1].nNeural;
        for (j = 0; j < nw->layer[i].nNeural; j++) {
            for (k = 0; k < temp; k++) {
                /* Apply momentum */
                dw = (nw->alpha * nw->layer[i].p[j].preDw[k]) +
                     nw->learningRate * nw->layer[i+1].p[k].delta * nw->layer[i].p[j].x;
                nw->layer[i].p[j].w[k] = nw->layer[i].p[j].w[k] + dw;
                nw->layer[i].p[j].preDw[k] = dw;
            }
        }
    }

    return sumError / 2.0f;
}
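The update rules backPropagate implements, written out to match the code above: x is a unit's sigmoid output, t the target, η = learningRate, α = alpha, and Δw_prev the stored preDw.

\[
\delta^{\text{out}} = x(1-x)\,(t-x), \qquad
\delta^{(i)}_j = x_j(1-x_j)\sum_k w_{jk}\,\delta^{(i+1)}_k,
\]
\[
\Delta w = \alpha\,\Delta w_{\text{prev}} + \eta\,\delta\,x_{\text{in}}, \qquad
w \leftarrow w + \Delta w.
\]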
int classifySound(NeuralNetwork* nn, double input[63][14]) {
    float flatInput[INPUT_SIZE];
    // flatten the 63x14 feature matrix into a single input vector
    for (int i = 0; i < 63; i++) {
        for (int j = 0; j < 14; j++) {
            flatInput[i*14 + j] = (float)(input[i][j]);
        }
    }
    feedForward(nn, flatInput);
    return guessClassification(nn->output);
}
void DNN::backPropSet(const dvec& input, const dvec& output) {
    unsigned int i;
    feedForward(input);
    unsigned int L = activations.size() - 1;

    /* Start with the final layer */
    std::vector<Matrix> d(L+1);

    /* Copy the target vector into a column matrix */
    Matrix out(output.size(), 1);
    for (i = 0; i < output.size(); ++i)
        out(i,0) = output.at(i);

    /* Final layer error */
    Matrix DC = Matrix::apply(quad_dC_dA, activations.at(L), out);
    d.at(L) = Matrix::had(DC, activations.at(L));

    /* Backpropagate */
    for (i = L; i > 0; --i) {
        Matrix wd = weights.at(i-1).T() * d.at(i);
        d.at(i-1) = Matrix::had(wd, activations.at(i-1));
    }

    /* Accumulate the gradient of the cost for this set */
    for (i = L; i > 0; --i) {
        bGradient.at(i-1) = bGradient.at(i-1) + d.at(i);
        Matrix wg = d.at(i) * activations.at(i-1).T();
        wGradient.at(i-1) = wGradient.at(i-1) + wg;
    }
}
/**
Once our network is trained we can simply feed it some input through the feed
forward method and take the index of the maximum output value as the classification.
*/
uint CNeuralNet::classify(std::vector<double> input) {
    feedForward(input);
    float largest = 0;
    uint largestIndex = 0;
    for (uint i = 0; i < _outputActivation.size(); ++i) {
        if (_outputActivation[i] > largest) {
            largest = _outputActivation[i];
            largestIndex = i;
        }
    }
    //std::cout << "Classifying complete" << std::endl;
    //cout << largestIndex << endl;
    return largestIndex;
}
int Perceptron::train(double inputs[], int desired) {
    int guess = feedForward(inputs);
    float error = desired - guess;

    // Perceptron learning rule: nudge each weight along the error
    for (int i = 0; i < _inputNumber; i++) {
        _weights[i] += _learningFactor * error * inputs[i];
    }
    _threshold -= _learningFactor * error;

    if (guess != desired) {
        // cout << "Numbers: " << inputs[0] << ", " << inputs[1] << "\n";
        // cout << "Weights: " << _weights[0] << ", " << _weights[1] << "\n";
        // cout << "Desired: " << desired << ", Guess: " << guess << "\n";
        // cout << "Suma: " << _suma << "\n";
    }

    return (guess != desired);
}
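Written out, Perceptron::train applies the classic perceptron learning rule, matching the code above with η = _learningFactor, desired label d, guess y, and threshold θ:

\[
w_i \leftarrow w_i + \eta\,(d-y)\,x_i, \qquad \theta \leftarrow \theta - \eta\,(d-y).
\]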
Vec NeuralNetwork::getOutput(Vec in) {
    feedForward(in);
    // the last layer holds the network's output
    return layers[layers.size() - 1].getOutput();
}
int main() {
    double err;
    int i, j, sample = 0, iterations = 0;
    int sum = 0;

    out = fopen("stats.txt", "w");

    /* Seed the random number generator */
    srand(time(NULL));

    assignRandomWeights();

    /* Train: cycle through the samples until the iteration budget is spent */
    while (1) {
        if (++sample == MAX_SAMPLES) sample = 0;

        inputs[0] = samples[sample].health;
        inputs[1] = samples[sample].knife;
        inputs[2] = samples[sample].gun;
        inputs[3] = samples[sample].enemy;
        for (i = 0; i < OUTPUT_NEURONS; i++)
            target[i] = samples[sample].out[i];

        feedForward(0);

        /* need to iterate through all ... */
        err = 0.0;
        for (i = 0; i < OUTPUT_NEURONS; i++) {
            err += sqr(samples[sample].out[i] - actual[i]);
        }
        err = 0.5 * err;
        fprintf(out, "%g\n", err);
        printf("mse = %g\n", err);

        if (iterations++ > 100000) break;
        backPropagate();
    }

    /* Dump the learned weights */
    printf("wih\n");
    for (j = 0; j < HIDDEN_NEURONS; j++) {
        for (i = 0; i < INPUT_NEURONS+1; i++) printf("%lf, ", wih[i][j]);
        printf("\n");
    }
    printf("who\n");
    for (j = 0; j < OUTPUT_NEURONS; j++) {
        for (i = 0; i < HIDDEN_NEURONS+1; i++) printf("%lf, ", who[i][j]);
        printf("\n");
    }

    /* Test the network */
    for (i = 0; i < MAX_SAMPLES; i++) {
        inputs[0] = samples[i].health;
        inputs[1] = samples[i].knife;
        inputs[2] = samples[i].gun;
        inputs[3] = samples[i].enemy;
        for (j = 0; j < OUTPUT_NEURONS; j++)
            target[j] = samples[i].out[j];

        feedForward(0);

        if (action(actual) != action(target)) {
            printf("%2.1g:%2.1g:%2.1g:%2.1g %s (%s)\n",
                   inputs[0], inputs[1], inputs[2], inputs[3],
                   strings[action(actual)], strings[action(target)]);
        } else {
            sum++;
        }
    }
    printf("Network is %g%% correct\n", ((float)sum / (float)MAX_SAMPLES) * 100.0);

    /* Run some tests:     Health Knife Gun Enemy */
    double tests[][4] = { {2,1,1,1}, {1,1,1,2}, {0,0,0,0}, {0,1,1,1},
                          {2,0,1,3}, {2,1,0,3}, {0,1,0,3} };
    for (i = 0; i < 7; i++) {
        for (j = 0; j < 4; j++) inputs[j] = tests[i][j];
        feedForward(0);
        printf("%g%g%g%g Action %s\n",
               tests[i][0], tests[i][1], tests[i][2], tests[i][3],
               strings[action(actual)]);
    }

    fclose(out);
    return 0;
}
// train the network
// note: the `< SIZE+1` bounds in the for-loop conditions could equivalently be `<= SIZE`
// PRECONDITIONS:
//  * each individual input MUST be composed of INPUT_SIZE values
//  * inputs and labels MUST BOTH have 'size' number of elements
void trainNetwork(NeuralNetwork* nn, float inputs[][INPUT_SIZE], int labels[], int size,
                  float testInputs[][INPUT_SIZE], int testLabels[], int testSize) {
    int epoch = 0;
    int done_training = 0;
    std::ofstream outfile;
    // outfile.open("feedforward.dat");
    // outfile.close();
    // outfile.clear();
    // outfile.open("out.dat");

    while (!done_training && epoch < maxEpoch) {
        int incorrectPatterns = 0;
        for (int i = 0; i < size; i++) {
            // Feed the training input forward
            feedForward(nn, inputs[i]);

            // Backpropagate errors
            // note: can this be improved with the calculation of error gradients?
            for (int j = 0; j < OUTPUT_SIZE; j++) {
                float target = (labels[i] == j) ? 1.0f : 0.0f;
                //outfile << "label: " << labels[i] << " j: " << j << "\n";

                // set the error gradient for the output node
                nn->outputErrorGradients[j] = nn->output[j] * (1 - nn->output[j]) * (target - nn->output[j]);
                //outfile << "outputErrorGradients[" << j << "]= " << nn->outputErrorGradients[j] << "\n";
                for (int k = 0; k < HIDDEN_SIZE+1; k++) {
                    nn->deltaHO[k][j] = learningRate * nn->hidden[k] * nn->outputErrorGradients[j]
                                        + momentum * nn->deltaHO[k][j];
                    //outfile << "deltaHO[" << k << "][" << j << "]= " << nn->deltaHO[k][j] << "\n";
                }
            }
            for (int j = 0; j < HIDDEN_SIZE; j++) {
                // set the error gradient for the hidden node from weightHO times outputErrorGradients
                float sum = 0;
                for (int k = 0; k < OUTPUT_SIZE; k++)
                    sum += nn->weightHO[j][k] * nn->outputErrorGradients[k];
                nn->hiddenErrorGradients[j] = nn->hidden[j] * (1 - nn->hidden[j]) * sum;
                //outfile << "hiddenErrorGradients[" << j << "]= " << nn->hiddenErrorGradients[j] << "\n";
                for (int k = 0; k < INPUT_SIZE+1; k++)
                    nn->deltaIH[k][j] = learningRate * nn->input[k] * nn->hiddenErrorGradients[j]
                                        + momentum * nn->deltaIH[k][j];
            }

            // Update weights
            for (int j = 0; j < INPUT_SIZE+1; j++)
                for (int k = 0; k < HIDDEN_SIZE; k++)
                    nn->weightIH[j][k] += nn->deltaIH[j][k];
            for (int j = 0; j < HIDDEN_SIZE+1; j++)
                for (int k = 0; k < OUTPUT_SIZE; k++)
                    nn->weightHO[j][k] += nn->deltaHO[j][k];

            // Check whether the pattern was correctly identified
            int guess = guessClassification(nn->output);
            if (guess != labels[i]) incorrectPatterns++;

            // if (i % (size/10) == 0) {
            //     nn->trainingSetAccuracy = 1.0 - (float)incorrectPatterns / (float)size * 10.0;
            //     nn->testSetAccuracy = getTestAccuracy(nn, testInputs, testLabels, testSize);
            //     std::cout << "Epoch :" << epoch << "\n";
            //     std::cout << " Training Set Acc:" << nn->trainingSetAccuracy << "\n";
            //     std::cout << " Test Set Acc:" << nn->testSetAccuracy << "\n";
            //     incorrectPatterns = 0;
            // }
        }

        nn->trainingSetAccuracy = 1.0 - (float)incorrectPatterns / (float)size;
        nn->testSetAccuracy = getTestAccuracy(nn, testInputs, testLabels, testSize);
        std::cout << "Epoch :" << epoch << "\n";
        std::cout << " Training Set Acc:" << nn->trainingSetAccuracy << "\n";
        std::cout << " Test Set Acc:" << nn->testSetAccuracy << "\n";
        epoch++;
        done_training = (nn->trainingSetAccuracy + nn->testSetAccuracy >= DESIRED_ACCURACY*2);
    }
    //outfile.close();
}
arma::mat NeuralNetwork::predict(const arma::mat& input) {
    return feedForward(input, m_theta);
}
void test_and_function() {
    int neurals[3];
    long step = 0, epochs, iSample = 0;
    BPNETWORK *nw = 0;
    double input[4][2] = {{0,0},{0,1},{1,0},{1,1}};
    double target[4][1] = {{0},{0},{0},{1}};
    double e;
    double *biases;
    double threshold = 0.00001;

    nw = (BPNETWORK *)malloc(sizeof(BPNETWORK));
    epochs = 200000;
    nw->learningRate = 0.15;
    nw->alpha = 0.1;

    biases = (double *)malloc(3 * sizeof(double));
    biases[0] = 0.89;
    biases[1] = -0.69;
    biases[2] = 0.55;

    printf("-------------Network configuration-------------\n");
    printf("Epochs: %ld\n", epochs);
    printf("LearningRate: %lf\n", nw->learningRate);
    printf("Alpha(momentum): %lf\n", nw->alpha);
    printf("Threshold: %lf\n", threshold);
    printf("-----------------------------------------------\n");

    if (nw != 0) {
        neurals[0] = 3; neurals[1] = 3; neurals[2] = 1;
        initNetwork(biases, 3, neurals, nw);

        /* Train until the step budget is spent or the single-sample error drops below the threshold */
        step = 0;
        do {
            inputData(input[iSample], nw);
            e = backPropagate(target[iSample], nw);
            step = step + 1;
            iSample = step % 4;
        } while ((step < epochs) && (e > threshold));

        //printf("Mean square error: %lf (minimum error: %lf) (threshold: %lf) after %ld epoch\n", e, minE, threshold, count);
        printf("Training completed with error/threshold %lf/%lf after %ld steps\n\n", e, threshold, step);

        /* Test the network on all four input patterns */
        printf("This network demonstrates an AND gate:\n");
        for (iSample = 0; iSample < 4; iSample++) {
            inputData(input[iSample], nw);
            feedForward(nw);
            printf("Output(%g,%g): %lf\n", input[iSample][0], input[iSample][1],
                   nw->layer[nw->nLayer-1].p[0].x);
        }

        release(nw);
        free(nw);
        free(biases);
    }
}
dvec DNN::predict(const dvec& inputs) {
    if (inputs.size() != activations.at(0).rows())
        throw std::runtime_error("Input size must equal the number of input neurons");
    feedForward(inputs);
    unsigned int layers = activations.size();
    // return the final layer's activation column as a plain vector
    return activations[layers-1].col_slice(0, 0, activations[layers-1].rows() - 1);
}