int main(int argc, char *argv[])
{
    // create network
    NeuralNet network;
    NeuralLayer * pHiddenLayer1 = new NeuralTanhLayer(2,16);
    network.addLayer( pHiddenLayer1 );
    //NeuralLayer * pHiddenLayer2 = new NeuralTanhLayer(16,16);
    //network.addLayer( pHiddenLayer2 );
    NeuralLayer * pOutputLayer = new NeuralSoftmaxLayer(16,2);
    network.addLayer( pOutputLayer );

    // set learning rate, momentum, decay rate, output type
    // SCALAR = tanh or sigmoid output layer (use one output neuron)
    // PROB = softmax output layer, 1-of-C output encoding (use two output neurons)
    const unsigned int outType = PROB;
    network.setParams(0.2, 0, 0, outType);

    const unsigned int iters = 1000;
    const unsigned int testers = 2500;
    int rightCount = 0;

    for(int i=0; i<iters+testers; ++i)
    {
        double error = 0.0;
        std::vector<double> exor, training;

        // generate training data
        switch(outType)
        {
            case SCALAR:
                exor = XOR_training();
                training.push_back(exor[2]);
                exor.pop_back();
                break;
            case PROB:
                exor = softmax_XOR_training();
                training.push_back(exor[2]);
                training.push_back(exor[3]);
                exor.pop_back();
                exor.pop_back();
                break;
        }

        // training
        if( i<iters )
        {
            std::vector<double> outputs = network.runNet(exor);
            error = network.trainNet(exor,training,outType);
            switch(outType)
            {
                case SCALAR:
                    std::cout << exor[0] << "\t\t" << exor[1] << "\t\t"
                              << outputs[0] << "\t\t" << error << std::endl;
                    break;
                case PROB:
                    std::cout << exor[0] << "\t\t" << exor[1] << "\t\t"
                              << outputs[0] << "\t\t" << outputs[1] << "\t\t" << error << std::endl;
                    break;
            }
        }

        // testing
        if( i>=iters )
        {
            std::vector<double> outputs = network.runNet(exor);
            unsigned int out = 0;
            switch(outType)
            {
                case SCALAR:
                    out = ( (outputs[0]>0.5) ? 1 : 0 );
                    if( out == (int)training[0] )
                    {
                        ++rightCount;
                    }
                    break;
                case PROB:
                {
                    int classLabel = 0;
                    if( outputs[0] < outputs[1] )
                    {
                        classLabel = 1;
                    }
                    if( 1 == (int)training[classLabel] )
                    {
                        ++rightCount;
                    }
                    break;
                }
            }
        }
    }

    std::cout << std::endl << "accuracy: " << 100.0 * rightCount/testers << "%" << std::endl;
    return 0;
}
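The XOR example calls XOR_training() and softmax_XOR_training(), which are not shown in this listing. A minimal sketch of what such generators could look like follows; the names match the calls above, but the bodies here are an assumption, not the original implementations. Each returns one random XOR pattern with the two inputs first and the target value(s) appended at the end, which is why the example pops the targets off the back of the vector.

// Hypothetical data generators assumed by the XOR example above.
// Each returns {in0, in1, target...}; the caller strips the targets off the back.
#include <cstdlib>
#include <vector>

// scalar target: a single 0/1 value for a tanh or sigmoid output neuron
std::vector<double> XOR_training()
{
    double a = (double)(rand() % 2);
    double b = (double)(rand() % 2);
    double target = (double)((int)a ^ (int)b);
    std::vector<double> pattern;
    pattern.push_back(a);
    pattern.push_back(b);
    pattern.push_back(target);
    return pattern;
}

// 1-of-C target: two values, one per class, for a softmax output layer
std::vector<double> softmax_XOR_training()
{
    double a = (double)(rand() % 2);
    double b = (double)(rand() % 2);
    int x = (int)a ^ (int)b;
    std::vector<double> pattern;
    pattern.push_back(a);
    pattern.push_back(b);
    pattern.push_back(x == 0 ? 1.0 : 0.0);  // probability of class 0
    pattern.push_back(x == 1 ? 1.0 : 0.0);  // probability of class 1
    return pattern;
}

With generators of this shape, training[classLabel] in the test branch is 1.0 exactly when the softmax picked the correct class, which is what the accuracy counter checks.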
int main()
{
    // init variables
    double error = 0.;
    int truecnt = 0;
    int times,timed;

    // print useful info for reference
    std::cout << "\n" << "hidden neurons: " << "\t \t" << HIDDEN << std::endl;

    // init random number generator
    srand((int)time(NULL));

    // create network
    std::cout << "initializing network..." << "\t \t";
    NeuralNet DigitNet;
    NeuralLayer * pHiddenLayer1 = new NeuralTanhLayer(INPUT,HIDDEN);
    DigitNet.addLayer( pHiddenLayer1 );
    NeuralLayer * pOutputLayer = new NeuralSoftmaxLayer(HIDDEN,OUTPUT);
    DigitNet.addLayer( pOutputLayer );

    // set output type:
    // SCALAR = tanh or sigmoid output layer (use one output neuron)
    // PROB = softmax output layer, 1-of-N output encoding (one output neuron per class)
    const unsigned int outType = PROB;

    // set learning rate, momentum, decay rate
    const double learningRate = 0.15;
    const double momentum = 0.0;
    const double decayRate = 0.0;
    DigitNet.setParams(learningRate,momentum,decayRate,outType);
    std::cout << "done" << std::endl;

    // load training and test data
    std::cout << "loading data..." << "\t \t \t";
    std::vector< std::vector<double> > bigData( DATA_SIZE,std::vector<double>(INPUT+1,0.0) );
    loadFromFile(bigData,"train.txt");
    std::vector< std::vector<double> > trainData( TRAIN_SIZE,std::vector<double>(INPUT+1,0.0) );
    std::vector< std::vector<double> > testData( TEST_SIZE,std::vector<double>(INPUT+1,0.0) );
    buildData(bigData,trainData,TRAIN_SIZE,testData,TEST_SIZE);
    std::cout << "done" << std::endl;

    // loop over training data points and train net,
    // slicing the label off the first column of each row
    times=(int)time(NULL);    // init time counter
    std::cout << "\n" << "training examples: " << "\t \t" << TRAIN_SIZE << std::endl;
    std::cout << "learning rate: " << "\t \t \t" << learningRate << std::endl;
    std::cout << "momentum: " << "\t \t \t" << momentum << std::endl;
    std::cout << "weight decay: " << "\t \t \t" << decayRate << std::endl;
    std::cout << "training network..." << "\t \t";
    for(int i=0;i<TRAIN_SIZE;++i)
    {
        std::vector<double> data = trainData[i];            // extract data point
        double label = data[0];                             // extract point label
        data.erase(data.begin());
        std::vector<double> nLabel = encode((int)label);    // encode to 1-of-N

        std::vector<double> outputs = DigitNet.runNet(data);
        error = DigitNet.trainNet(data,nLabel,outType);     // train net, return MSE

        // decode output and compare to correct output
        if( decode(outputs) == (int)label )
            truecnt++;
    }

    // stop timer and print out useful info
    timed=(int)time(NULL);
    times=timed-times;
    std::cout << "done" << std::endl;
    std::cout << "training time: " << "\t \t \t" << times << " seconds " << std::endl;
    std::cout << "training accuracy: " << "\t \t" << truecnt*100./TRAIN_SIZE << "%" << std::endl;

    // test net on test data
    times=(int)time(NULL);    // init time counter
    std::cout << "\n" << "test points: " << "\t \t \t" << TEST_SIZE << std::endl;
    std::cout << "testing network..." << "\t \t";
    truecnt = 0;
    for(int i=0;i<TEST_SIZE;++i)
    {
        std::vector<double> data = testData[i];     // extract data point
        double label = data[0];                     // extract label
        data.erase(data.begin());

        std::vector<double> outputs = DigitNet.runNet(data);    // run net

        // decode output and compare to correct output
        if( decode(outputs) == (int)label )
            truecnt++;
    }

    // stop timer and print out useful info
    timed=(int)time(NULL);
    times=timed-times;
    std::cout << "done" << std::endl;
    std::cout << "testing time: " << "\t \t \t" << times << " seconds " << std::endl;
    std::cout << "test accuracy: " << "\t \t \t" << truecnt*100./TEST_SIZE << "% " << std::endl;

    // save weights to reuse net in the future
    DigitNet.saveNet();

    return 0;
}