void Evolver::doStep() {
    judge->parent = this;
    if (genIdx == 0) {
        // seed the first generation with randomly mutated nets
        for (int i = 0; i < genSize; i++) {
            NeuralNet* n = new NeuralNet(1, 0, 1);
            for (int j = 0; j < 10; j++) {
                n->mutate();
            }
            generation.push_back(n);
        }
    }
    genIdx++;
    setlocale(LC_ALL, "C");
    generation = judge->rate(generation);
    // refill the generation by mutating copies of surviving members
    while (generation.size() < (size_t)genSize) {
        NeuralNet* n = new NeuralNet(1, 0, 1);
        *n = *generation[rand() % generation.size()];
        for (int j = 0; j < 20; j++) {
            n->mutate();
        }
        generation.push_back(n);
    }
}
int main() {
    Board b;
    NeuralNet net;
    std::cout << b << std::endl;
    Board pb = b;
    do {
        std::cin.ignore();
        std::vector<int> outputs = net.Update(b.GetValues());
        int dir = outputs[1] * 2 + outputs[0];
        pb = b;
        b.turn(dir);
        std::cout << dir << std::endl;
        std::cout << b << std::endl;
    } while (!b.complete() && pb != b);
    std::cout << "Complete!" << std::endl;
}
bool AI::loadparam(NeuralNet& nnet) {
    ifstream input(AI_PARAM_PATH, std::ios::in);
    if (input.is_open()) {
#ifdef DEBUG
        cout << "loading model..." << endl;
#endif
        for (int i = 0; i < HIDDEN_LAYERS; ++i) {
            mat w = nnet.gethidden(i).getw();
            for (uint32_t j = 0; j < w.n_rows; ++j) {
                for (uint32_t k = 0; k < w.n_cols; ++k) {
                    double val = 0;
                    input >> val;
                    w(j, k) = val;
                }
            }
            // write the filled matrix back once per layer, not once per element
            nnet.gethidden(i).setw(w);
        }
        mat w = nnet.getoutput().getw();
        for (uint32_t j = 0; j < w.n_rows; ++j) {
            for (uint32_t k = 0; k < w.n_cols; ++k) {
                double val = 0;
                input >> val;
                w(j, k) = val;
            }
        }
        nnet.getoutput().setw(w);
        input.close();
        return true;
    }
    return false; // file could not be opened
}
NeuralNet *NeuralNet::clone() {
    NeuralNet *copy = new NeuralNet(cl);
    for (vector<Layer *>::iterator it = layers.begin(); it != layers.end(); it++) {
        LayerMaker2 *maker = (*it)->maker;
        LayerMaker2 *makerCopy = maker->clone();
        copy->addLayer(makerCopy);
    }
    copy->print();
    cout << "outputimagesize: " << copy->getOutputSize() << endl;
    return copy;
}
TEST( testpropagate, softmax_byplane ) {
    NeuralNet *net = NeuralNet::maker()->imageSize(2)->planes(1)->instance();
    net->addLayer( SoftMaxMaker::instance()->perPlane() );
    net->setBatchSize( 1 );
    int imageSizeSquared = net->layers[0]->getOutputImageSize() * net->layers[0]->getOutputImageSize();
    float *input = new float[imageSizeSquared];
    input[0] = 0;
    input[1] = 1;
    input[2] = 3;
    input[3] = 2;
    net->propagate( input );
    float const *results = net->getResults();
    float sum = 0;
    float denom = exp(0.0f) + exp(1.0f) + exp(3.0f) + exp(2.0f);
    for( int i = 0; i < imageSizeSquared; i++ ) {
        cout << "results[" << i << "]=" << results[i] << endl;
        sum += results[i];
        EXPECT_LE( 0, results[i] );
        EXPECT_GE( 1, results[i] );
        EXPECT_FLOAT_NEAR( (float)( exp(input[i]) / denom ), results[i] );
    }
    EXPECT_FLOAT_NEAR( 1.0f, sum );
    // cross-entropy loss against each possible one-hot target in turn
    float *expected = new float[imageSizeSquared];
    for( int target : {2, 0, 1, 3} ) {
        memset( expected, 0, sizeof(float) * imageSizeSquared );
        expected[target] = 1;
        float loss = net->calcLoss( expected );
        cout << "loss " << loss << endl;
        EXPECT_LT( 0, loss );
        EXPECT_FLOAT_NEAR( - log(results[target]), loss );
    }
    delete[] input;
    delete[] expected;
    delete net;
}
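// A minimal standalone sketch (no DeepCL dependency) of the math the test above
// checks: softmax p_i = exp(x_i) / sum_j exp(x_j) over the four pixels of the
// single plane, and cross-entropy loss -log(p_t) for a one-hot target at index t.
#include <cmath>
#include <cstdio>

int main() {
    const float x[4] = {0, 1, 3, 2}; // same inputs as the test
    float denom = 0;
    for (int i = 0; i < 4; i++) denom += std::exp(x[i]);
    for (int i = 0; i < 4; i++) {
        float p = std::exp(x[i]) / denom;
        // loss the test expects if the one-hot target were at index i
        std::printf("p[%d]=%f  loss-if-target=%f\n", i, p, -std::log(p));
    }
    return 0;
}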
void NeuroEvo::generate_new_members() {
    // Mutate existing members to generate more
    list<NeuralNet*>::iterator popMember = population.begin();
    for (int i = 0; i < params->popSize; i++) { // add k new members
        // commented out so that you take parent's evaluation
        // (*popMember)->evaluation = 0.0;
        NeuralNet* m = new NeuralNet(**popMember); // dereference pointer AND iterator
        m->mutate();
        population.push_back(m);
        ++popMember;
    }
}
TEST(testbackward, softmax2) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 5, 1);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SoftMaxMaker::instance());
    cout << net->asString() << endl;
    net->setBatchSize(2);
    checkLayer(net, 2);
    delete net;
    delete cl;
}
TEST(testbackward, act1) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 1, 2);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(ActivationMaker::instance()->relu());
    net->addLayer(SquareLossMaker::instance());
    // net->addLayer(SoftMaxMaker::instance());
    // maybe should use square loss maker, or cross entropy,
    // so that we don't have to make filtersize == input image size?
    cout << net->asString() << endl;
    net->setBatchSize(1);
    checkLayer(net, 2);
    delete net;
    delete cl;
}
int main() {
    NeuralNet nn = NeuralNet(2, 2, 1);
    // xor_pat[i][0] holds the two inputs of pattern i, xor_pat[i][1] the target
    double ***xor_pat = new double **[4];
    for (int i = 0; i < 4; i++) {
        xor_pat[i] = new double*[2];
        xor_pat[i][0] = new double[2];
        xor_pat[i][1] = new double[1];
    }
    xor_pat[0][0][0] = 0; xor_pat[0][0][1] = 0; xor_pat[0][1][0] = 0;
    xor_pat[1][0][0] = 0; xor_pat[1][0][1] = 1; xor_pat[1][1][0] = 1;
    xor_pat[2][0][0] = 1; xor_pat[2][0][1] = 0; xor_pat[2][1][0] = 1;
    xor_pat[3][0][0] = 1; xor_pat[3][0][1] = 1; xor_pat[3][1][0] = 0;
    nn.train(xor_pat, 1000, 0.5, 0.2, 4);
    nn.print_test(xor_pat, 4);
    return 0;
}
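// A minimal sketch that keeps the raw triple-pointer bookkeeping behind small
// helpers; assumes the same NeuralNet::train(double***, ...) layout as above
// (pattern i -> [0] inputs, [1] target). Both helper names are hypothetical.
double ***make_patterns(const double table[4][3]) {
    double ***pat = new double **[4];
    for (int i = 0; i < 4; i++) {
        pat[i] = new double *[2];
        pat[i][0] = new double[2]{table[i][0], table[i][1]}; // inputs
        pat[i][1] = new double[1]{table[i][2]};              // target
    }
    return pat;
}

void free_patterns(double ***pat) {
    for (int i = 0; i < 4; i++) {
        delete[] pat[i][0];
        delete[] pat[i][1];
        delete[] pat[i];
    }
    delete[] pat;
}

// usage: double xor_table[4][3] = {{0,0,0},{0,1,1},{1,0,1},{1,1,0}};
//        double ***xor_pat = make_patterns(xor_table); ... free_patterns(xor_pat);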
//---------------------------------------------------------------------------
std::vector<int> NeuralNet::GetConnections() const {
#define CHECK_ARCHITECTURE_165254
#ifdef CHECK_ARCHITECTURE_165254
    {
        // Read the original connection matrix
        const Array<int> a_copy = this->connectionMatrix;
        // I promise not to change this NeuralNet
        NeuralNet *n = const_cast<NeuralNet*>(this);
        const Array<int> a = n->getConnections();
        assert(a == a_copy);
    }
#endif
    const std::vector<int> v = ConvertToVector(this->connectionMatrix);
    return v;
}
void saveNet(NeuralNet& net) {
    // Serialize and save net
    ofstream outFile("best.net");
    if (!outFile.is_open())
        throw runtime_error("File to serialize best net into didn't open"); // throw by value, not by pointer
    outFile << net.serialize();
    outFile.close();
}
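// The reverse direction, sketched under the assumption that NeuralNet exposes a
// deserialize(const std::string&) counterpart to the serialize() call above --
// that method name is hypothetical, not confirmed by the snippet.
#include <fstream>
#include <sstream>
#include <stdexcept>

void loadNet(NeuralNet& net) {
    std::ifstream inFile("best.net");
    if (!inFile.is_open())
        throw std::runtime_error("File to deserialize best net from didn't open");
    std::stringstream buffer;
    buffer << inFile.rdbuf();      // slurp the whole file
    net.deserialize(buffer.str()); // hypothetical counterpart to serialize()
}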
TEST( testlogicaloperators, Convolve_1layerbiased_Or ) {
    cout << "Or, convolve" << endl;
    LogicalDataCreator ldc;
    ldc.applyOrGate();
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->planes(2)->imageSize(1)->instance();
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) );
    net->addLayer( SquareLossMaker::instance() );
    SGD *sgd = SGD::instance( cl, 0.1f, 0 );
    for( int epoch = 0; epoch < 20; epoch++ ) {
        net->epochMaker(sgd)->batchSize(4)->numExamples(4)->inputData(ldc.data)
            ->expectedOutputs(ldc.expectedOutput)->run( epoch );
        if( epoch % 5 == 0 ) cout << "Loss L " << net->calcLoss(ldc.expectedOutput) << endl;
        // AccuracyHelper::printAccuracy( ldc.N, 2, ldc.labels, net->getOutput() );
        // net->printWeights();
    }
    // net->print();
    AccuracyHelper::printAccuracy( ldc.N, 2, ldc.labels, net->getOutput() );
    float loss = net->calcLoss(ldc.expectedOutput);
    cout << "loss, E, " << loss << endl;
    EXPECT_GE( 0.4f, loss );
    delete sgd;
    delete net;
    delete cl;
}
int main(int argc, char *argv[]) {
    std::vector<std::pair<FloatVector, FloatVector>> rel;
    for (float x = 1; x < 10; x += 1) {
        for (float y = 1; y < 10; y += 1) {
            FloatVector point;
            point.push_back(x);
            point.push_back(y);
            point.push_back(1);
            FloatVector res;
            res.push_back(x);
            res.push_back(y + 3);
            res.push_back(1);
            rel.push_back(std::make_pair(point, res));
        }
    }
    Relation<FloatVector, FloatVector> relation(rel);
    std::vector<unsigned int> structure = { 3, 3 };
    NeuralNet* nn = new NeuralNet(relation, structure);
    nn->status();
    nn->print();
    Gecode::BAB<NeuralNet> se(nn);
    Gecode::Gist::Print<NeuralNet> p("Print solution");
    Gecode::Gist::Options o;
    o.inspect.click(&p);
    Gecode::Gist::bab(nn, o);
    delete nn;
    /* if(NeuralNet* nn1 = se.next()) { nn1->print(); delete nn1; } */
    return 0;
}
std::vector<std::vector<IMatrix<float>*>> NeuralNetAnalyzer::approximate_bias_gradient(NeuralNet &net) {
    // set up output
    std::vector<std::vector<IMatrix<float>*>> output(net.layers.size());
    for (int i = 0; i < output.size(); ++i) {
        output[i] = std::vector<IMatrix<float>*>(net.layers[i]->biases.size());
        for (int f = 0; f < output[i].size(); ++f)
            output[i][f] = net.layers[i]->biases[f]->clone();
    }
    // find error for current network
    net.discriminate();
    float original_error = net.global_error();
    // Numerically approximate the derivative of the error with respect to one bias
    // at a time; this requires the network to be run once for every single bias.
    for (int l = 0; l < net.layers.size(); ++l) {
        for (int f = 0; f < net.layers[l]->biases.size(); ++f) {
            for (int i = 0; i < net.layers[l]->biases[f]->rows(); ++i) {
                for (int j = 0; j < net.layers[l]->biases[f]->cols(); ++j) {
                    // adjust current bias
                    net.layers[l]->biases[f]->at(i, j) += .001f;
                    // evaluate network with adjusted bias and approximate derivative
                    net.discriminate();
                    float adjusted_error = net.global_error();
                    float diff = (adjusted_error - original_error) / .001f;
                    output[l][f]->at(i, j) = diff;
                    // reset bias
                    net.layers[l]->biases[f]->at(i, j) -= .001f;
                }
            }
        }
    }
    return output;
}
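// The loop above is a one-sided finite difference, dE/db ~= (E(b + eps) - E(b)) / eps
// with eps = .001f. A self-contained sketch of the same idea on a function with a
// known gradient, f(x, y) = x*x + 3*y, whose analytic gradient is (2x, 3):
#include <cstdio>

static double f(double x, double y) { return x * x + 3 * y; }

int main() {
    const double eps = 1e-3;
    const double x = 2.0, y = -1.0;
    double base = f(x, y);
    double dfdx = (f(x + eps, y) - base) / eps; // ~= 2x = 4, plus O(eps) error
    double dfdy = (f(x, y + eps) - base) / eps; // ~= 3
    std::printf("dfdx=%f (exact 4)  dfdy=%f (exact 3)\n", dfdx, dfdy);
    return 0;
}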
int main(int argc, char * const argv[]) {
    using namespace std;
    srand((unsigned)time(0));
    cout.precision(2);
    cout.setf(ios::fixed | ios::showpoint);
    DataSet *train = new DataSet(IDX_DIR + TRAINING_IMAGES, IDX_DIR + TRAINING_LABELS, OUTPUT_ENC);
    DataSet *test = new DataSet(IDX_DIR + TESTING_IMAGES, IDX_DIR + TESTING_LABELS, OUTPUT_ENC);
    NeuralNet *nn = new NeuralNet(train->image_vector_length(), NUM_HIDDEN_NODES, train->label_vector_length());
    int train_len = train->length();
    cout << "Training on " << train_len << " examples" << endl;
    ticks train_start = getticks();
    for (int i = 0; i < train_len; i++) {
        nn->train(train->image_vector(i), train->label_vector(i), ETA);
    }
    ticks train_end = getticks();
    int test_len = test->length();
    cout << "Testing on " << test_len << " examples" << endl;
    ticks test_start = getticks();
    int correct = 0;
    for (int i = 0; i < test_len; i++) {
        int guess = test->label_for_vector(nn->run(test->image_vector(i)));
        int answer = test->label(i);
        if (guess == answer) { correct++; }
    }
    ticks test_end = getticks();
    cout << "Correct: " << correct << "/" << test_len << " (" << 100 * double(correct) / test_len << "%)" << endl;
    cout << "Training: " << elapsed(train_end, train_start) << endl;
    cout << "Testing: " << elapsed(test_end, test_start) << endl;
    delete nn;
    delete train;
    delete test;
    return 0;
}
std::pair<float, float> NeuralNetAnalyzer::proportional_hessian_error(NeuralNet &net) {
    std::vector<std::vector<IMatrix<float>*>> expected_weights = NeuralNetAnalyzer::approximate_weight_hessian(net);
    std::vector<std::vector<IMatrix<float>*>> expected_biases = NeuralNetAnalyzer::approximate_bias_hessian(net);
    net.calculate_hessian(true, 1);
    float weight_sum = 0.0f;
    float bias_sum = 0.0f;
    int weight_n = 0;
    int bias_n = 0;
    // accumulate the proportional (relative) error of each analytic hessian entry
    for (int l = 0; l < expected_weights.size(); ++l)
        for (int d = 0; d < expected_weights[l].size(); ++d)
            for (int i = 0; i < expected_weights[l][d]->rows(); ++i)
                for (int j = 0; j < expected_weights[l][d]->cols(); ++j) {
                    weight_sum += abs(expected_weights[l][d]->at(i, j) - net.layers[l]->hessian_weights[d]->at(i, j))
                        / net.layers[l]->hessian_weights[d]->at(i, j);
                    ++weight_n;
                }
    for (int l = 0; l < expected_biases.size(); ++l)
        for (int f_0 = 0; f_0 < expected_biases[l].size(); ++f_0)
            for (int i_0 = 0; i_0 < expected_biases[l][f_0]->rows(); ++i_0)
                for (int j_0 = 0; j_0 < expected_biases[l][f_0]->cols(); ++j_0) {
                    bias_sum += abs(expected_biases[l][f_0]->at(i_0, j_0) - net.layers[l]->hessian_biases[f_0]->at(i_0, j_0))
                        / net.layers[l]->hessian_biases[f_0]->at(i_0, j_0);
                    ++bias_n;
                }
    for (int l = 0; l < expected_weights.size(); ++l)
        for (int d = 0; d < expected_weights[l].size(); ++d)
            delete expected_weights[l][d];
    for (int l = 0; l < expected_biases.size(); ++l)
        for (int f_0 = 0; f_0 < expected_biases[l].size(); ++f_0)
            delete expected_biases[l][f_0];
    return std::make_pair(weight_sum / weight_n, bias_sum / bias_n);
}
TEST( testlogicaloperators, DISABLED_Convolve_1layer_And_Nobias ) {
    cout << "And" << endl;
    LogicalDataCreator ldc;
    ldc.applyAndGate();
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->planes(2)->imageSize(1)->instance();
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(0) );
    SGD *sgd = SGD::instance( cl, 4.0f, 0 );
    for( int epoch = 0; epoch < 20; epoch++ ) {
        net->epochMaker(sgd)->batchSize(4)->numExamples(4)->inputData(ldc.data)
            ->expectedOutputs(ldc.expectedOutput)->run(epoch);
        cout << "Loss L " << net->calcLoss(ldc.expectedOutput) << endl;
        // net->printWeights();
    }
    // net->print();
    int numCorrect = AccuracyHelper::calcNumRight( ldc.N, 2, ldc.labels, net->getOutput() );
    cout << "accuracy: " << numCorrect << "/" << ldc.N << endl;
    EXPECT_EQ( numCorrect, ldc.N );
    delete sgd;
    delete net;
    delete cl;
}
std::vector<std::vector<IMatrix<float>*>> NeuralNetAnalyzer::approximate_weight_hessian(NeuralNet &net) {
    // set up output
    std::vector<std::vector<IMatrix<float>*>> output(net.layers.size());
    for (int i = 0; i < output.size(); ++i) {
        output[i] = std::vector<IMatrix<float>*>(net.layers[i]->recognition_weights.size());
        for (int j = 0; j < output[i].size(); ++j)
            output[i][j] = net.layers[i]->recognition_weights[j]->clone();
    }
    // find error for current network
    net.discriminate();
    float original_error = net.global_error();
    // Numerically approximate the second derivative of the error with respect to
    // one weight at a time; this requires the network to be run twice per weight.
    for (int l = 0; l < net.layers.size(); ++l) {
        for (int d = 0; d < net.layers[l]->recognition_weights.size(); ++d) {
            for (int i = 0; i < net.layers[l]->recognition_weights[d]->rows(); ++i) {
                for (int j = 0; j < net.layers[l]->recognition_weights[d]->cols(); ++j) {
                    // step the current weight down to -eps and evaluate
                    net.layers[l]->recognition_weights[d]->at(i, j) -= .001f;
                    net.discriminate();
                    float h_minus = net.global_error();
                    // step it up to +eps and evaluate
                    net.layers[l]->recognition_weights[d]->at(i, j) += .002f;
                    net.discriminate();
                    float h = net.global_error();
                    // central second difference: (E(w+eps) - 2E(w) + E(w-eps)) / eps^2
                    output[l][d]->at(i, j) = (h - 2 * original_error + h_minus) / (.001f * .001f);
                    // reset weight
                    net.layers[l]->recognition_weights[d]->at(i, j) -= .001f;
                }
            }
        }
    }
    return output;
}
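// The stencil above is the standard central second difference,
// d2E/dw2 ~= (E(w+eps) - 2 E(w) + E(w-eps)) / eps^2. A self-contained check on
// f(x) = x*x, whose second derivative is exactly 2 everywhere:
#include <cstdio>

static double f(double x) { return x * x; }

int main() {
    const double eps = 1e-3;
    const double x = 0.7;
    double second = (f(x + eps) - 2 * f(x) + f(x - eps)) / (eps * eps);
    std::printf("approximate f''(x)=%f (exact 2)\n", second);
    return 0;
}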
TEST(testbackward, fc1) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    ClBlasInstance blasInstance;
    NeuralNet *net = new NeuralNet(cl, 2, 4);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(FullyConnectedMaker::instance()->numPlanes(4)->imageSize(1)->biased(0));
    net->addLayer(SquareLossMaker::instance());
    // net->addLayer(SoftMaxMaker::instance());
    // maybe should use square loss maker, or cross entropy,
    // so that we don't have to make filtersize == input image size?
    cout << net->asString() << endl;
    net->setBatchSize(4);
    checkLayer(net, 2);
    delete net;
    delete cl;
}
bitset<4> Rat::makeChoices(bitset<24> observations) {
    bitset<25> pain_and_observations;
    for (int i = 1; i < 25; i++) {
        pain_and_observations[i] = observations[i-1];
    }
    pain_and_observations[0] = hit_obstacle;
    bitset<4> choices = brain.makeChoices(pain_and_observations);
    /* To see input observations and output decisions uncomment the below code
    cout << "observed " << pain_and_observations.template to_string<char, std::char_traits<char>, std::allocator<char> >() << endl;
    cout << "chose to do: " << choices.template to_string<char, std::char_traits<char>, std::allocator<char> >() << endl; */
    /* choices[0] is the choice to move down by 1, choices[1] is to move up by 1,
     * choices[2] is to move right by 1, choices[3] is to move left by 1.
     * If all four are 1 then the rat doesn't move, for example, while if
     * choices[0] = 1, choices[1] = 0, choices[2] = 0 and choices[3] = 1 then it moves
     * down and left. */
    return choices;
}
TEST(testbackward, softmaxloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 5, 1);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SoftMaxMaker::instance());
    cout << net->asString() << endl;
    const int batchSize = 2;
    net->setBatchSize(batchSize);
    const int outputPlanes = net->getOutputPlanes();
    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();
    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;
    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;
    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];
    WeightRandomizer::randomize(0, input, inputTotalSize, 0.0f, 1.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, 0.0f, 1.0f);
    // we should make the input and output a probability distribution I think
    // so: add up the input, and divide each by that. do same for expectedoutput (?)
    // normalizeAsProbabilityDistribution(input, inputTotalSize);
    normalizeAsProbabilityDistribution(outputPlanes, expectedOutput, outputTotalSize);
    // set all to zero, and one to 1, ie like labelled data
    // for(int i = 0; i < outputTotalSize; i++) {
    //     expectedOutput[i] = 0;
    // }
    // for(int n = 0; n < batchSize; n++) {
    //     int chosenLabel = 0;
    //     WeightRandomizer::randomizeInts(n, &chosenLabel, 1, 0, net->getOutputPlanes());
    //     expectedOutput[ n * outputPlanes + chosenLabel ] = 1;
    // }
    // for(int i = 0; i < outputTotalSize; i++) {
    //     cout << "expected[" << i << "]=" << expectedOutput[i] << endl;
    // }
    // now, forward prop
    // net->input(input);
    net->forward(input);
    net->print();
    // net->printOutput();
    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);
    // calculate gradInput
    net->backward(expectedOutput);
    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for (int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
        // cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
        // cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.001f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
        // cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
        // net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }
    delete[] expectedOutput;
    delete[] input;
    delete net;
    delete cl;
}
void go(Config config) {
    Timer timer;
    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;
    float *trainData = 0;
    float *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;
    int trainAllocateN = 0;
    int testAllocateN = 0;
    // int totalLinearSize;
    GenericLoader::getDimensions((config.dataDir + "/" + config.trainFile).c_str(), &Ntrain, &numPlanes, &imageSize);
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
    // long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    trainAllocateN = Ntrain;
    trainData = new float[(long)trainAllocateN * numPlanes * imageSize * imageSize];
    trainLabels = new int[trainAllocateN];
    if (Ntrain > 0) {
        GenericLoader::load((config.dataDir + "/" + config.trainFile).c_str(), trainData, trainLabels, 0, Ntrain);
    }
    GenericLoader::getDimensions((config.dataDir + "/" + config.validateFile).c_str(), &Ntest, &numPlanes, &imageSize);
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    testAllocateN = Ntest;
    testData = new float[(long)testAllocateN * numPlanes * imageSize * imageSize];
    testLabels = new int[testAllocateN];
    if (Ntest > 0) {
        GenericLoader::load((config.dataDir + "/" + config.validateFile).c_str(), testData, testLabels, 0, Ntest);
    }
    timer.timeCheck("after load images");
    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if (config.normalization == "stddev") {
        float mean, stdDev;
        NormalizationHelper::getMeanAndStdDev(trainData, normalizationExamples * inputCubeSize, &mean, &stdDev);
        cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
        translate = -mean;
        scale = 1.0f / stdDev / config.normalizationNumStds;
    } else if (config.normalization == "maxmin") {
        float mean, stdDev;
        NormalizationHelper::getMinMax(trainData, normalizationExamples * inputCubeSize, &mean, &stdDev);
        translate = -mean;
        scale = 1.0f / stdDev;
    } else {
        cout << "Error: Unknown normalization: " << config.normalization << endl;
        return;
    }
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");
    // const int numToTrain = Ntrain;
    // const int batchSize = config.batchSize;
    EasyCL *cl = new EasyCL();
    NeuralNet *net = new NeuralNet(cl);
    // net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer(InputLayerMaker::instance()->numPlanes(numPlanes)->imageSize(imageSize));
    net->addLayer(NormalizationLayerMaker::instance()->translate(translate)->scale(scale));
    if (!NetdefToNet::createNetFromNetdef(net, config.netDef)) {
        return;
    }
    net->print();
    for (int i = 1; i < net->getNumLayers() - 1; i++) {
        Layer *layer = net->getLayer(i);
        FullyConnectedLayer *fc = dynamic_cast<FullyConnectedLayer *>(layer);
        ConvolutionalLayer *conv = dynamic_cast<ConvolutionalLayer *>(layer);
        if (fc != 0) {
            conv = fc->convolutionalLayer;
        }
        if (conv == 0) {
            continue;
        }
        initrand.seed(0);
        int weightsSize = conv->getWeightsSize();
        // int weightsSize = layer->getPersistSize();
        if (weightsSize > 0) {
            cout << "weightsSize " << weightsSize << endl;
            float *weights = new float[weightsSize];
            for (int j = 0; j < weightsSize; j++) {
                int thisrand = (int)initrand();
                float thisweight = (thisrand % 100000) / 1000000.0f;
                weights[j] = thisweight;
            }
            conv->initWeights(weights);
        }
        if (conv->dim.biased) {
            initrand.seed(0);
            int biasedSize = conv->getBiasSize();
            float *biasWeights = new float[biasedSize];
            for (int j = 0; j < biasedSize; j++) {
                int thisrand = (int)initrand();
                float thisweight = (thisrand % 100000) / 1000000.0f;
                biasWeights[j] = thisweight;
                // biasWeights[j] = 0;
            }
            conv->initBias(biasWeights);
        }
    }
    cout << "weight samples before learning:" << endl;
    sampleWeights(net);
    bool afterRestart = false;
    int restartEpoch = 0;
    // int restartBatch = 0;
    // float restartAnnealedLearningRate = 0;
    // int restartNumRight = 0;
    // float restartLoss = 0;
    timer.timeCheck("before learning start");
    if (config.dumpTimings) {
        StatefulTimer::dump(true);
    }
    StatefulTimer::timeCheck("START");
    SGD *sgd = SGD::instance(cl, config.learningRate, 0.0f);
    Trainable *trainable = net;
    NetLearner netLearner(
        sgd, trainable,
        Ntrain, trainData, trainLabels,
        Ntest, testData, testLabels,
        config.batchSize);
    netLearner.setSchedule(config.numEpochs, afterRestart ? restartEpoch : 1);
    // netLearner.setBatchSize(config.batchSize);
    netLearner.setDumpTimings(config.dumpTimings);
    // netLearner.learn(config.learningRate, 1.0f);
    cout << "forward output" << endl;
    for (int layerId = 0; layerId < net->getNumLayers(); layerId++) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast<FullyConnectedLayer *>(layer);
        ConvolutionalLayer *conv = dynamic_cast<ConvolutionalLayer *>(layer);
        PoolingLayer *pool = dynamic_cast<PoolingLayer *>(layer);
        SoftMaxLayer *softMax = dynamic_cast<SoftMaxLayer *>(layer);
        if (fc != 0) {
            conv = fc->convolutionalLayer;
        }
        int planes = 0;
        int imageSize = 0;
        if (conv != 0) {
            cout << "convolutional (or conv based, ie fc)" << endl;
            planes = conv->dim.numFilters;
            imageSize = conv->dim.outputSize;
            // continue;
        } else if (pool != 0) {
            cout << "pooling" << endl;
            planes = pool->numPlanes;
            imageSize = pool->outputSize;
        } else if (softMax != 0) {
            cout << "softmax" << endl;
            planes = softMax->numPlanes;
            imageSize = softMax->imageSize;
        } else {
            continue;
        }
        cout << "layer " << layerId << endl;
        // conv->getOutput();
        float const *output = layer->getOutput();
        // for(int i = 0; i < 3; i++) {
        //     cout << conv->getOutput()[i] << endl;
        // }
        initrand.seed(0);
        // LayerDimensions &dim = conv->dim;
        for (int i = 0; i < 10; i++) {
            int thisrand = abs((int)initrand());
            int seq = thisrand % (planes * imageSize * imageSize);
            int outPlane = seq / (imageSize * imageSize);
            int rowcol = seq % (imageSize * imageSize);
            int row = rowcol / imageSize;
            int col = rowcol % imageSize;
            cout << "out[" << outPlane << "," << row << "," << col << "]=" << output[seq] << endl;
        }
    }
    cout << "weight samples after learning:" << endl;
    sampleWeights(net);
    cout << "backprop output" << endl;
    for (int layerId = net->getNumLayers() - 1; layerId >= 0; layerId--) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast<FullyConnectedLayer *>(layer);
        ConvolutionalLayer *conv = dynamic_cast<ConvolutionalLayer *>(layer);
        if (fc != 0) {
            conv = fc->convolutionalLayer;
        }
        if (conv == 0) {
            continue;
        }
        cout << "layer " << layerId << endl;
        float const *weights = conv->getWeights();
        float const *biases = conv->getBias();
        int weightsSize = conv->getWeightsSize() / conv->dim.numFilters;
        for (int i = 0; i < weightsSize; i++) {
            cout << "  weight " << i << " " << weights[i] << endl;
        }
        for (int i = 0; i < 3; i++) {
            cout << "  bias " << i << " " << biases[i] << endl;
        }
    }
    cout << "done" << endl;
    delete sgd;
    delete net;
    delete cl;
    if (trainData != 0) { delete[] trainData; }
    if (testData != 0) { delete[] testData; }
    if (testLabels != 0) { delete[] testLabels; }
    if (trainLabels != 0) { delete[] trainLabels; }
}
void go(Config config) {
    Timer timer;
    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;
    unsigned char *trainData = 0;
    unsigned char *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;
    int trainAllocateN = 0;
    int testAllocateN = 0;
    // int totalLinearSize;
    GenericLoader::getDimensions( config.dataDir + "/" + config.trainFile, &Ntrain, &numPlanes, &imageSize );
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
    // long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    if( config.loadOnDemand ) {
        trainAllocateN = config.batchSize; // can improve this later
    } else {
        trainAllocateN = Ntrain;
    }
    trainData = new unsigned char[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if( !config.loadOnDemand && Ntrain > 0 ) {
        GenericLoader::load( config.dataDir + "/" + config.trainFile, trainData, trainLabels, 0, Ntrain );
    }
    GenericLoader::getDimensions( config.dataDir + "/" + config.validateFile, &Ntest, &numPlanes, &imageSize );
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    if( config.loadOnDemand ) {
        testAllocateN = config.batchSize; // can improve this later
    } else {
        testAllocateN = Ntest;
    }
    testData = new unsigned char[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN];
    if( !config.loadOnDemand && Ntest > 0 ) {
        GenericLoader::load( config.dataDir + "/" + config.validateFile, testData, testLabels, 0, Ntest );
    }
    cout << "Ntest " << Ntest << endl;
    timer.timeCheck("after load images");
    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if( !config.loadOnDemand ) {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizationHelper::getMeanAndStdDev( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            float mean, stdDev;
            NormalizationHelper::getMinMax( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            translate = - mean;
            scale = 1.0f / stdDev;
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    } else {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizeGetStdDev<unsigned char> normalizeGetStdDev( trainData, trainLabels );
            BatchProcess::run<unsigned char>( config.dataDir + "/" + config.trainFile, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetStdDev );
            normalizeGetStdDev.calcMeanStdDev( &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            NormalizeGetMinMax<unsigned char> normalizeGetMinMax( trainData, trainLabels );
            BatchProcess::run( config.dataDir + "/" + config.trainFile, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetMinMax );
            normalizeGetMinMax.calcMinMaxTransform( &translate, &scale );
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    }
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");
    // const int numToTrain = Ntrain;
    // const int batchSize = config.batchSize;
    NeuralNet *net = new NeuralNet();
    // net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer( InputLayerMaker<unsigned char>::instance()->numPlanes(numPlanes)->imageSize(imageSize) );
    net->addLayer( NormalizationLayerMaker::instance()->translate(translate)->scale(scale) );
    if( !NetdefToNet::createNetFromNetdef( net, config.netDef ) ) {
        return;
    }
    net->print();
    bool afterRestart = false;
    int restartEpoch = 0;
    int restartBatch = 0;
    float restartAnnealedLearningRate = 0;
    int restartNumRight = 0;
    float restartLoss = 0;
    if( config.loadWeights && config.weightsFile != "" ) {
        afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            cout << "Weights file " << config.weightsFile << " exists, but doesn't match training options provided => aborting" << endl;
            cout << "Please either check the training options, or choose a weights file that doesn't exist yet" << endl;
            return;
        }
    }
    timer.timeCheck("before learning start");
    if( config.dumpTimings ) {
        StatefulTimer::dump( true );
    }
    StatefulTimer::timeCheck("START");
    Trainable *trainable = net;
    MultiNet *multiNet = 0;
    if( config.multiNet > 1 ) {
        multiNet = new MultiNet( config.multiNet, net );
        trainable = multiNet;
    }
    if( config.loadOnDemand ) {
        NetLearnerOnDemand<unsigned char> netLearner( trainable );
        netLearner.setTrainingData( config.dataDir + "/" + config.trainFile, Ntrain );
        netLearner.setTestingData( config.dataDir + "/" + config.validateFile, Ntest );
        netLearner.setSchedule( config.numEpochs, afterRestart ? restartEpoch : 1 );
        netLearner.setBatchSize( config.fileReadBatches, config.batchSize );
        netLearner.setDumpTimings( config.dumpTimings );
        WeightsWriter weightsWriter( net, &config );
        if( config.weightsFile != "" ) {
            netLearner.addPostEpochAction( &weightsWriter );
        }
        netLearner.learn( config.learningRate, config.annealLearningRate );
    } else {
        NetLearner<unsigned char> netLearner( trainable );
        netLearner.setTrainingData( Ntrain, trainData, trainLabels );
        netLearner.setTestingData( Ntest, testData, testLabels );
        netLearner.setSchedule( config.numEpochs, afterRestart ? restartEpoch : 1 );
        netLearner.setBatchSize( config.batchSize );
        netLearner.setDumpTimings( config.dumpTimings );
        WeightsWriter weightsWriter( net, &config );
        if( config.weightsFile != "" ) {
            netLearner.addPostEpochAction( &weightsWriter );
        }
        netLearner.learn( config.learningRate, config.annealLearningRate );
    }
    if( multiNet != 0 ) {
        delete multiNet;
    }
    delete net;
    if( trainData != 0 ) { delete[] trainData; }
    if( testData != 0 ) { delete[] testData; }
    if( testLabels != 0 ) { delete[] testLabels; }
    if( trainLabels != 0 ) { delete[] trainLabels; }
}
int main( int argc, char *argv[] ) {
    // ScenarioImage scenario;
    ScenarioImage *scenario = new ScenarioImage( 5, true );
    EasyCL *cl = new EasyCL();
    NeuralNet *net = new NeuralNet( cl );
    SGD *sgd = SGD::instance( cl, 0.1f, 0.0f );
    const int size = scenario->getPerceptionSize();
    const int planes = scenario->getPerceptionPlanes();
    const int numActions = scenario->getNumActions();
    net->addLayer( InputLayerMaker::instance()->numPlanes(planes)->imageSize(size) );
    net->addLayer( ConvolutionalMaker::instance()->filterSize(5)->numFilters(8)->biased()->padZeros() );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( ConvolutionalMaker::instance()->filterSize(5)->numFilters(8)->biased()->padZeros() );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( FullyConnectedMaker::instance()->imageSize(1)->numPlanes(100)->biased() );
    net->addLayer( ActivationMaker::instance()->tanh() );
    net->addLayer( FullyConnectedMaker::instance()->imageSize(1)->numPlanes(numActions)->biased() );
    net->addLayer( SquareLossMaker::instance() );
    net->print();
    scenario->setNet( net ); // used by the printQRepresentation method
    QLearner qLearner( sgd, scenario, net );
    qLearner.run();
    // delete[] expectedOutputs;
    // delete[] lastPerception;
    // delete[] perception;
    delete sgd;
    delete net;
    delete scenario;
    delete cl;
    return 0;
}
int main() {
    // init variables
    double error = 0.;
    int truecnt = 0;
    int times, timed;

    // print useful info for reference
    std::cout << "\n" << "hidden neurons: " << "\t \t" << HIDDEN << std::endl;
    // init random number generator
    srand((int)time(NULL));

    // create network
    std::cout << "initializing network..." << "\t \t";
    NeuralNet DigitNet;
    NeuralLayer * pHiddenLayer1 = new NeuralTanhLayer(INPUT, HIDDEN);
    DigitNet.addLayer( pHiddenLayer1 );
    NeuralLayer * pOutputLayer = new NeuralSoftmaxLayer(HIDDEN, OUTPUT);
    DigitNet.addLayer( pOutputLayer );

    // set output type:
    // SCALAR = tanh or sigmoid output layer (use one output neuron)
    // PROB = softmax output layer, 1-of-N output encoding (use two output neurons)
    const unsigned int outType = PROB;

    // set learning rate, momentum, decay rate
    const double learningRate = 0.15;
    const double momentum = 0.0;
    const double decayRate = 0.0;
    DigitNet.setParams(learningRate, momentum, decayRate, outType);
    std::cout << "done" << std::endl;

    // load training and test data
    std::cout << "loading data..." << "\t \t \t";
    std::vector< std::vector<double> > bigData( DATA_SIZE, std::vector<double>(INPUT+1, 0.0) );
    loadFromFile(bigData, "train.txt");
    std::vector< std::vector<double> > trainData( TRAIN_SIZE, std::vector<double>(INPUT+1, 0.0) );
    std::vector< std::vector<double> > testData( TEST_SIZE, std::vector<double>(INPUT+1, 0.0) );
    buildData(bigData, trainData, TRAIN_SIZE, testData, TEST_SIZE);
    std::cout << "done" << std::endl;

    // loop over training data points and train net
    // slice off first column of each row (example)
    times = (int)time(NULL); // init time counter
    std::cout << "\n" << "training examples: " << "\t \t" << TRAIN_SIZE << std::endl;
    std::cout << "learning rate: " << "\t \t \t" << learningRate << std::endl;
    std::cout << "momentum: " << "\t \t \t" << momentum << std::endl;
    std::cout << "weight decay: " << "\t \t \t" << decayRate << std::endl;
    std::cout << "training network..." << "\t \t";
    for(int i = 0; i < TRAIN_SIZE; ++i) {
        std::vector<double> data = trainData[i]; // extract data point
        double label = data[0]; // extract point label
        data.erase(data.begin());
        std::vector<double> nLabel = encode((int)label); // encode to 1-of-N
        std::vector<double> outputs = DigitNet.runNet(data);
        error = DigitNet.trainNet(data, nLabel, outType); // train net, return MSE
        // decode output and compare to correct output
        if( decode(outputs) == (int)label ) truecnt++;
    }

    // stop timer and print out useful info
    timed = (int)time(NULL);
    times = timed - times;
    std::cout << "done" << std::endl;
    std::cout << "training time: " << "\t \t \t" << times << " seconds " << std::endl;
    std::cout << "training accuracy: " << "\t \t" << truecnt*100./TRAIN_SIZE << "%" << std::endl;

    // test net on test data
    times = (int)time(NULL); // init time counter
    std::cout << "\n" << "test points: " << "\t \t \t" << TEST_SIZE << std::endl;
    std::cout << "testing network..." << "\t \t";
    truecnt = 0;
    for(int i = 0; i < TEST_SIZE; ++i) {
        std::vector<double> data = testData[i]; // extract data point
        double label = data[0]; // extract label
        data.erase(data.begin());
        std::vector<double> outputs = DigitNet.runNet(data); // run net
        // decode output and compare to correct output
        if( decode(outputs) == (int)label ) truecnt++;
    }

    // stop timer and print out useful info
    timed = (int)time(NULL);
    times = timed - times;
    std::cout << "done" << std::endl;
    std::cout << "testing time: " << "\t \t \t" << times << " seconds " << std::endl;
    std::cout << "test accuracy: " << "\t \t \t" << truecnt*100./TEST_SIZE << "% " << std::endl;

    // save weights to reuse net in the future
    DigitNet.saveNet();
}
int main(int argc, char *argv[]) {
    // create network
    NeuralNet network;
    NeuralLayer * pHiddenLayer1 = new NeuralTanhLayer(2, 16);
    network.addLayer( pHiddenLayer1 );
    //NeuralLayer * pHiddenLayer2 = new NeuralTanhLayer(16, 16);
    //network.addLayer( pHiddenLayer2 );
    NeuralLayer * pOutputLayer = new NeuralSoftmaxLayer(16, 2);
    network.addLayer( pOutputLayer );

    // set learning rate, momentum, decay rate, output type
    // SCALAR = tanh or sigmoid output layer (use one output neuron)
    // PROB = softmax output layer, 1-of-C output encoding (use two output neurons)
    const unsigned int outType = PROB;
    network.setParams(0.2, 0, 0, outType);

    const unsigned int iters = 1000;
    const unsigned int testers = 2500;
    int rightCount = 0;
    for(int i = 0; i < iters + testers; ++i) {
        double error = 0.0;
        std::vector<double> exor, training;
        // generate training data
        switch(outType) {
            case SCALAR:
                exor = XOR_training();
                training.push_back(exor[2]);
                exor.pop_back();
                break;
            case PROB:
                exor = softmax_XOR_training();
                training.push_back(exor[2]);
                training.push_back(exor[3]);
                exor.pop_back();
                exor.pop_back();
                break;
        }
        // training
        if( i < iters ) {
            std::vector<double> outputs = network.runNet(exor);
            error = network.trainNet(exor, training, outType);
            switch(outType) {
                case SCALAR:
                    std::cout << exor[0] << "\t\t" << exor[1] << "\t\t" << outputs[0] << "\t\t" << error << std::endl;
                    break;
                case PROB:
                    std::cout << exor[0] << "\t\t" << exor[1] << "\t\t" << outputs[0] << "\t\t" << outputs[1] << "\t\t" << error << std::endl;
                    break;
            }
        }
        // testing
        if( i >= iters ) {
            std::vector<double> outputs = network.runNet(exor);
            unsigned int out = 0;
            switch(outType) {
                case SCALAR:
                    out = ( (outputs[0] > 0.5) ? 1 : 0 );
                    if( out == (int)training[0] ) { ++rightCount; }
                    break;
                case PROB:
                    int classLabel = 0;
                    if( outputs[0] < outputs[1] ) { classLabel = 1; }
                    if( 1 == (int)training[classLabel] ) { ++rightCount; }
                    break;
            }
        }
    }
    std::cout << std::endl << "accuracy: " << 100.0 * rightCount/testers << "%" << std::endl;
    return 0;
}
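// XOR_training()/softmax_XOR_training() are not shown in the snippet; from the
// way their return values are consumed above, a plausible sketch: two random
// binary inputs followed by the target(s) -- one scalar target for SCALAR, or a
// two-element 1-of-C target for the softmax case. Both helpers are assumptions.
#include <cstdlib>
#include <vector>

std::vector<double> XOR_training() {
    double a = rand() % 2, b = rand() % 2;
    double y = (a != b) ? 1.0 : 0.0;  // XOR truth table
    return { a, b, y };               // inputs, then scalar target
}

std::vector<double> softmax_XOR_training() {
    double a = rand() % 2, b = rand() % 2;
    bool one = (a != b);
    // inputs, then 1-of-C target: index 0 = "false", index 1 = "true"
    return { a, b, one ? 0.0 : 1.0, one ? 1.0 : 0.0 };
}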
TEST( testforward, softmax_byplane ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->imageSize(2)->planes(1)->instance();
    net->addLayer( SoftMaxMaker::instance()->perPlane() );
    net->setBatchSize( 1 );
    int imageSizeSquared = net->getLayer(0)->getOutputSize() * net->getLayer(0)->getOutputSize();
    float *input = new float[imageSizeSquared];
    input[0] = 0;
    input[1] = 1;
    input[2] = 3;
    input[3] = 2;
    net->forward( input );
    float const *output = net->getOutput();
    float sum = 0;
    float denom = exp(0.0f) + exp(1.0f) + exp(3.0f) + exp(2.0f);
    for( int i = 0; i < imageSizeSquared; i++ ) {
        cout << "output[" << i << "]=" << output[i] << endl;
        sum += output[i];
        EXPECT_LE( 0, output[i] );
        EXPECT_GE( 1, output[i] );
        EXPECT_FLOAT_NEAR( (float)( exp(input[i]) / denom ), output[i] );
    }
    EXPECT_FLOAT_NEAR( 1.0f, sum );
    // cross-entropy loss against each possible one-hot target in turn
    float *expected = new float[imageSizeSquared];
    for( int target : {2, 0, 1, 3} ) {
        memset( expected, 0, sizeof(float) * imageSizeSquared );
        expected[target] = 1;
        float loss = net->calcLoss( expected );
        cout << "loss " << loss << endl;
        EXPECT_LT( 0, loss );
        EXPECT_FLOAT_NEAR( - log(output[target]), loss );
    }
    delete[] input;
    delete[] expected;
    delete net;
    delete cl;
}
void testNumerically(float learningRate, int batchSize, int imageSize, int filterSize, int numPlanes, ActivationFunction *fn, bool padZeros, int its = 20) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    ClBlasInstance clblasInstance;
    NeuralNet *net = NeuralNet::maker(cl)->planes(numPlanes)->imageSize(imageSize)->instance();
    net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros));
    net->addLayer(ActivationMaker::instance()->fn(fn));
    net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros));
    net->addLayer(ActivationMaker::instance()->fn(fn));
    net->addLayer(SquareLossMaker::instance());
    net->setBatchSize(batchSize);
    int inputNumElements = net->getLayer(0)->getOutputNumElements();
    int outputNumElements = net->getLastLayer()->getOutputNumElements();
    int weightsSize1 = net->getLayer(1)->getWeightsSize();
    int weightsSize2 = net->getLayer(3)->getWeightsSize();
    float *inputData = new float[std::max<int>(10000, inputNumElements)];
    float *expectedOutput = new float[std::max<int>(10000, outputNumElements)];
    memset(inputData, 0, sizeof(float) * std::max<int>(10000, inputNumElements));
    memset(expectedOutput, 0, sizeof(float) * std::max<int>(10000, outputNumElements));
    // int seed = 0;
    std::mt19937 random = WeightRandomizer::randomize(inputData, std::max<int>(10000, inputNumElements), -2.0f, 2.0f);
    WeightRandomizer::randomize(random, expectedOutput, std::max<int>(10000, outputNumElements), -2.0f, 2.0f);
    WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weights, weightsSize1, -2.0f, 2.0f);
    dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToDevice();
    WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weights, weightsSize2, -2.0f, 2.0f);
    dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice();
    SGD *sgd = SGD::instance(cl, learningRate, 0.0f);
    for (int it = 0; it < its; it++) {
        float *weightsBefore1 = new float[weightsSize1];
        float *currentWeights = net->getLayer(1)->getWeights();
        for (int i = 0; i < weightsSize1; i++) {
            weightsBefore1[i] = currentWeights[i];
        }
        float *weightsBefore2 = new float[weightsSize2];
        currentWeights = net->getLayer(3)->getWeights();
        for (int i = 0; i < weightsSize2; i++) {
            weightsBefore2[i] = currentWeights[i];
        }
        net->forward(inputData);
        // net->print();
        float loss = net->calcLoss(expectedOutput);
        dynamic_cast<LossLayer*>(net->getLayer(5))->calcLoss(expectedOutput);
        // net->backward(expectedOutput);
        TrainingContext context(0, 0);
        sgd->train(net, &context, inputData, expectedOutput);
        dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToHost();
        // restore 2nd layer weights :-)
        for (int i = 0; i < weightsSize2; i++) {
            // dynamic_cast<ConvolutionalLayer*>(net->getLayer(2))->weights[i] = weightsBefore2[i];
        }
        dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice();
        net->forward(inputData);
        float loss2 = net->calcLoss(expectedOutput);
        float lossChange = loss - loss2;
        cout << " loss " << loss << " loss2 " << loss2 << " change: " << lossChange << endl;
        float *newWeights = net->getLayer(1)->getWeights();
        float sumWeightDiff = 0;
        float sumWeightDiffSquared = 0;
        for (int i = 0; i < weightsSize1; i++) {
            float diff = newWeights[i] - weightsBefore1[i];
            sumWeightDiff += diff;
            sumWeightDiffSquared += diff * diff;
        }
        newWeights = net->getLayer(3)->getWeights();
        for (int i = 0; i < weightsSize2; i++) {
            float diff = newWeights[i] - weightsBefore2[i];
            sumWeightDiff += diff;
            sumWeightDiffSquared += diff * diff;
        }
        cout << "sumweightsdiff " << sumWeightDiff << endl;
        // cout << "sumweightsdiff / learningrate " << (sumWeightDiff / learningRate) << endl;
        // cout << "sum weightsdiffsquared " << (sumWeightDiffSquared / learningRate / learningRate * imageSize) << endl;
        float estimatedLossChangeFromW = sumWeightDiffSquared / learningRate; // / filterSize;
        cout << " loss change " << lossChange << endl;
        cout << " estimatedLossChangeFromW " << estimatedLossChangeFromW << endl;
        // cout << abs(estimatedLossChangeFromW - lossChange) / lossChange << endl;
        // cout << abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW << endl;
        EXPECT_GT(0.01f * imageSize * imageSize, abs(estimatedLossChangeFromW - lossChange) / lossChange);
        EXPECT_GT(0.01f * imageSize * imageSize, abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW);
        delete[] weightsBefore1;
        delete[] weightsBefore2;
    }
    // delete[] weights1;
    // delete[] errors;
    // delete[] output;
    delete sgd;
    delete[] inputData;
    delete[] expectedOutput;
    delete net;
    delete cl;
}
// layer2 plane0=0 "planes not both -1 and planes not both 1"
//        weights = plane0*(-1) + plane1*(-1)
//        plane1=1 "planes both -1 or planes both 1"
//        weights = plane0*(1) + plane1*(1)
TEST( testlogicaloperators, Convolve_2layers_relu_Xor ) {
    cout << "Xor, convolve" << endl;
    // LogicalDataCreator ldc(new TanhActivation());
    // ldc.applyXorGate();
    // int imageSize = 1;
    // int inPlanes = 2;
    int numExamples = 4;
    // int filterSize = 1;
    float data[] = { -1, -1,
                     -1, 1,
                     1, -1,
                     1, 1 };
    // going to preset these weights, to near an optimal solution,
    // and at least show the network is stable, and gives the correct result...
    float layer1weights[] = {
        -0.4f, -0.55f,
        0.52f, 0.53f,
    };
    float layer1bias[] = { 0.1f, -0.1f };
    float layer2weights[] = {
        1.1f, 0.9f,
        -0.8f, -1.2f
    };
    float layer2bias[] = { 0.1f, 1.1f };
    float expectedOutput[] = { 1, 0,
                               0, 1,
                               0, 1,
                               1, 0 };
    int labels[] = { 0, 1, 1, 0 };
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->planes(2)->imageSize(1)->instance();
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( SquareLossMaker::instance() );
    cout << "hand-setting weights..." << endl;
    net->initWeights( 1, layer1weights, layer1bias );
    net->initWeights( 3, layer2weights, layer2bias );
    // net->printWeights();
    // net->setBatchSize(4);
    // net->forward( data );
    // net->print();
    SGD *sgd = SGD::instance( cl, 0.1f, 0 );
    for( int epoch = 0; epoch < 200; epoch++ ) {
        net->epochMaker(sgd)->batchSize(numExamples)->numExamples(numExamples)->inputData(data)
            ->expectedOutputs(expectedOutput)->run( epoch );
        if( epoch % 5 == 0 ) cout << "Loss L " << net->calcLoss(expectedOutput) << endl;
    }
    net->print();
    AccuracyHelper::printAccuracy( numExamples, 2, labels, net->getOutput() );
    float loss = net->calcLoss(expectedOutput);
    cout << "loss, E, " << loss << endl;
    EXPECT_GE( 0.0000001f, loss );
    delete sgd;
    delete net;
    delete cl;
}
TEST(testbackward, squareloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 3, 5);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SquareLossMaker::instance());
    cout << net->asString() << endl;
    int batchSize = 32;
    net->setBatchSize(batchSize);
    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();
    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;
    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;
    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];
    WeightRandomizer::randomize(0, input, inputTotalSize, -2.0f, 2.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, -2.0f, 2.0f);
    // now, forward prop
    // net->input(input);
    net->forward(input);
    net->print();
    // net->printOutput();
    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);
    // calculate gradInput
    net->backward(expectedOutput);
    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for (int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
        // cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
        // cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.01f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
        // cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
        // net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }
    delete[] expectedOutput;
    delete[] input;
    delete net;
    delete cl;
}