// Forwards one 2x2, single-plane image holding the values {0, 1, 3, 2} through
// a per-plane softmax layer and checks that:
//   - every output lies in [0, 1] and the outputs sum to 1,
//   - output[i] equals exp(x_i) / sum_j exp(x_j),
//   - calcLoss() against a one-hot target equals -log(output[label]), for each
//     of the four possible labels.
TEST( testforward, softmax_byplane ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->imageSize(2)->planes(1)->instance();
    net->addLayer( SoftMaxMaker::instance()->perPlane() );
    net->setBatchSize( 1 );

    int imageSizeSquared = net->getLayer(0)->getOutputSize() * net->getLayer(0)->getOutputSize();

    // the four hand-picked pixel values fed into the softmax
    const int numInputs = 4;
    const float pixelValues[] = { 0, 1, 3, 2 };
    float *input = new float[imageSizeSquared];
    for( int i = 0; i < numInputs; i++ ) {
        input[i] = pixelValues[i];
    }

    net->forward( input );
    float const*output = net->getOutput();

    // each output is a probability, and together they sum to one
    float sum = 0;
    for( int i = 0; i < imageSizeSquared; i++ ) {
        cout << "output[" << i << "]=" << output[i] << endl;
        sum += output[i];
        EXPECT_LE( 0, output[i] );
        EXPECT_GE( 1, output[i] );
    }
    EXPECT_FLOAT_NEAR( 1.0f, sum );

    // each output matches the closed-form softmax of the inputs
    for( int i = 0; i < numInputs; i++ ) {
        EXPECT_FLOAT_NEAR(
            (float)( exp(pixelValues[i])/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ),
            output[i] );
    }

    // loss for a one-hot target is -log of the output at the hot index;
    // labels are visited in the order 2, 0, 1, 3
    const int numLabels = 4;
    const int labelOrder[] = { 2, 0, 1, 3 };
    float *expected = new float[imageSizeSquared];
    for( int k = 0; k < numLabels; k++ ) {
        const int label = labelOrder[k];
        memset( expected, 0, sizeof(float) * imageSizeSquared );
        expected[label] = 1;
        float loss = net->calcLoss( expected );
        cout << "loss " << loss << endl;
        EXPECT_LT( 0, loss );
        EXPECT_FLOAT_NEAR( - log(output[label]), loss );
    }

    delete[] input;
    delete[] expected;
    delete net;
    delete cl;
}
// Trains a single biased convolutional layer (2 filters, filterSize 1, over
// 2 input planes of imageSize 1) with a square-loss layer on the data that
// LogicalDataCreator::applyOrGate() produces, for 20 epochs of SGD, and then
// requires the final loss to be no greater than 0.4.
TEST( testlogicaloperators, Convolve_1layerbiased_Or ) {
    cout << "Or, convolve" << endl;

    LogicalDataCreator ldc;
    ldc.applyOrGate();

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->planes(2)->imageSize(1)->instance();
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) );
    net->addLayer( SquareLossMaker::instance() );

    SGD *sgd = SGD::instance( cl, 0.1f, 0 );

    const int numEpochs = 20;
    const int reportInterval = 5;
    for( int epoch = 0; epoch != numEpochs; ++epoch ) {
        net->epochMaker(sgd)
            ->batchSize(4)
            ->numExamples(4)
            ->inputData(ldc.data)
            ->expectedOutputs(ldc.expectedOutput)
            ->run( epoch );

        // report the loss on every fifth epoch only
        const bool isReportEpoch = ( epoch % reportInterval ) == 0;
        if( isReportEpoch ) {
            cout << "Loss L " << net->calcLoss(ldc.expectedOutput) << endl;
        }
        // debugging aids, disabled:
        //   AccuracyHelper::printAccuracy( ldc.N, 2, ldc.labels, net->getOutput() );
        //   net->printWeights();
    }
    // net->print();

    AccuracyHelper::printAccuracy( ldc.N, 2, ldc.labels, net->getOutput() );

    float loss = net->calcLoss(ldc.expectedOutput);
    cout << "loss, E, " << loss << endl;
    EXPECT_GE( 0.4f, loss );

    delete sgd;
    delete net;
    delete cl;
}
// Propagates one 2x2, single-plane image holding the values {0, 1, 3, 2}
// through a per-plane softmax layer and checks that:
//   - every result lies in [0, 1] and the results sum to 1,
//   - results[i] equals exp(x_i) / sum_j exp(x_j),
//   - calcLoss() against a one-hot target equals -log(results[label]), for
//     each of the four possible labels.
TEST( testpropagate, softmax_byplane ) {
    NeuralNet *net = NeuralNet::maker()->imageSize(2)->planes(1)->instance();
    net->addLayer( SoftMaxMaker::instance()->perPlane() );
    net->setBatchSize( 1 );

    int imageSizeSquared = net->layers[0]->getOutputImageSize() * net->layers[0]->getOutputImageSize();

    // the four hand-picked pixel values fed into the softmax
    const int numInputs = 4;
    const float pixelValues[] = { 0, 1, 3, 2 };
    float *input = new float[imageSizeSquared];
    for( int i = 0; i < numInputs; i++ ) {
        input[i] = pixelValues[i];
    }

    net->propagate( input );
    float const*results = net->getResults();

    // each result is a probability, and together they sum to one
    float sum = 0;
    for( int i = 0; i < imageSizeSquared; i++ ) {
        cout << "results[" << i << "]=" << results[i] << endl;
        sum += results[i];
        EXPECT_LE( 0, results[i] );
        EXPECT_GE( 1, results[i] );
    }
    EXPECT_FLOAT_NEAR( 1.0f, sum );

    // each result matches the closed-form softmax of the inputs
    for( int i = 0; i < numInputs; i++ ) {
        EXPECT_FLOAT_NEAR(
            (float)( exp(pixelValues[i])/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ),
            results[i] );
    }

    // loss for a one-hot target is -log of the result at the hot index;
    // labels are visited in the order 2, 0, 1, 3
    const int numLabels = 4;
    const int labelOrder[] = { 2, 0, 1, 3 };
    float *expected = new float[imageSizeSquared];
    for( int k = 0; k < numLabels; k++ ) {
        const int label = labelOrder[k];
        memset( expected, 0, sizeof(float) * imageSizeSquared );
        expected[label] = 1;
        float loss = net->calcLoss( expected );
        cout << "loss " << loss << endl;
        EXPECT_LT( 0, loss );
        EXPECT_FLOAT_NEAR( - log(results[label]), loss );
    }

    delete[] input;
    delete[] expected;
    delete net;
}
// Disabled test: trains a single unbiased convolutional layer (2 filters,
// filterSize 1, over 2 input planes of imageSize 1) on the data that
// LogicalDataCreator::applyAndGate() produces, for 20 epochs of SGD, and
// expects every one of the ldc.N examples to be classified correctly.
// NOTE(review): no loss layer is added to the net before calcLoss() is
// called here -- confirm whether that is intentional.
TEST( testlogicaloperators, DISABLED_Convolve_1layer_And_Nobias ) {
    cout << "And" << endl;

    LogicalDataCreator ldc;
    ldc.applyAndGate();

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->planes(2)->imageSize(1)->instance();
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(0) );

    SGD *sgd = SGD::instance( cl, 4.0f, 0 );

    const int numEpochs = 20;
    for( int epoch = 0; epoch != numEpochs; ++epoch ) {
        net->epochMaker(sgd)
            ->batchSize(4)
            ->numExamples(4)
            ->inputData(ldc.data)
            ->expectedOutputs(ldc.expectedOutput)
            ->run(epoch);
        // the loss is reported after every epoch
        cout << "Loss L " << net->calcLoss(ldc.expectedOutput) << endl;
        // net->printWeights();
    }
    // net->print();

    int numCorrect = AccuracyHelper::calcNumRight( ldc.N, 2, ldc.labels, net->getOutput() );
    cout << "accuracy: " << numCorrect << "/" << ldc.N << endl;
    EXPECT_EQ( numCorrect, ldc.N );

    delete sgd;
    delete net;
    delete cl;
}
//layer2 plane0=0 "planes not both -1 and planes not both 1" // weights = plane0*(-1) + plane1*(-1) // plane1=1 "planes both -1 or planes both 1" // weights = plane0*(1) + plane1*(1) TEST( testlogicaloperators, Convolve_2layers_relu_Xor ) { cout << "Xor, convolve" << endl; // LogicalDataCreator ldc(new TanhActivation()); // ldc.applyXorGate(); // int imageSize = 1; // int inPlanes = 2; int numExamples = 4; // int filterSize = 1; float data[] = { -1, -1, -1, 1, 1, -1, 1, 1 }; float layer1weights[] = { // going to preset these, to near an optimal solution, // and at least show the network is stable, and gives the correct -0.4f,-0.55f, // result... 0.52f, 0.53f, }; float layer1bias[] = { 0.1f, -0.1f }; float layer2weights[] = { 1.1f, 0.9f, -0.8f, -1.2f }; float layer2bias[] = { 0.1f, 1.1 }; float expectedOutput[] = { 1, 0, 0, 1, 0, 1, 1, 0 }; int labels[] = { 0, 1, 1, 0 }; EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu(); NeuralNet *net = NeuralNet::maker(cl)->planes(2)->imageSize(1)->instance(); net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) ); net->addLayer( ActivationMaker::instance()->relu() ); net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) ); net->addLayer( ActivationMaker::instance()->relu() ); net->addLayer( SquareLossMaker::instance() );; cout << "hand-setting weights..." 
<< endl; net->initWeights( 1, layer1weights, layer1bias ); net->initWeights( 3, layer2weights, layer2bias ); // net->printWeights(); // net->setBatchSize(4); // net->forward( data ); // net->print(); SGD *sgd = SGD::instance( cl, 0.1f, 0 ); for( int epoch = 0; epoch < 200; epoch++ ) { net->epochMaker(sgd)->batchSize(numExamples)->numExamples(numExamples)->inputData(data) ->expectedOutputs(expectedOutput)->run( epoch ); if( epoch % 5 == 0 ) cout << "Loss L " << net->calcLoss(expectedOutput) << endl; } net->print(); AccuracyHelper::printAccuracy( numExamples, 2, labels, net->getOutput() ); float loss = net->calcLoss(expectedOutput); cout << "loss, E, " << loss << endl; EXPECT_GE( 0.0000001f, loss ); delete sgd; delete net; delete cl; }
void testNumerically(float learningRate, int batchSize, int imageSize, int filterSize, int numPlanes, ActivationFunction *fn, bool padZeros, int its = 20) { EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu(); ClBlasInstance clblasInstance; NeuralNet *net = NeuralNet::maker(cl)->planes(numPlanes)->imageSize(imageSize)->instance(); net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros)); net->addLayer(ActivationMaker::instance()->fn(fn)); net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros)); net->addLayer(ActivationMaker::instance()->fn(fn)); net->addLayer(SquareLossMaker::instance()); net->setBatchSize(batchSize); int inputNumElements = net->getLayer(0)->getOutputNumElements(); int outputNumElements = net->getLastLayer()->getOutputNumElements(); int weightsSize1 = net->getLayer(1)->getWeightsSize(); int weightsSize2 = net->getLayer(3)->getWeightsSize(); float *inputData = new float[std::max<int>(10000, inputNumElements)]; float *expectedOutput = new float[std::max<int>(10000, outputNumElements)]; memset(inputData, 0, sizeof(float) * std::max<int>(10000, inputNumElements)); memset(expectedOutput, 0, sizeof(float) * std::max<int>(10000, outputNumElements)); // int seed = 0; std::mt19937 random = WeightRandomizer::randomize(inputData, std::max<int>(10000, inputNumElements), -2.0f, 2.0f); WeightRandomizer::randomize(random, expectedOutput, std::max<int>(10000, outputNumElements), -2.0f, 2.0f); WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weights, weightsSize1, -2.0f, 2.0f); dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToDevice(); WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weights, weightsSize2, -2.0f, 2.0f); dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice(); SGD *sgd = SGD::instance(cl, 
learningRate, 0.0f); for(int it = 0; it < its; it++) { float *weightsBefore1 = new float[weightsSize1]; float *currentWeights = net->getLayer(1)->getWeights(); for(int i = 0; i < weightsSize1; i++) { weightsBefore1[i] = currentWeights[i]; } float *weightsBefore2 = new float[weightsSize2]; currentWeights = net->getLayer(3)->getWeights(); for(int i = 0; i < weightsSize2; i++) { weightsBefore2[i] = currentWeights[i]; } net->forward(inputData); // net->print(); float loss = net->calcLoss(expectedOutput); dynamic_cast<LossLayer*>(net->getLayer(5))->calcLoss(expectedOutput); // net->backward(expectedOutput); TrainingContext context(0, 0); sgd->train(net, &context, inputData, expectedOutput); dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToHost(); // restore 2nd layer weights :-) for(int i = 0; i < weightsSize2; i++) { // dynamic_cast<ConvolutionalLayer*>(net->getLayer(2))->weights[i] = weightsBefore2[i]; } dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice(); net->forward(inputData); float loss2 = net->calcLoss(expectedOutput); float lossChange = loss - loss2; cout << " loss " << loss << " loss2 " << loss2 << " change: " << lossChange << endl; float *newWeights = net->getLayer(1)->getWeights(); float sumWeightDiff = 0; float sumWeightDiffSquared = 0; for(int i = 0; i < weightsSize1; i++) { float diff = newWeights[i] - weightsBefore1[i]; sumWeightDiff += diff; sumWeightDiffSquared += diff * diff; } newWeights = net->getLayer(3)->getWeights(); for(int i = 0; i < weightsSize2; i++) { float diff = newWeights[i] - weightsBefore2[i]; sumWeightDiff += diff; sumWeightDiffSquared += diff * diff; } cout << "sumweightsdiff " << sumWeightDiff << endl; // cout << "sumweightsdiff / learningrate " << (sumWeightDiff / learningRate) << endl; // cout << "sum weightsdiffsquared " << (sumWeightDiffSquared/ learningRate / learningRate * imageSize) << endl; float estimatedLossChangeFromW = sumWeightDiffSquared/ learningRate; // / 
filterSize; cout << " loss change " << lossChange << endl; cout << " estimatedLossChangeFromW " << estimatedLossChangeFromW << endl; // cout << abs(estimatedLossChangeFromW - lossChange) / lossChange << endl; // cout << abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW << endl; EXPECT_GT(0.01f * imageSize * imageSize, abs(estimatedLossChangeFromW - lossChange) / lossChange); EXPECT_GT(0.01f * imageSize * imageSize, abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW); delete[] weightsBefore1; delete[] weightsBefore2; } // delete[] weights1; // delete[] errors; // delete[] output; delete sgd; delete[] inputData; delete[] expectedOutput; delete net; delete cl; }
// Compares the gradient reported by the square-loss layer with the change in
// loss that actually results from nudging a single input value.
//
// NOTE(review): the predicted and actual loss changes are only printed; this
// test makes no assertion about them.
TEST(testbackward, squareloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 3, 5);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SquareLossMaker::instance());
    cout << net->asString() << endl;

    int batchSize = 32;
    net->setBatchSize(batchSize);

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    // fixed seeds (0 and 1) keep the generated data the same on every run
    WeightRandomizer::randomize(0, input, inputTotalSize, -2.0f, 2.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, -2.0f, 2.0f);

    // now, forward prop
    // net->input(input);
    net->forward(input);
    net->print();
    // net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        // pick the input element to perturb, using the sample number as seed
        int inputIndex = 0;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
        // cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
        // cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.01f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
        // cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        // undo the tweak so the next sample starts from the unmodified input
        input[inputIndex] = oldValue;
        // net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
// Compares the gradient reported by the softmax layer with the change in loss
// that actually results from nudging a single input value.
//
// NOTE(review): the predicted and actual loss changes are only printed; this
// test makes no assertion about them.
TEST(testbackward, softmaxloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 5, 1);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SoftMaxMaker::instance());
    cout << net->asString() << endl;

    const int batchSize = 2;
    net->setBatchSize(batchSize);
    const int outputPlanes = net->getOutputPlanes();

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    // fixed seeds (0 and 1) keep the generated data the same on every run
    WeightRandomizer::randomize(0, input, inputTotalSize, 0.0f, 1.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, 0.0f, 1.0f);

    // we should make the input and output a probability distribution I think
    // so: add up the input, and divide each by that.  do same for expectedoutput (?)
    // normalizeAsProbabilityDistribution(input, inputTotalSize);
    normalizeAsProbabilityDistribution(outputPlanes, expectedOutput, outputTotalSize);

    // alternative target, disabled:
    // set all to zero, and one to 1, ie like labelled data
    // for(int i = 0; i < outputTotalSize; i++) {
    //     expectedOutput[i] = 0;
    // }
    // for(int n = 0; n < batchSize; n++) {
    //     int chosenLabel = 0;
    //     WeightRandomizer::randomizeInts(n, &chosenLabel, 1, 0, net->getOutputPlanes());
    //     expectedOutput[ n * outputPlanes + chosenLabel ] = 1;
    // }
    // for(int i = 0; i < outputTotalSize; i++) {
    //     cout << "expected[" << i << "]=" << expectedOutput[i] << endl;
    // }

    // now, forward prop
    // net->input(input);
    net->forward(input);
    net->print();
    // net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        // pick the input element to perturb, using the sample number as seed
        int inputIndex = 0;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
        // cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
        // cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.001f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
        // cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        // undo the tweak so the next sample starts from the unmodified input
        input[inputIndex] = oldValue;
        // net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}