Example #1
TEST( testforward, softmax_byplane ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->imageSize(2)->planes(1)->instance();
    net->addLayer( SoftMaxMaker::instance()->perPlane() );
    net->setBatchSize( 1 );
    int imageSizeSquared = net->getLayer(0)->getOutputSize() * net->getLayer(0)->getOutputSize();
    float *input = new float[imageSizeSquared];
    input[0] = 0;
    input[1] = 1;
    input[2] = 3;
    input[3] = 2;
    net->forward( input );
    float const*output = net->getOutput();
    float sum = 0;
    for( int i = 0; i < imageSizeSquared; i++ ) {
        cout << "output[" << i << "]=" << output[i] << endl;
        sum += output[i];
        EXPECT_LE( 0, output[i] );
        EXPECT_GE( 1, output[i] );
    }
    EXPECT_FLOAT_NEAR( 1.0f, sum );
    EXPECT_FLOAT_NEAR( (float)( exp(0.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), output[0] );
    EXPECT_FLOAT_NEAR( (float)( exp(1.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), output[1] );
    EXPECT_FLOAT_NEAR( (float)( exp(3.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), output[2] );
    EXPECT_FLOAT_NEAR( (float)( exp(2.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), output[3] );

    float *expected = new float[imageSizeSquared];
    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[2] = 1;
    float loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(output[2]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[0] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(output[0]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[1] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(output[1]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[3] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(output[3]), loss );

    delete[] input;
    delete[] expected;
    delete net;
    delete cl;
}
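For reference, the values asserted above follow directly from the softmax and negative-log-likelihood formulas. A minimal standalone sketch (plain C++, independent of DeepCL) that reproduces them:

#include <cmath>
#include <iostream>

int main() {
    // same input as the test: a single 2x2 plane
    float in[4] = {0, 1, 3, 2};
    float sum = 0;
    for(int i = 0; i < 4; i++) {
        sum += std::exp(in[i]);
    }
    for(int i = 0; i < 4; i++) {
        float p = std::exp(in[i]) / sum;   // softmax probability for element i
        std::cout << "p[" << i << "]=" << p
                  << "  loss if the expected label is " << i << ": " << -std::log(p) << std::endl;
    }
    return 0;
}

The probabilities sum to 1, and the loss for a one-hot expected output is just the negative log of the probability assigned to that element, which is what the EXPECT_FLOAT_NEAR checks verify.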
Example #2
void go(Config config) {
    Timer timer;

    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;

    float *trainData = 0;
    float *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;

    int trainAllocateN = 0;
    int testAllocateN = 0;

//    int totalLinearSize;
    GenericLoader::getDimensions((config.dataDir + "/" + config.trainFile).c_str(), &Ntrain, &numPlanes, &imageSize);
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
//    long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    trainAllocateN = Ntrain;
    trainData = new float[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if(Ntrain > 0) {
        GenericLoader::load((config.dataDir + "/" + config.trainFile).c_str(), trainData, trainLabels, 0, Ntrain);
    }

    GenericLoader::getDimensions((config.dataDir + "/" + config.validateFile).c_str(), &Ntest, &numPlanes, &imageSize);
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    testAllocateN = Ntest;
    testData = new float[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN]; 
    if(Ntest > 0) {
        GenericLoader::load((config.dataDir + "/" + config.validateFile).c_str(), testData, testLabels, 0, Ntest);
    }
    
    timer.timeCheck("after load images");

    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if(config.normalization == "stddev") {
        float mean, stdDev;
        NormalizationHelper::getMeanAndStdDev(trainData, normalizationExamples * inputCubeSize, &mean, &stdDev);
        cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
        translate = - mean;
        scale = 1.0f / stdDev / config.normalizationNumStds;
    } else if(config.normalization == "maxmin") {
        float mean, stdDev;
        NormalizationHelper::getMinMax(trainData, normalizationExamples * inputCubeSize, &mean, &stdDev);
        translate = - mean;
        scale = 1.0f / stdDev;
    } else {
        cout << "Error: Unknown normalization: " << config.normalization << endl;
        return;
    }
    
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");

//    const int numToTrain = Ntrain;
//    const int batchSize = config.batchSize;
    EasyCL *cl = new EasyCL();
    NeuralNet *net = new NeuralNet(cl);
//    net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer(InputLayerMaker::instance()->numPlanes(numPlanes)->imageSize(imageSize));
    net->addLayer(NormalizationLayerMaker::instance()->translate(translate)->scale(scale));
    if(!NetdefToNet::createNetFromNetdef(net, config.netDef)) {
        return;
    }
    net->print();
    for(int i = 1; i < net->getNumLayers() - 1; i++) {
        Layer *layer = net->getLayer(i);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >(layer);
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >(layer);
        if(fc != 0) {
            conv = fc->convolutionalLayer;
        }
        if(conv == 0) {
            continue;
        }
        initrand.seed(0);
        int weightsSize = conv->getWeightsSize();
    //int weightsSize = layer->getPersistSize();
        if(weightsSize > 0) {
            cout << "weightsSize " << weightsSize << endl;
            float *weights = new float[weightsSize];
            for(int j = 0; j < weightsSize; j++) {
                int thisrand = (int)initrand();
                float thisweight = (thisrand % 100000) / 1000000.0f;
                weights[j] = thisweight;
            }        
            conv->initWeights(weights);
        }
        if(conv->dim.biased) {
            initrand.seed(0);
            int biasedSize = conv->getBiasSize();
            float *biasWeights = new float[biasedSize];
            for(int j = 0; j < biasedSize; j++) {
                int thisrand = (int)initrand();
                float thisweight = (thisrand % 100000) / 1000000.0f;
                biasWeights[j] = thisweight;
                //biasWeights[j] = 0;
            }        
            conv->initBias(biasWeights);
        }
    }

    cout << "weight samples before learning:" << endl;
    sampleWeights(net);

    bool afterRestart = false;
    int restartEpoch = 0;
//    int restartBatch = 0;
//    float restartAnnealedLearningRate = 0;
//    int restartNumRight = 0;
//    float restartLoss = 0;

    timer.timeCheck("before learning start");
    if(config.dumpTimings) {
        StatefulTimer::dump(true);
    }
    StatefulTimer::timeCheck("START");

    SGD *sgd = SGD::instance(cl, config.learningRate, 0.0f);
    Trainable *trainable = net;
    NetLearner netLearner(
        sgd, trainable,
        Ntrain, trainData, trainLabels,
        Ntest, testData, testLabels,
        config.batchSize);
    netLearner.setSchedule(config.numEpochs, afterRestart ? restartEpoch : 1);
//    netLearner.setBatchSize(config.batchSize);
    netLearner.setDumpTimings(config.dumpTimings);
//    netLearner.learn(config.learningRate, 1.0f);

    cout << "forward output" << endl;
    for(int layerId = 0; layerId < net->getNumLayers(); layerId++) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >(layer);
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >(layer);
        PoolingLayer *pool = dynamic_cast< PoolingLayer * >(layer);
        SoftMaxLayer *softMax = dynamic_cast< SoftMaxLayer * >(layer);
        if(fc != 0) {
            conv = fc->convolutionalLayer;
        }
        int planes = 0;
        int imageSize = 0;
        if(conv != 0) {
            cout << "convolutional (or conv based, ie fc)" << endl;
            planes = conv->dim.numFilters;
            imageSize = conv->dim.outputSize;
          //  continue;
        } else if(pool != 0) {
            cout << "pooling" << endl;
            planes = pool->numPlanes;
            imageSize = pool->outputSize;
        } else if(softMax != 0) {
            cout << "softmax" << endl;
            planes = softMax->numPlanes;
            imageSize = softMax->imageSize;
        } else {
            continue;
        }
        cout << "layer " << layerId << endl;
//        conv->getOutput();
        float const*output = layer->getOutput();
//        for(int i = 0; i < 3; i++) {
//            cout << conv->getOutput()[i] << endl;
//        }
        initrand.seed(0);
//        LayerDimensions &dim = conv->dim;
        for(int i = 0; i < 10; i++) {
            int thisrand = abs((int)initrand());
            int seq = thisrand % (planes * imageSize * imageSize);
            int outPlane = seq / (imageSize * imageSize);
            int rowcol = seq % (imageSize * imageSize);
            int row = rowcol / imageSize;
            int col = rowcol % imageSize;
            cout << "out[" << outPlane << "," << row << "," << col << "]=" << output[ seq ] << endl;
        }
    }

    cout << "weight samples after learning:" << endl;
    sampleWeights(net);

    cout << "backprop output" << endl;
    for(int layerId = net->getNumLayers() - 1; layerId >= 0; layerId--) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >(layer);
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >(layer);
        if(fc != 0) {
            conv = fc->convolutionalLayer;
        }
        if(conv == 0) {
            continue;
        }

        cout << "layer " << layerId << endl;
        float const*weights = conv->getWeights();
        float const*biases = conv->getBias();
        int weightsSize = conv->getWeightsSize() / conv->dim.numFilters;
        for(int i = 0; i < weightsSize; i++) {
            cout << " weight " << i << " " << weights[i] << endl;
        }
        for(int i = 0; i < 3; i++) {
            cout << " bias " << i << " " << biases[i] << endl;
        }
    }
    cout << "done" << endl;

    delete sgd;
    delete net;
    delete cl;

    if(trainData != 0) {
        delete[] trainData;
    }
    if(testData != 0) {
        delete[] testData;
    }
    if(testLabels != 0) {
        delete[] testLabels;
    }
    if(trainLabels != 0) {
        delete[] trainLabels;
    }
}
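In the normalization block near the top of go(), translate and scale are chosen so that a downstream normalization layer can shift and rescale the raw input values. A minimal sketch of that arithmetic, assuming (this is an assumption, not taken from the DeepCL source) the layer computes out = (in + translate) * scale:

#include <iostream>

int main() {
    // stand-in statistics, as NormalizationHelper::getMeanAndStdDev might return
    float mean = 33.5f, stdDev = 78.2f, normalizationNumStds = 1.0f;
    float translate = -mean;                        // same formula as the stddev branch
    float scale = 1.0f / stdDev / normalizationNumStds;
    float pixel = 120.0f;                           // hypothetical raw input value
    float normalized = (pixel + translate) * scale; // assumed layer formula
    std::cout << "normalized=" << normalized << std::endl; // about 1.11 std devs above the mean
    return 0;
}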
Example #3
TEST(testbackward, squareloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check it corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 3, 5);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SquareLossMaker::instance());
    cout << net->asString() << endl;

    int batchSize = 32;
    net->setBatchSize(batchSize);

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    WeightRandomizer::randomize(0, input, inputTotalSize, -2.0f, 2.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, -2.0f, 2.0f);
    
    // now, forward prop
//    net->input(input);
    net->forward(input);
    net->print();
//    net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
//        cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
//        cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.01f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
//        cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
//        net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
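The sampling loop above is a standard first-order gradient check: after backprop, a small change delta to one input should change the loss by roughly grad * delta. A self-contained sketch of the same idea on a toy function (no DeepCL types, purely illustrative):

#include <iostream>

// toy loss L(x) = (x - 3)^2 with analytic gradient dL/dx = 2 * (x - 3)
float loss(float x) { return (x - 3.0f) * (x - 3.0f); }
float grad(float x) { return 2.0f * (x - 3.0f); }

int main() {
    float x = 1.5f;
    float delta = x * 0.01f;                  // tweak slightly, as the test does
    float predicted = grad(x) * delta;        // first-order prediction
    float actual = loss(x + delta) - loss(x); // actual change from re-evaluating
    std::cout << "predicted=" << predicted << " actual=" << actual << std::endl;
    return 0;
}

The two numbers agree to first order; the test prints the analogous pair for ten randomly chosen input indices.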
Example #4
void testNumerically(float learningRate, int batchSize, int imageSize, int filterSize, int numPlanes, ActivationFunction *fn, bool padZeros, int its = 20) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    ClBlasInstance clblasInstance;
    NeuralNet *net = NeuralNet::maker(cl)->planes(numPlanes)->imageSize(imageSize)->instance();
    net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros));
    net->addLayer(ActivationMaker::instance()->fn(fn));
    net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros));
    net->addLayer(ActivationMaker::instance()->fn(fn));
    net->addLayer(SquareLossMaker::instance());
    net->setBatchSize(batchSize);

    int inputNumElements = net->getLayer(0)->getOutputNumElements();
    int outputNumElements = net->getLastLayer()->getOutputNumElements();
    int weightsSize1 = net->getLayer(1)->getWeightsSize();
    int weightsSize2 = net->getLayer(3)->getWeightsSize();

    float *inputData = new float[std::max<int>(10000, inputNumElements)];
    float *expectedOutput = new float[std::max<int>(10000, outputNumElements)];
    memset(inputData, 0, sizeof(float) * std::max<int>(10000, inputNumElements));
    memset(expectedOutput, 0, sizeof(float) * std::max<int>(10000, outputNumElements));
//    int seed = 0;
    std::mt19937 random = WeightRandomizer::randomize(inputData, std::max<int>(10000, inputNumElements), -2.0f, 2.0f);
    WeightRandomizer::randomize(random, expectedOutput, std::max<int>(10000, outputNumElements), -2.0f, 2.0f);
    WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weights, weightsSize1, -2.0f, 2.0f);
    dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToDevice();
    WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weights, weightsSize2, -2.0f, 2.0f);
    dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice();

    SGD *sgd = SGD::instance(cl, learningRate, 0.0f);
    for(int it = 0; it < its; it++) {
        float *weightsBefore1 = new float[weightsSize1];
        float *currentWeights = net->getLayer(1)->getWeights();
        for(int i = 0; i < weightsSize1; i++) {
            weightsBefore1[i] = currentWeights[i];
        }
        float *weightsBefore2 = new float[weightsSize2];
        currentWeights = net->getLayer(3)->getWeights();
        for(int i = 0; i < weightsSize2; i++) {
            weightsBefore2[i] = currentWeights[i];
        }

        net->forward(inputData);
    //    net->print();
        float loss = net->calcLoss(expectedOutput);
        dynamic_cast<LossLayer*>(net->getLayer(5))->calcLoss(expectedOutput);
//        net->backward(expectedOutput);
        TrainingContext context(0, 0);
        sgd->train(net, &context, inputData, expectedOutput);
        dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToHost();
        // restore 2nd layer weights :-)
        for(int i = 0; i < weightsSize2; i++) {
//            dynamic_cast<ConvolutionalLayer*>(net->getLayer(2))->weights[i] = weightsBefore2[i];
        }
        dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice();
        net->forward(inputData);

        float loss2 = net->calcLoss(expectedOutput);
        float lossChange = loss - loss2;
        cout << " loss " << loss << " loss2 " << loss2 << " change: " << lossChange << endl;

        float *newWeights = net->getLayer(1)->getWeights();
        float sumWeightDiff = 0;
        float sumWeightDiffSquared = 0;
        for(int i = 0; i < weightsSize1; i++) {
            float diff = newWeights[i] - weightsBefore1[i];
            sumWeightDiff += diff;
            sumWeightDiffSquared += diff * diff;
        }
        newWeights = net->getLayer(3)->getWeights();
        for(int i = 0; i < weightsSize2; i++) {
            float diff = newWeights[i] - weightsBefore2[i];
            sumWeightDiff += diff;
            sumWeightDiffSquared += diff * diff;
        }
        cout << "sumweightsdiff " << sumWeightDiff << endl;
    //    cout << "sumweightsdiff / learningrate " << (sumWeightDiff / learningRate) << endl;
    //    cout << "sum weightsdiffsquared " << (sumWeightDiffSquared/ learningRate / learningRate * imageSize) << endl;

        float estimatedLossChangeFromW = sumWeightDiffSquared/ learningRate; // / filterSize;

        cout << " loss change              " << lossChange << endl;
        cout << " estimatedLossChangeFromW " << estimatedLossChangeFromW << endl;
    //    cout << abs(estimatedLossChangeFromW - lossChange) / lossChange << endl;    
    //    cout << abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW << endl;    
        EXPECT_GT(0.01f * imageSize * imageSize, abs(estimatedLossChangeFromW - lossChange) / lossChange); 
        EXPECT_GT(0.01f * imageSize * imageSize, abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW); 
        delete[] weightsBefore1;
        delete[] weightsBefore2;
    }
//    delete[] weights1;
//    delete[] errors;
//    delete[] output;
    delete sgd;
    delete[] inputData;
    delete[] expectedOutput;
    delete net;
    delete cl;
}
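Why sumWeightDiffSquared / learningRate is a sensible estimate of the loss change: the trainer above is plain SGD with zero momentum (SGD::instance(cl, learningRate, 0.0f)), so each weight moves by

    deltaW_i = -learningRate * dL/dW_i

and, to first order,

    deltaL ≈ sum_i (dL/dW_i) * deltaW_i = -(1/learningRate) * sum_i (deltaW_i)^2

The magnitude of this is sumWeightDiffSquared / learningRate, which is exactly what estimatedLossChangeFromW computes and compares, within a tolerance that scales with imageSize, against the measured loss change.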
Example #5
TEST(testbackward, softmaxloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check it corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 5, 1);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SoftMaxMaker::instance());
    cout << net->asString() << endl;

    const int batchSize = 2;
    net->setBatchSize(batchSize);
    const int outputPlanes = net->getOutputPlanes();

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    WeightRandomizer::randomize(0, input, inputTotalSize, 0.0f, 1.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, 0.0f, 1.0f);

    // we should make the input and output a probability distribution I think
    // so: add up the input, and divide each by that.  do same for expectedoutput (?)
//    normalizeAsProbabilityDistribution(input, inputTotalSize);
    normalizeAsProbabilityDistribution(outputPlanes, expectedOutput, outputTotalSize);

    // set all to zero, and one to 1, ie like labelled data
//    for(int i = 0; i < outputTotalSize; i++) {
//        expectedOutput[i] = 0;
//    }
//    for(int n = 0; n < batchSize; n++) {
//        int chosenLabel = 0;
//        WeightRandomizer::randomizeInts(n, &chosenLabel, 1, 0, net->getOutputPlanes());
//        expectedOutput[ n * outputPlanes + chosenLabel ] = 1;
//    }
//    for(int i = 0; i < outputTotalSize; i++) {
//        cout << "expected[" << i << "]=" << expectedOutput[i] << endl;
//    }
//        
    // now, forward prop
//    net->input(input);
    net->forward(input);
    net->print();
//    net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
//        cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
//        cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.001f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
//        cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
//        net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
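normalizeAsProbabilityDistribution is a helper that is not shown in this example. A plausible minimal version, assuming it simply rescales each group of outputPlanes values so that every example's expected output sums to 1 (the values here are randomized into [0, 1], so the sums are positive), might look like:

// hypothetical sketch; the real helper in the test suite may differ
void normalizeAsProbabilityDistribution(int planes, float *values, int totalSize) {
    for(int start = 0; start < totalSize; start += planes) {
        float sum = 0;
        for(int i = 0; i < planes; i++) {
            sum += values[start + i];
        }
        for(int i = 0; i < planes; i++) {
            values[start + i] /= sum;   // each group of 'planes' values now sums to 1
        }
    }
}

With the expected output normalized this way, the finite-difference check proceeds exactly as in the square-loss example above.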