Example #1
NeuralNet *NeuralNet::clone() {
    NeuralNet *copy = new NeuralNet(cl);
    for(vector<Layer *>::iterator it = layers.begin(); it != layers.end(); it++) {
        LayerMaker2 *maker = (*it)->maker;

        LayerMaker2 *makerCopy = maker->clone();
        copy->addLayer(makerCopy);
    }
    copy->print();
    cout << "outputimagesize: " << copy->getOutputSize() << endl;
    return copy;
}
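
A minimal usage sketch (hypothetical; assumes an EasyCL context and some layers already added, as in the later examples). Since clone() re-adds a cloned maker per layer, the copy is structurally identical but independent of the original:

// Hypothetical usage of NeuralNet::clone()
EasyCL *cl = new EasyCL();
NeuralNet *net = new NeuralNet(cl);
// ... addLayer(...) calls ...
NeuralNet *copy = net->clone();  // independent net built from cloned layer makers
delete copy;
delete net;
delete cl;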
Example #2
int main(int argc, char *argv[])
{
    std::vector<std::pair<FloatVector, FloatVector>> rel;
    
    for(float x = 1; x < 10; x += 1) {
        for(float y = 1; y < 10; y += 1) {
            FloatVector point;
            point.push_back(x);
            point.push_back(y);
            point.push_back(1);

            FloatVector res;
            res.push_back(x);
            res.push_back(y + 3);
            res.push_back(1);

            rel.push_back(std::make_pair(point, res));
        }
    }

    Relation<FloatVector, FloatVector> relation(rel);

    std::vector<unsigned int> structure = { 3, 3};

    NeuralNet* nn = new NeuralNet(relation, structure);
    nn->status();
    nn->print();
    //nn->print();
    Gecode::BAB<NeuralNet> se(nn);
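    // Note: the BAB engine `se` is only used by the commented-out block below;
    // the interactive Gist search (Gecode::Gist::bab) drives the exploration here.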
    Gecode::Gist::Print<NeuralNet> p("Print solution");
    Gecode::Gist::Options o;
    o.inspect.click(&p);
    Gecode::Gist::bab(nn, o);
    delete nn;

    /*
    if(NeuralNet* nn1 = se.next()) {
        nn1->print();
        delete nn1;
    }*/

    return 0;
}
Example #3
int main( int argc, char *argv[] ) {
//    ScenarioImage scenario;

    ScenarioImage *scenario = new ScenarioImage( 5, true);

    EasyCL *cl = new EasyCL();
    NeuralNet *net = new NeuralNet( cl );
    SGD *sgd = SGD::instance( cl, 0.1f, 0.0f );

    const int size = scenario->getPerceptionSize();
    const int planes = scenario->getPerceptionPlanes();
    const int numActions = scenario->getNumActions();
    net->addLayer( InputLayerMaker::instance()->numPlanes(planes)->imageSize(size) );
    net->addLayer( ConvolutionalMaker::instance()->filterSize(5)->numFilters(8)->biased()->padZeros() );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( ConvolutionalMaker::instance()->filterSize(5)->numFilters(8)->biased()->padZeros() );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( FullyConnectedMaker::instance()->imageSize(1)->numPlanes(100)->biased() );
    net->addLayer( ActivationMaker::instance()->tanh() );
    net->addLayer( FullyConnectedMaker::instance()->imageSize(1)->numPlanes(numActions)->biased() );
    net->addLayer( SquareLossMaker::instance() );
    net->print();

    scenario->setNet( net ); // used by the printQRepresentation method

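    // QLearner presumably loops scenario episodes, using sgd to train net on Q-value targets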
    QLearner qLearner( sgd, scenario, net );
    qLearner.run();
    
//    delete[] expectedOutputs;
//    delete[] lastPerception;
//    delete[] perception;
    delete sgd;
    delete net;
    delete scenario;
    delete cl;
    
    return 0;
}
Example #4
// layer2 plane0=0: "planes not both -1, and planes not both 1"
//        weights = plane0*(-1) + plane1*(-1)
//        plane1=1: "planes both -1, or planes both 1"
//        weights = plane0*(1) + plane1*(1)
TEST( testlogicaloperators, Convolve_2layers_relu_Xor ) {
    cout << "Xor, convolve" << endl;
//    LogicalDataCreator ldc(new TanhActivation());
//    ldc.applyXorGate();

//    int imageSize = 1;
//    int inPlanes = 2;
    int numExamples = 4;
//    int filterSize = 1;
    float data[] = { -1, -1,
                     -1, 1,
                     1, -1,
                     1, 1 };
    // preset these weights to near an optimal solution, to at least show the
    // network is stable, and gives the correct result...
    float layer1weights[] = {
        -0.4f, -0.55f,
         0.52f,  0.53f,
    };
    float layer1bias[] = {
       0.1f,
       -0.1f
    };
    float layer2weights[] = {
        1.1f, 0.9f,
        -0.8f, -1.2f
    };
    float layer2bias[] = {
       0.1f,
       1.1f
    };
    float expectedOutput[] = {
        1, 0,
        0, 1,
        0, 1,
        1, 0
    };
    int labels[] = {
        0,
        1,
        1,
        0
    };
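    // expectedOutput is the one-hot encoding of labels: label 0 -> {1, 0}, label 1 -> {0, 1}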

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->planes(2)->imageSize(1)->instance();
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( SquareLossMaker::instance() );
    cout << "hand-setting weights..." << endl;
    net->initWeights( 1, layer1weights, layer1bias );
    net->initWeights( 3, layer2weights, layer2bias );
//    net->printWeights();
//    net->setBatchSize(4);
//    net->forward( data );
//    net->print();
    SGD *sgd = SGD::instance( cl, 0.1f, 0 );
    for( int epoch = 0; epoch < 200; epoch++ ) {
        net->epochMaker(sgd)->batchSize(numExamples)->numExamples(numExamples)->inputData(data)
           ->expectedOutputs(expectedOutput)->run( epoch );
        if( epoch % 5 == 0 ) cout << "Loss L " << net->calcLoss(expectedOutput) << endl;
    }
    net->print();
    AccuracyHelper::printAccuracy( numExamples, 2, labels, net->getOutput() );

    float loss = net->calcLoss(expectedOutput);
    cout << "loss, E, " << loss << endl;
    EXPECT_LE( loss, 0.0000001f );

    delete sgd;
    delete net;
    delete cl;
}
Example #5
void go(Config config) {
    Timer timer;

    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;

    float *trainData = 0;
    float *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;

    int trainAllocateN = 0;
    int testAllocateN = 0;

//    int totalLinearSize;
    GenericLoader::getDimensions((config.dataDir + "/" + config.trainFile).c_str(), &Ntrain, &numPlanes, &imageSize);
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
//    long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    trainAllocateN = Ntrain;
    trainData = new float[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if(Ntrain > 0) {
        GenericLoader::load((config.dataDir + "/" + config.trainFile).c_str(), trainData, trainLabels, 0, Ntrain);
    }

    GenericLoader::getDimensions((config.dataDir + "/" + config.validateFile).c_str(), &Ntest, &numPlanes, &imageSize);
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    testAllocateN = Ntest;
    testData = new float[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN]; 
    if(Ntest > 0) {
        GenericLoader::load((config.dataDir + "/" + config.validateFile).c_str(), testData, testLabels, 0, Ntest);
    }
    
    timer.timeCheck("after load images");

    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if(config.normalization == "stddev") {
        float mean, stdDev;
        NormalizationHelper::getMeanAndStdDev(trainData, normalizationExamples * inputCubeSize, &mean, &stdDev);
        cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
        translate = - mean;
        scale = 1.0f / stdDev / config.normalizationNumStds;
    } else if(config.normalization == "maxmin") {
        float mean, stdDev;
        NormalizationHelper::getMinMax(trainData, normalizationExamples * inputCubeSize, &mean, &stdDev);
        translate = - mean;
        scale = 1.0f / stdDev;
    } else {
        cout << "Error: Unknown normalization: " << config.normalization << endl;
        return;
    }
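    // The NormalizationLayerMaker added below presumably applies out = (in + translate) * scale,
    // so with "stddev" a value at (mean + normalizationNumStds * stdDev) maps to 1.0f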
    
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");

//    const int numToTrain = Ntrain;
//    const int batchSize = config.batchSize;
    EasyCL *cl = new EasyCL();
    NeuralNet *net = new NeuralNet(cl);
//    net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer(InputLayerMaker::instance()->numPlanes(numPlanes)->imageSize(imageSize));
    net->addLayer(NormalizationLayerMaker::instance()->translate(translate)->scale(scale));
    if(!NetdefToNet::createNetFromNetdef(net, config.netDef)) {
        return;
    }
    net->print();
    for(int i = 1; i < net->getNumLayers() - 1; i++) {
        Layer *layer = net->getLayer(i);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >(layer);
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >(layer);
        if(fc != 0) {
            conv = fc->convolutionalLayer;
        }
        if(conv == 0) {
            continue;
        }
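        // initrand is assumed to be a file-scope std::mt19937; reseeding with 0 for every
        // layer gives each layer the same pseudo-random weight sequence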
        initrand.seed(0);
        int weightsSize = conv->getWeightsSize();
    //int weightsSize = layer->getPersistSize();
        if(weightsSize > 0) {
            cout << "weightsSize " << weightsSize << endl;
            float *weights = new float[weightsSize];
            for(int j = 0; j < weightsSize; j++) {
                int thisrand = (int)initrand();
                float thisweight = (thisrand % 100000) / 1000000.0f;
                weights[j] = thisweight;
            }        
            conv->initWeights(weights);
        }
        if(conv->dim.biased) {
            initrand.seed(0);
            int biasedSize = conv->getBiasSize();
            float *biasWeights = new float[biasedSize];
            for(int j = 0; j < biasedSize; j++) {
                int thisrand = (int)initrand();
                float thisweight = (thisrand % 100000) / 1000000.0f;
                biasWeights[j] = thisweight;
                //biasWeights[j] = 0;
            }        
            conv->initBias(biasWeights);
        }
    }

    cout << "weight samples before learning:" << endl;
    sampleWeights(net);

    bool afterRestart = false;
    int restartEpoch = 0;
//    int restartBatch = 0;
//    float restartAnnealedLearningRate = 0;
//    int restartNumRight = 0;
//    float restartLoss = 0;

    timer.timeCheck("before learning start");
    if(config.dumpTimings) {
        StatefulTimer::dump(true);
    }
    StatefulTimer::timeCheck("START");

    SGD *sgd = SGD::instance(cl, config.learningRate, 0.0f);
    Trainable *trainable = net;
    NetLearner netLearner(
        sgd, trainable,
        Ntrain, trainData, trainLabels,
        Ntest, testData, testLabels,
        config.batchSize);
    netLearner.setSchedule(config.numEpochs, afterRestart ? restartEpoch : 1);
//    netLearner.setBatchSize(config.batchSize);
    netLearner.setDumpTimings(config.dumpTimings);
//    netLearner.learn(config.learningRate, 1.0f);

    cout << "forward output" << endl;
    for(int layerId = 0; layerId < net->getNumLayers(); layerId++) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >(layer);
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >(layer);
        PoolingLayer *pool = dynamic_cast< PoolingLayer * >(layer);
        SoftMaxLayer *softMax = dynamic_cast< SoftMaxLayer * >(layer);
        if(fc != 0) {
            conv = fc->convolutionalLayer;
        }
        int planes = 0;
        int imageSize = 0;
        if(conv != 0) {
            cout << "convolutional (or conv based, ie fc)" << endl;
            planes = conv->dim.numFilters;
            imageSize = conv->dim.outputSize;
          //  continue;
        } else if(pool != 0) {
            cout << "pooling" << endl;
            planes = pool->numPlanes;
            imageSize = pool->outputSize;
        } else if(softMax != 0) {
            cout << "softmax" << endl;
            planes = softMax->numPlanes;
            imageSize = softMax->imageSize;
        } else {
            continue;
        }
        cout << "layer " << layerId << endl;
//        conv->getOutput();
        float const*output = layer->getOutput();
//        for(int i = 0; i < 3; i++) {
//            cout << conv->getOutput()[i] << endl;
//        }
        initrand.seed(0);
//        LayerDimensions &dim = conv->dim;
        for(int i = 0; i < 10; i++) {
            int thisrand = abs((int)initrand());
            int seq = thisrand % (planes * imageSize * imageSize);
            int outPlane = seq / (imageSize * imageSize);
            int rowcol = seq % (imageSize * imageSize);
            int row = rowcol / imageSize;
            int col = rowcol % imageSize;
            cout << "out[" << outPlane << "," << row << "," << col << "]=" << output[ seq ] << endl;
        }
    }

    cout << "weight samples after learning:" << endl;
    sampleWeights(net);

    cout << "backprop output" << endl;
    for(int layerId = net->getNumLayers() - 1; layerId >= 0; layerId--) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >(layer);
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >(layer);
        if(fc != 0) {
            conv = fc->convolutionalLayer;
        }
        if(conv == 0) {
            continue;
        }

        cout << "layer " << layerId << endl;
        float const*weights = conv->getWeights();
        float const*biases = conv->getBias();
        int weightsSize = conv->getWeightsSize() / conv->dim.numFilters;
        for(int i = 0; i < weightsSize; i++) {
            cout << " weight " << i << " " << weights[i] << endl;
        }
        for(int i = 0; i < 3; i++) {
            cout << " bias " << i << " " << biases[i] << endl;
        }
    }
    cout << "done" << endl;

    delete sgd;
    delete net;
    delete cl;

    if(trainData != 0) {
        delete[] trainData;
    }
    if(testData != 0) {
        delete[] testData;
    }
    if(testLabels != 0) {
        delete[] testLabels;
    }
    if(trainLabels != 0) {
        delete[] trainLabels;
    }
}
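
sampleWeights is called above but not shown in this example. Below is a minimal sketch consistent with how it is used (a hypothetical implementation; the real helper may sample and print the weights differently):

// Hypothetical sketch of the sampleWeights helper used above
void sampleWeights(NeuralNet *net) {
    for(int layerId = 0; layerId < net->getNumLayers(); layerId++) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >(layer);
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >(layer);
        if(fc != 0) {
            conv = fc->convolutionalLayer;
        }
        if(conv == 0) {
            continue;
        }
        float const *weights = conv->getWeights();
        int numToPrint = conv->getWeightsSize() < 3 ? conv->getWeightsSize() : 3;
        for(int i = 0; i < numToPrint; i++) {
            cout << "layer " << layerId << " weight[" << i << "]=" << weights[i] << endl;
        }
    }
}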
Example #6
TEST(testbackward, squareloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 3, 5);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SquareLossMaker::instance());
    cout << net->asString() << endl;

    int batchSize = 32;
    net->setBatchSize(batchSize);

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    WeightRandomizer::randomize(0, input, inputTotalSize, -2.0f, 2.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, -2.0f, 2.0f);
    
    // now, forward prop
//    net->input(input);
    net->forward(input);
    net->print();
//    net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
//        cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
//        cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.01f;
        float inputDelta = newValue - oldValue;
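        // first-order Taylor expansion: loss(x + delta) - loss(x) is approximately
        // grad * delta, so predictedLossChange should match lossChange for small deltas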
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
//        cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
//        net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
Example #7
TEST(testbackward, softmaxloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 5, 1);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SoftMaxMaker::instance());
    cout << net->asString() << endl;

    const int batchSize = 2;
    net->setBatchSize(batchSize);
    const int outputPlanes = net->getOutputPlanes();

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    WeightRandomizer::randomize(0, input, inputTotalSize, 0.0f, 1.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, 0.0f, 1.0f);

    // make the input and expected output probability distributions: add up each
    // example's values, and divide each value by that sum (only the expected output
    // is normalized below; a hypothetical sketch of the helper follows this example)
//    normalizeAsProbabilityDistribution(input, inputTotalSize);
    normalizeAsProbabilityDistribution(outputPlanes, expectedOutput, outputTotalSize);

    // set all to zero, and one to 1, ie like labelled data
//    for(int i = 0; i < outputTotalSize; i++) {
//        expectedOutput[i] = 0;
//    }
//    for(int n = 0; n < batchSize; n++) {
//        int chosenLabel = 0;
//        WeightRandomizer::randomizeInts(n, &chosenLabel, 1, 0, net->getOutputPlanes());
//        expectedOutput[ n * outputPlanes + chosenLabel ] = 1;
//    }
//    for(int i = 0; i < outputTotalSize; i++) {
//        cout << "expected[" << i << "]=" << expectedOutput[i] << endl;
//    }
//        
    // now, forward prop
//    net->input(input);
    net->forward(input);
    net->print();
//    net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
//        cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
//        cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.001f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
//        cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
//        net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
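
normalizeAsProbabilityDistribution is called above but not defined in this example. Below is a plausible sketch matching the call site (a hypothetical implementation, assuming each example's group of outputPlanes values is normalized to sum to 1):

// Hypothetical sketch of the normalizeAsProbabilityDistribution helper used above
void normalizeAsProbabilityDistribution(int numPlanes, float *values, int totalSize) {
    for(int offset = 0; offset < totalSize; offset += numPlanes) {
        float sum = 0;
        for(int plane = 0; plane < numPlanes; plane++) {
            sum += values[offset + plane];
        }
        for(int plane = 0; plane < numPlanes; plane++) {
            values[offset + plane] /= sum;  // each group of numPlanes values now sums to 1
        }
    }
}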
Example #8
void go(Config config) {
    Timer timer;

    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;

    unsigned char *trainData = 0;
    unsigned char *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;

    int trainAllocateN = 0;
    int testAllocateN = 0;

//    int totalLinearSize;
    GenericLoader::getDimensions( config.dataDir + "/" + config.trainFile, &Ntrain, &numPlanes, &imageSize );
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
//    long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    if( config.loadOnDemand ) {
        trainAllocateN = config.batchSize; // can improve this later
    } else {
        trainAllocateN = Ntrain;
    }
    trainData = new unsigned char[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if( !config.loadOnDemand && Ntrain > 0 ) {
        GenericLoader::load( config.dataDir + "/" + config.trainFile, trainData, trainLabels, 0, Ntrain );
    }

    GenericLoader::getDimensions( config.dataDir + "/" + config.validateFile, &Ntest, &numPlanes, &imageSize );
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    if( config.loadOnDemand ) {
        testAllocateN = config.batchSize; // can improve this later
    } else {
        testAllocateN = Ntest;
    }
    testData = new unsigned char[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN]; 
    if( !config.loadOnDemand && Ntest > 0 ) {
        GenericLoader::load( config.dataDir + "/" + config.validateFile, testData, testLabels, 0, Ntest );
    }
    cout << "Ntest " << Ntest << " Ntest" << endl;
    
    timer.timeCheck("after load images");

    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if( !config.loadOnDemand ) {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizationHelper::getMeanAndStdDev( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            float mean, stdDev;
            NormalizationHelper::getMinMax( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            translate = - mean;
            scale = 1.0f / stdDev;
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    } else {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizeGetStdDev<unsigned char> normalizeGetStdDev( trainData, trainLabels ); 
            BatchProcess::run<unsigned char>( config.dataDir + "/" + config.trainFile, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetStdDev );
            normalizeGetStdDev.calcMeanStdDev( &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            NormalizeGetMinMax<unsigned char> normalizeGetMinMax( trainData, trainLabels );
            BatchProcess::run( config.dataDir + "/" + config.trainFile, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetMinMax );
            normalizeGetMinMax.calcMinMaxTransform( &translate, &scale );
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    }
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");

//    const int numToTrain = Ntrain;
//    const int batchSize = config.batchSize;
    NeuralNet *net = new NeuralNet();
//    net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer( InputLayerMaker<unsigned char>::instance()->numPlanes(numPlanes)->imageSize(imageSize) );
    net->addLayer( NormalizationLayerMaker::instance()->translate(translate)->scale(scale) );
    if( !NetdefToNet::createNetFromNetdef( net, config.netDef ) ) {
        return;
    }
    net->print();

    bool afterRestart = false;
    int restartEpoch = 0;
    int restartBatch = 0;
    float restartAnnealedLearningRate = 0;
    int restartNumRight = 0;
    float restartLoss = 0;
    if( config.loadWeights && config.weightsFile != "" ) {
        afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            cout << "Weights file " << config.weightsFile << " exists, but doesnt match training options provided => aborting" << endl;
            cout << "Please either check the training options, or choose a weights file that doesnt exist yet" << endl;
            return;
        }
    }

    timer.timeCheck("before learning start");
    if( config.dumpTimings ) {
        StatefulTimer::dump( true );
    }
    StatefulTimer::timeCheck("START");

    Trainable *trainable = net;
    MultiNet *multiNet = 0;
    if( config.multiNet > 1 ) {
        multiNet = new MultiNet( config.multiNet, net );
        trainable = multiNet;
    }
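    // MultiNet presumably wraps config.multiNet replicas of net, training them together
    // and combining their outputs as an ensemble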
    if( config.loadOnDemand ) {
        NetLearnerOnDemand<unsigned char> netLearner( trainable );
        netLearner.setTrainingData( config.dataDir + "/" + config.trainFile, Ntrain );
        netLearner.setTestingData( config.dataDir + "/" + config.validateFile, Ntest );
        netLearner.setSchedule( config.numEpochs, afterRestart ? restartEpoch : 1 );
        netLearner.setBatchSize( config.fileReadBatches, config.batchSize );
        netLearner.setDumpTimings( config.dumpTimings );
        WeightsWriter weightsWriter( net, &config );
        if( config.weightsFile != "" ) {
            netLearner.addPostEpochAction( &weightsWriter );
        }
        netLearner.learn( config.learningRate, config.annealLearningRate );
    } else {
        NetLearner<unsigned char> netLearner( trainable );
        netLearner.setTrainingData( Ntrain, trainData, trainLabels );
        netLearner.setTestingData( Ntest, testData, testLabels );
        netLearner.setSchedule( config.numEpochs, afterRestart ? restartEpoch : 1 );
        netLearner.setBatchSize( config.batchSize );
        netLearner.setDumpTimings( config.dumpTimings );
        WeightsWriter weightsWriter( net, &config );
        if( config.weightsFile != "" ) {
            netLearner.addPostEpochAction( &weightsWriter );
        }
        netLearner.learn( config.learningRate, config.annealLearningRate );
    }

    if( multiNet != 0 ) {
        delete multiNet;
    }
    delete net;

    if( trainData != 0 ) {
        delete[] trainData;
    }
    if( testData != 0 ) {
        delete[] testData;
    }
    if( testLabels != 0 ) {
        delete[] testLabels;
    }
    if( trainLabels != 0 ) {
        delete[] trainLabels;
    }
}
Example #9
void go(Config config) {
    Timer timer;

    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;

    float *trainData = 0;
    float *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;

    int trainAllocateN = 0;
    int testAllocateN = 0;

//    int totalLinearSize;
    GenericLoaderv2 trainLoader( config.dataDir + "/" + config.trainFile );
    Ntrain = trainLoader.getN();
    numPlanes = trainLoader.getPlanes();
    imageSize = trainLoader.getImageSize();
    // GenericLoader::getDimensions( , &Ntrain, &numPlanes, &imageSize );
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
//    long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    if( config.loadOnDemand ) {
        trainAllocateN = config.batchSize; // can improve this later
    } else {
        trainAllocateN = Ntrain;
    }
    trainData = new float[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if( !config.loadOnDemand && Ntrain > 0 ) {
        trainLoader.load( trainData, trainLabels, 0, Ntrain );
    }

    GenericLoaderv2 testLoader( config.dataDir + "/" + config.validateFile );
    Ntest = testLoader.getN();
    numPlanes = testLoader.getPlanes();
    imageSize = testLoader.getImageSize();
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    if( config.loadOnDemand ) {
        testAllocateN = config.batchSize; // can improve this later
    } else {
        testAllocateN = Ntest;
    }
    testData = new float[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN]; 
    if( !config.loadOnDemand && Ntest > 0 ) {
        testLoader.load( testData, testLabels, 0, Ntest );
    }
    cout << "Ntest " << Ntest << " Ntest" << endl;
    
    timer.timeCheck("after load images");

    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if( !config.loadOnDemand ) {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizationHelper::getMeanAndStdDev( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            float mean, stdDev;
            NormalizationHelper::getMinMax( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            translate = - mean;
            scale = 1.0f / stdDev;
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    } else {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizeGetStdDev normalizeGetStdDev( trainData, trainLabels ); 
            BatchProcessv2::run( &trainLoader, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetStdDev );
            normalizeGetStdDev.calcMeanStdDev( &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            NormalizeGetMinMax normalizeGetMinMax( trainData, trainLabels );
            BatchProcessv2::run( &trainLoader, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetMinMax );
            normalizeGetMinMax.calcMinMaxTransform( &translate, &scale );
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    }
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");

//    const int numToTrain = Ntrain;
//    const int batchSize = config.batchSize;

    EasyCL *cl = 0;
    if( config.gpuIndex >= 0 ) {
        cl = EasyCL::createForIndexedGpu( config.gpuIndex );
    } else {
        cl = EasyCL::createForFirstGpuOtherwiseCpu();
    }

    NeuralNet *net;
    net = new NeuralNet(cl);

    WeightsInitializer *weightsInitializer = 0;
    if( toLower( config.weightsInitializer ) == "original" ) {
        weightsInitializer = new OriginalInitializer();
    } else if( toLower( config.weightsInitializer ) == "uniform" ) {
        weightsInitializer = new UniformInitializer( config.initialWeights );
    } else {
        cout << "Unknown weights initializer " << config.weightsInitializer << endl;
        return;
    }

//    net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer( InputLayerMaker::instance()->numPlanes(numPlanes)->imageSize(imageSize) );
    net->addLayer( NormalizationLayerMaker::instance()->translate(translate)->scale(scale) );
    if( !NetdefToNet::createNetFromNetdef( net, config.netDef, weightsInitializer ) ) {
        return;
    }
    // apply the trainer
    Trainer *trainer = 0;
    if( toLower( config.trainer ) == "sgd" ) {
        SGD *sgd = new SGD( cl );
        sgd->setLearningRate( config.learningRate );
        sgd->setMomentum( config.momentum );
        sgd->setWeightDecay( config.weightDecay );
        trainer = sgd;
    } else if( toLower( config.trainer ) == "anneal" ) {
        Annealer *annealer = new Annealer( cl );
        annealer->setLearningRate( config.learningRate );
        annealer->setAnneal( config.anneal );
        trainer = annealer;
    } else if( toLower( config.trainer ) == "nesterov" ) {
        Nesterov *nesterov = new Nesterov( cl );
        nesterov->setLearningRate( config.learningRate );
        nesterov->setMomentum( config.momentum );
        trainer = nesterov;
    } else if( toLower( config.trainer ) == "adagrad" ) {
        Adagrad *adagrad = new Adagrad( cl );
        adagrad->setLearningRate( config.learningRate );
        trainer = adagrad;
    } else if( toLower( config.trainer ) == "rmsprop" ) {
        Rmsprop *rmsprop = new Rmsprop( cl );
        rmsprop->setLearningRate( config.learningRate );
        trainer = rmsprop;
    } else if( toLower( config.trainer ) == "adadelta" ) {
        Adadelta *adadelta = new Adadelta( cl, config.rho );
        trainer = adadelta;
    } else {
        cout << "trainer " << config.trainer << " unknown." << endl;
        return;
    }
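    // Note: Adadelta takes no explicit learning rate; it adapts per-parameter step sizes
    // from running gradient statistics, so only rho is configured above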
    cout << "Using trainer " << trainer->asString() << endl;
//    trainer->bindTo( net );
//    net->setTrainer( trainer );
    net->setBatchSize( config.batchSize );
    net->print();

    bool afterRestart = false;
    int restartEpoch = 0;
    int restartBatch = 0;
    float restartAnnealedLearningRate = 0;
    int restartNumRight = 0;
    float restartLoss = 0;
    if( config.loadWeights && config.weightsFile != "" ) {
        cout << "loadingweights" << endl;
        afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            // try old trainingstring
            afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getOldTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        }
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            cout << "Weights file " << config.weightsFile << " exists, but doesnt match training options provided." << endl;
            cout << "Continue loading anyway (might crash, or weights might be completely inappropriate)? (y/n)" << endl;
            string response;
            cin >> response;
            if( response != "y" ) {
                cout << "Please either check the training options, or choose a weights file that doesnt exist yet" << endl;
                return;
            }
        }