Example #1
//layer2 plane0=0 "planes not both -1 and planes not both 1"
//      weights = plane0*(-1) + plane1*(-1)
//      plane1=1 "planes both -1 or planes both 1"
//      weights = plane0*(1) + plane1*(1)
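// For reference, the XOR-style mapping encoded in the data and labels below:
//   input (-1,-1) -> label 0      input (-1, 1) -> label 1
//   input ( 1,-1) -> label 1      input ( 1, 1) -> label 0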
TEST( testlogicaloperators, Convolve_2layers_relu_Xor ) {
    cout << "Xor, convolve" << endl;
//    LogicalDataCreator ldc(new TanhActivation());
//    ldc.applyXorGate();

//    int imageSize = 1;
//    int inPlanes = 2;
    int numExamples = 4;
//    int filterSize = 1;
    float data[] = { -1, -1,
                     -1, 1,
                     1, -1,
                     1, 1 };
    float layer1weights[] = { // preset to near an optimal solution, to show the
                              // network is stable and gives the correct result
        -0.4f, -0.55f,
        0.52f, 0.53f,
    };
    float layer1bias[] = {
       0.1f,
       -0.1f
    };
    float layer2weights[] = {
        1.1f, 0.9f,
        -0.8f, -1.2f
    };
    float layer2bias[] = {
       0.1f,
       1.1f
    };
    float expectedOutput[] = {
        1, 0,
        0, 1,
        0, 1,
        1, 0
    };
    int labels[] = {
        0,
        1,
        1,
        0
    };

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    ClBlasInstance blasInstance;
    NeuralNet *net = NeuralNet::maker(cl)->planes(2)->imageSize(1)->instance();
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( ConvolutionalMaker::instance()->numFilters(2)->filterSize(1)->biased(1) );
    net->addLayer( ActivationMaker::instance()->relu() );
    net->addLayer( SquareLossMaker::instance() );
    cout << "hand-setting weights..." << endl;
    net->initWeights( 1, layer1weights, layer1bias );
    net->initWeights( 3, layer2weights, layer2bias );
//    net->printWeights();
//    net->setBatchSize(4);
//    net->forward( data );
//    net->print();
    SGD *sgd = SGD::instance( cl, 0.1f, 0 );
    for( int epoch = 0; epoch < 200; epoch++ ) {
        net->epochMaker(sgd)->batchSize(numExamples)->numExamples(numExamples)->inputData(data)
           ->expectedOutputs(expectedOutput)->run( epoch );
        if( epoch % 5 == 0 ) cout << "Loss L " << net->calcLoss(expectedOutput) << endl;
    }
    net->print();
    AccuracyHelper::printAccuracy( numExamples, 2, labels, net->getOutput() );

    float loss = net->calcLoss(expectedOutput);
    cout << "loss, E, " << loss << endl;
    EXPECT_GE( 0.0000001f, loss );

    delete sgd;
    delete net;
    delete cl;
}
Example #2
TEST(testbackward, squareloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
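    // first-order check: for a small perturbation d applied to a single input x_i,
    //   loss(x + d*e_i) - loss(x) ≈ (dLoss/dx_i) * d
    // so each "predicted losschange" printed below should roughly match the
    // measured change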
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 3, 5);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SquareLossMaker::instance());
    cout << net->asString() << endl;

    int batchSize = 32;
    net->setBatchSize(batchSize);

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    WeightRandomizer::randomize(0, input, inputTotalSize, -2.0f, 2.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, -2.0f, 2.0f);
    
    // now, forward prop
//    net->input(input);
    net->forward(input);
    net->print();
//    net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
//        cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
//        cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.01f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
//        cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
//        net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
Example #3
void testNumerically(float learningRate, int batchSize, int imageSize, int filterSize, int numPlanes, ActivationFunction *fn, bool padZeros, int its = 20) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    ClBlasInstance clblasInstance;
    NeuralNet *net = NeuralNet::maker(cl)->planes(numPlanes)->imageSize(imageSize)->instance();
    net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros));
    net->addLayer(ActivationMaker::instance()->fn(fn));
    net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros));
    net->addLayer(ActivationMaker::instance()->fn(fn));
    net->addLayer(SquareLossMaker::instance());
    net->setBatchSize(batchSize);

    int inputNumElements = net->getLayer(0)->getOutputNumElements();
    int outputNumElements = net->getLastLayer()->getOutputNumElements();
    int weightsSize1 = net->getLayer(1)->getWeightsSize();
    int weightsSize2 = net->getLayer(3)->getWeightsSize();

    float *inputData = new float[std::max<int>(10000, inputNumElements)];
    float *expectedOutput = new float[std::max<int>(10000, outputNumElements)];
    memset(inputData, 0, sizeof(float) * std::max<int>(10000, inputNumElements));
    memset(expectedOutput, 0, sizeof(float) * std::max<int>(10000, outputNumElements));
//    int seed = 0;
    std::mt19937 random = WeightRandomizer::randomize(inputData, std::max<int>(10000, inputNumElements), -2.0f, 2.0f);
    WeightRandomizer::randomize(random, expectedOutput, std::max<int>(10000, outputNumElements), -2.0f, 2.0f);
    WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weights, weightsSize1, -2.0f, 2.0f);
    dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToDevice();
    WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weights, weightsSize2, -2.0f, 2.0f);
    dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice();

    SGD *sgd = SGD::instance(cl, learningRate, 0.0f);
    for(int it = 0; it < its; it++) {
        float *weightsBefore1 = new float[weightsSize1];
        float *currentWeights = net->getLayer(1)->getWeights();
        for(int i = 0; i < weightsSize1; i++) {
            weightsBefore1[i] = currentWeights[i];
        }
        float *weightsBefore2 = new float[weightsSize2];
        currentWeights = net->getLayer(3)->getWeights();
        for(int i = 0; i < weightsSize2; i++) {
            weightsBefore2[i] = currentWeights[i];
        }

        net->forward(inputData);
    //    net->print();
        float loss = net->calcLoss(expectedOutput);
        dynamic_cast<LossLayer*>(net->getLayer(5))->calcLoss(expectedOutput);
//        net->backward(expectedOutput);
        TrainingContext context(0, 0);
        sgd->train(net, &context, inputData, expectedOutput);
        dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToHost();
        // restore 2nd-layer weights, so only layer 1's update affects the next
        // forward pass
        for(int i = 0; i < weightsSize2; i++) {
            dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weights[i] = weightsBefore2[i];
        }
        dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice();
        net->forward(inputData);

        float loss2 = net->calcLoss(expectedOutput);
        float lossChange = loss - loss2;
        cout << " loss " << loss << " loss2 " << loss2 << " change: " << lossChange << endl;

        float *newWeights = net->getLayer(1)->getWeights();
        float sumWeightDiff = 0;
        float sumWeightDiffSquared = 0;
        for(int i = 0; i < weightsSize1; i++) {
            float diff = newWeights[i] - weightsBefore1[i];
            sumWeightDiff += diff;
            sumWeightDiffSquared += diff * diff;
        }
        newWeights = net->getLayer(3)->getWeights();
        for(int i = 0; i < weightsSize2; i++) {
            float diff = newWeights[i] - weightsBefore2[i];
            sumWeightDiff += diff;
            sumWeightDiffSquared += diff * diff;
        }
        cout << "sumweightsdiff " << sumWeightDiff << endl;
    //    cout << "sumweightsdiff / learningrate " << (sumWeightDiff / learningRate) << endl;
    //    cout << "sum weightsdiffsquared " << (sumWeightDiffSquared/ learningRate / learningRate * imageSize) << endl;

        float estimatedLossChangeFromW = sumWeightDiffSquared/ learningRate; // / filterSize;
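        // rationale: vanilla SGD updates each weight by dW_i = -learningRate * grad_i,
        // so to first order the expected loss decrease is
        //   -sum_i(grad_i * dW_i) = learningRate * sum_i(grad_i^2) = sumWeightDiffSquared / learningRate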

        cout << " loss change              " << lossChange << endl;
        cout << " estimatedLossChangeFromW " << estimatedLossChangeFromW << endl;
    //    cout << abs(estimatedLossChangeFromW - lossChange) / lossChange << endl;    
    //    cout << abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW << endl;    
        EXPECT_GT(0.01f * imageSize * imageSize, fabs(estimatedLossChangeFromW - lossChange) / lossChange);
        EXPECT_GT(0.01f * imageSize * imageSize, fabs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW);
        delete[] weightsBefore1;
        delete[] weightsBefore2;
    }
//    delete[] weights1;
//    delete[] errors;
//    delete[] output;
    delete sgd;
    delete[] inputData;
    delete[] expectedOutput;
    delete net;
    delete cl;
}
Example #4
TEST(testbackward, softmaxloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
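    // same finite-difference check as the squareloss test above, but through a
    // softmax output layer; for softmax paired with a cross-entropy-style loss,
    // the analytic gradient with respect to the layer's input is
    // (output - expectedOutput)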
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 5, 1);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SoftMaxMaker::instance());
    cout << net->asString() << endl;

    const int batchSize = 2;
    net->setBatchSize(batchSize);
    const int outputPlanes = net->getOutputPlanes();

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    WeightRandomizer::randomize(0, input, inputTotalSize, 0.0f, 1.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, 0.0f, 1.0f);

    // make the expected output a probability distribution (each example's
    // outputs summing to 1), to match what the softmax layer produces
//    normalizeAsProbabilityDistribution(input, inputTotalSize);
    normalizeAsProbabilityDistribution(outputPlanes, expectedOutput, outputTotalSize);

    // set all to zero, and one to 1, ie like labelled data
//    for(int i = 0; i < outputTotalSize; i++) {
//        expectedOutput[i] = 0;
//    }
//    for(int n = 0; n < batchSize; n++) {
//        int chosenLabel = 0;
//        WeightRandomizer::randomizeInts(n, &chosenLabel, 1, 0, net->getOutputPlanes());
//        expectedOutput[ n * outputPlanes + chosenLabel ] = 1;
//    }
//    for(int i = 0; i < outputTotalSize; i++) {
//        cout << "expected[" << i << "]=" << expectedOutput[i] << endl;
//    }
//        
    // now, forward prop
//    net->input(input);
    net->forward(input);
    net->print();
//    net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
//        cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
//        cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.001f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
//        cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
//        net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
Example #5
int main()
{

  // init variables
  double error = 0.;
  int truecnt = 0;
  int times,timed;
  
  // print useful info for reference
  std::cout << "\n" << "hidden neurons: " << "\t \t" << HIDDEN << std::endl;
  // init random number generator
  srand((int)time(NULL));  

  // create network
  std::cout << "initializing network..." << "\t \t";
  NeuralNet DigitNet;

  NeuralLayer * pHiddenLayer1 = new NeuralTanhLayer(INPUT,HIDDEN);
  DigitNet.addLayer( pHiddenLayer1 );
  NeuralLayer * pOutputLayer = new NeuralSoftmaxLayer(HIDDEN,OUTPUT);
  DigitNet.addLayer( pOutputLayer );

  // set output type:
  // SCALAR = tanh or sigmoid output layer (single output neuron)
  // PROB = softmax output layer with 1-of-N output encoding (one output neuron per class)
  const unsigned int outType = PROB;

  // set learning rate, momentum, decay rate
  const double learningRate = 0.15;
  const double momentum =     0.0;
  const double decayRate =    0.0;
  DigitNet.setParams(learningRate,momentum,decayRate,outType);

  std::cout << "done" << std::endl;
  
  // load training and test data
  std::cout << "loading data..." << "\t \t \t";
  std::vector< std::vector<double> > bigData( DATA_SIZE,std::vector<double>(INPUT+1,0.0) );
  loadFromFile(bigData,"train.txt");

  std::vector< std::vector<double> > trainData( TRAIN_SIZE,std::vector<double>(INPUT+1,0.0) );
  std::vector< std::vector<double> > testData( TEST_SIZE,std::vector<double>(INPUT+1,0.0) );
  
  buildData(bigData,trainData,TRAIN_SIZE,testData,TEST_SIZE);
  std::cout << "done" << std::endl;
  
  // loop over training data points and train net
  // slice off first column of each row (example)
  times=(int)time(NULL);   // init time counter
  std::cout << "\n" << "training examples: " << "\t \t" << TRAIN_SIZE << std::endl;
  std::cout << "learning rate: " << "\t \t \t" << learningRate << std::endl;
  std::cout << "momentum: " << "\t \t \t" << momentum << std::endl;
  std::cout << "weight decay: " << "\t \t \t" << decayRate << std::endl;
  std::cout << "training network..." << "\t \t";
  for(int i=0;i<TRAIN_SIZE;++i)
  {
    std::vector<double> data = trainData[i];            // extract data point
    double label = data[0];                             // extract point label
    data.erase(data.begin());
    std::vector<double> nLabel = encode((int)label);    // encode to 1-of-N   
    
    std::vector<double> outputs = DigitNet.runNet(data);
    error = DigitNet.trainNet(data,nLabel,outType);    // train net, return MSE

    // decode output and compare to correct output 
    if( decode(outputs) == (int)label )
        truecnt++;    
  }

  // stop timer and print out useful info
  timed=(int)time(NULL);
  times=timed-times;
  std::cout << "done" << std::endl;
  std::cout << "training time: " << "\t \t \t" << times << " seconds " << std::endl;
  std::cout << "training accuracy: " << "\t \t" << truecnt*100./TRAIN_SIZE << "%" << std::endl;
  
  // test net on test data
  times=(int)time(NULL);   // init time counter
  std::cout << "\n" << "test points: " << "\t \t \t" << TEST_SIZE << std::endl;
  std::cout << "testing network..." << "\t \t";
  truecnt = 0;
  for(int i=0;i<TEST_SIZE;++i)
  {
    
    std::vector<double> data = testData[i];     // extract data point 
    double label = data[0];                     // extract label
    data.erase(data.begin());
   
    std::vector<double> outputs = DigitNet.runNet(data);    // run net

    // decode output and compare to correct output 
    if( decode(outputs) == (int)label )
        truecnt++;    
    
  }

  // stop timer and print out useful info
  timed=(int)time(NULL);
  times=timed-times;
  std::cout << "done" << std::endl;
  std::cout << "testing time: " << "\t \t \t" << times << " seconds " << std::endl;
  std::cout << "test accuracy: " << "\t \t \t" << truecnt*100./TEST_SIZE << "% " << std::endl;
  
  // save weights to reuse net in the future
  DigitNet.saveNet();
  
}
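
The encode() and decode() helpers used above are not shown in this listing. A
minimal sketch of what they might look like, assuming a 1-of-N encoding over
OUTPUT classes (the bodies below are assumptions for illustration, not the
original implementation):

#include <vector>
#include <algorithm>

// hypothetical sketch: map a class label to a 1-of-N target vector
std::vector<double> encode(int label)
{
    std::vector<double> v(OUTPUT, 0.0);   // OUTPUT = number of classes
    v[label] = 1.0;
    return v;
}

// hypothetical sketch: map a network output vector to the index of its
// largest element, i.e. the predicted class
int decode(const std::vector<double> &outputs)
{
    return (int)(std::max_element(outputs.begin(), outputs.end()) - outputs.begin());
}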
Example #6
void go(Config config) {
    Timer timer;

    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;

    unsigned char *trainData = 0;
    unsigned char *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;

    int trainAllocateN = 0;
    int testAllocateN = 0;

//    int totalLinearSize;
    GenericLoader::getDimensions( config.dataDir + "/" + config.trainFile, &Ntrain, &numPlanes, &imageSize );
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
//    long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    if( config.loadOnDemand ) {
        trainAllocateN = config.batchSize; // can improve this later
    } else {
        trainAllocateN = Ntrain;
    }
    trainData = new unsigned char[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if( !config.loadOnDemand && Ntrain > 0 ) {
        GenericLoader::load( config.dataDir + "/" + config.trainFile, trainData, trainLabels, 0, Ntrain );
    }

    GenericLoader::getDimensions( config.dataDir + "/" + config.validateFile, &Ntest, &numPlanes, &imageSize );
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    if( config.loadOnDemand ) {
        testAllocateN = config.batchSize; // can improve this later
    } else {
        testAllocateN = Ntest;
    }
    testData = new unsigned char[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN]; 
    if( !config.loadOnDemand && Ntest > 0 ) {
        GenericLoader::load( config.dataDir + "/" + config.validateFile, testData, testLabels, 0, Ntest );
    }
    cout << "Ntest " << Ntest << " Ntest" << endl;
    
    timer.timeCheck("after load images");

    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if( !config.loadOnDemand ) {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizationHelper::getMeanAndStdDev( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            float mean, stdDev;
            NormalizationHelper::getMinMax( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            translate = - mean;
            scale = 1.0f / stdDev;
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    } else {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizeGetStdDev<unsigned char> normalizeGetStdDev( trainData, trainLabels ); 
            BatchProcess::run<unsigned char>( config.dataDir + "/" + config.trainFile, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetStdDev );
            normalizeGetStdDev.calcMeanStdDev( &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            NormalizeGetMinMax<unsigned char> normalizeGetMinMax( trainData, trainLabels );
            BatchProcess::run( config.dataDir + "/" + config.trainFile, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetMinMax );
            normalizeGetMinMax.calcMinMaxTransform( &translate, &scale );
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    }
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");

//    const int numToTrain = Ntrain;
//    const int batchSize = config.batchSize;
    NeuralNet *net = new NeuralNet();
//    net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer( InputLayerMaker<unsigned char>::instance()->numPlanes(numPlanes)->imageSize(imageSize) );
    net->addLayer( NormalizationLayerMaker::instance()->translate(translate)->scale(scale) );
    if( !NetdefToNet::createNetFromNetdef( net, config.netDef ) ) {
        return;
    }
    net->print();

    bool afterRestart = false;
    int restartEpoch = 0;
    int restartBatch = 0;
    float restartAnnealedLearningRate = 0;
    int restartNumRight = 0;
    float restartLoss = 0;
    if( config.loadWeights && config.weightsFile != "" ) {
        afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            cout << "Weights file " << config.weightsFile << " exists, but doesnt match training options provided => aborting" << endl;
            cout << "Please either check the training options, or choose a weights file that doesnt exist yet" << endl;
            return;
        }
    }

    timer.timeCheck("before learning start");
    if( config.dumpTimings ) {
        StatefulTimer::dump( true );
    }
    StatefulTimer::timeCheck("START");

    Trainable *trainable = net;
    MultiNet *multiNet = 0;
    if( config.multiNet > 1 ) {
        multiNet = new MultiNet( config.multiNet, net );
        trainable = multiNet;
    }
    if( config.loadOnDemand ) {
        NetLearnerOnDemand<unsigned char> netLearner( trainable );
        netLearner.setTrainingData( config.dataDir + "/" + config.trainFile, Ntrain );
        netLearner.setTestingData( config.dataDir + "/" + config.validateFile, Ntest );
        netLearner.setSchedule( config.numEpochs, afterRestart ? restartEpoch : 1 );
        netLearner.setBatchSize( config.fileReadBatches, config.batchSize );
        netLearner.setDumpTimings( config.dumpTimings );
        WeightsWriter weightsWriter( net, &config );
        if( config.weightsFile != "" ) {
            netLearner.addPostEpochAction( &weightsWriter );
        }
        netLearner.learn( config.learningRate, config.annealLearningRate );
    } else {
        NetLearner<unsigned char> netLearner( trainable );
        netLearner.setTrainingData( Ntrain, trainData, trainLabels );
        netLearner.setTestingData( Ntest, testData, testLabels );
        netLearner.setSchedule( config.numEpochs, afterRestart ? restartEpoch : 1 );
        netLearner.setBatchSize( config.batchSize );
        netLearner.setDumpTimings( config.dumpTimings );
        WeightsWriter weightsWriter( net, &config );
        if( config.weightsFile != "" ) {
            netLearner.addPostEpochAction( &weightsWriter );
        }
        netLearner.learn( config.learningRate, config.annealLearningRate );
    }

    if( multiNet != 0 ) {
        delete multiNet;
    }
    delete net;

    if( trainData != 0 ) {
        delete[] trainData;
    }
    if( testData != 0 ) {
        delete[] testData;
    }
    if( testLabels != 0 ) {
        delete[] testLabels;
    }
    if( trainLabels != 0 ) {
        delete[] trainLabels;
    }
}
Example #7
void go(Config config) {
    Timer timer;

    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;

    float *trainData = 0;
    float *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;

    int trainAllocateN = 0;
    int testAllocateN = 0;

//    int totalLinearSize;
    GenericLoaderv2 trainLoader( config.dataDir + "/" + config.trainFile );
    Ntrain = trainLoader.getN();
    numPlanes = trainLoader.getPlanes();
    imageSize = trainLoader.getImageSize();
    // GenericLoader::getDimensions( , &Ntrain, &numPlanes, &imageSize );
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
//    long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    if( config.loadOnDemand ) {
        trainAllocateN = config.batchSize; // can improve this later
    } else {
        trainAllocateN = Ntrain;
    }
    trainData = new float[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if( !config.loadOnDemand && Ntrain > 0 ) {
        trainLoader.load( trainData, trainLabels, 0, Ntrain );
    }

    GenericLoaderv2 testLoader( config.dataDir + "/" + config.validateFile );
    Ntest = testLoader.getN();
    numPlanes = testLoader.getPlanes();
    imageSize = testLoader.getImageSize();
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    if( config.loadOnDemand ) {
        testAllocateN = config.batchSize; // can improve this later
    } else {
        testAllocateN = Ntest;
    }
    testData = new float[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN]; 
    if( !config.loadOnDemand && Ntest > 0 ) {
        testLoader.load( testData, testLabels, 0, Ntest );
    }
    cout << "Ntest " << Ntest << " Ntest" << endl;
    
    timer.timeCheck("after load images");

    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if( !config.loadOnDemand ) {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizationHelper::getMeanAndStdDev( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            float mean, stdDev;
            NormalizationHelper::getMinMax( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            translate = - mean;
            scale = 1.0f / stdDev;
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    } else {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizeGetStdDev normalizeGetStdDev( trainData, trainLabels ); 
            BatchProcessv2::run( &trainLoader, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetStdDev );
            normalizeGetStdDev.calcMeanStdDev( &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            NormalizeGetMinMax normalizeGetMinMax( trainData, trainLabels );
            BatchProcessv2::run( &trainLoader, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetMinMax );
            normalizeGetMinMax.calcMinMaxTransform( &translate, &scale );
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    }
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");

//    const int numToTrain = Ntrain;
//    const int batchSize = config.batchSize;

    EasyCL *cl = 0;
    if( config.gpuIndex >= 0 ) {
        cl = EasyCL::createForIndexedGpu( config.gpuIndex );
    } else {
        cl = EasyCL::createForFirstGpuOtherwiseCpu();
    }

    NeuralNet *net;
    net = new NeuralNet(cl);

    WeightsInitializer *weightsInitializer = 0;
    if( toLower( config.weightsInitializer ) == "original" ) {
        weightsInitializer = new OriginalInitializer();
    } else if( toLower( config.weightsInitializer ) == "uniform" ) {
        weightsInitializer = new UniformInitializer( config.initialWeights );
    } else {
        cout << "Unknown weights initializer " << config.weightsInitializer << endl;
        return;
    }

//    net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer( InputLayerMaker::instance()->numPlanes(numPlanes)->imageSize(imageSize) );
    net->addLayer( NormalizationLayerMaker::instance()->translate(translate)->scale(scale) );
    if( !NetdefToNet::createNetFromNetdef( net, config.netDef, weightsInitializer ) ) {
        return;
    }
    // apply the trainer
    Trainer *trainer = 0;
    if( toLower( config.trainer ) == "sgd" ) {
        SGD *sgd = new SGD( cl );
        sgd->setLearningRate( config.learningRate );
        sgd->setMomentum( config.momentum );
        sgd->setWeightDecay( config.weightDecay );
        trainer = sgd;
    } else if( toLower( config.trainer ) == "anneal" ) {
        Annealer *annealer = new Annealer( cl );
        annealer->setLearningRate( config.learningRate );
        annealer->setAnneal( config.anneal );
        trainer = annealer;
    } else if( toLower( config.trainer ) == "nesterov" ) {
        Nesterov *nesterov = new Nesterov( cl );
        nesterov->setLearningRate( config.learningRate );
        nesterov->setMomentum( config.momentum );
        trainer = nesterov;
    } else if( toLower( config.trainer ) == "adagrad" ) {
        Adagrad *adagrad = new Adagrad( cl );
        adagrad->setLearningRate( config.learningRate );
        trainer = adagrad;
    } else if( toLower( config.trainer ) == "rmsprop" ) {
        Rmsprop *rmsprop = new Rmsprop( cl );
        rmsprop->setLearningRate( config.learningRate );
        trainer = rmsprop;
    } else if( toLower( config.trainer ) == "adadelta" ) {
        Adadelta *adadelta = new Adadelta( cl, config.rho );
        trainer = adadelta;
    } else {
        cout << "trainer " << config.trainer << " unknown." << endl;
        return;
    }
    cout << "Using trainer " << trainer->asString() << endl;
//    trainer->bindTo( net );
//    net->setTrainer( trainer );
    net->setBatchSize( config.batchSize );
    net->print();

    bool afterRestart = false;
    int restartEpoch = 0;
    int restartBatch = 0;
    float restartAnnealedLearningRate = 0;
    int restartNumRight = 0;
    float restartLoss = 0;
    if( config.loadWeights && config.weightsFile != "" ) {
        cout << "loadingweights" << endl;
        afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            // try old trainingstring
            afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getOldTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        }
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            cout << "Weights file " << config.weightsFile << " exists, but doesnt match training options provided." << endl;
            cout << "Continue loading anyway (might crash, or weights might be completely inappropriate)? (y/n)" << endl;
            string response;
            cin >> response;
            if( response != "y" ) {
                cout << "Please either check the training options, or choose a weights file that doesnt exist yet" << endl;
                return;
            }
        }
    }
    // (the rest of this example's listing is truncated in the source)
}
Example #8
void go(Config config) {
    Timer timer;

    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;

    float *trainData = 0;
    float *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;

    int trainAllocateN = 0;
    int testAllocateN = 0;

//    int totalLinearSize;
    GenericLoader::getDimensions((config.dataDir + "/" + config.trainFile).c_str(), &Ntrain, &numPlanes, &imageSize );
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
//    long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    trainAllocateN = Ntrain;
    trainData = new float[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if( Ntrain > 0 ) {
        GenericLoader::load((config.dataDir + "/" + config.trainFile).c_str(), trainData, trainLabels, 0, Ntrain );
    }

    GenericLoader::getDimensions((config.dataDir + "/" + config.validateFile).c_str(), &Ntest, &numPlanes, &imageSize );
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    testAllocateN = Ntest;
    testData = new float[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN]; 
    if( Ntest > 0 ) {
        GenericLoader::load((config.dataDir + "/" + config.validateFile).c_str(), testData, testLabels, 0, Ntest );
    }
    
    timer.timeCheck("after load images");

    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if( config.normalization == "stddev" ) {
        float mean, stdDev;
        NormalizationHelper::getMeanAndStdDev( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
        cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
        translate = - mean;
        scale = 1.0f / stdDev / config.normalizationNumStds;
    } else if( config.normalization == "maxmin" ) {
        float mean, stdDev;
        NormalizationHelper::getMinMax( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
        translate = - mean;
        scale = 1.0f / stdDev;
    } else {
        cout << "Error: Unknown normalization: " << config.normalization << endl;
        return;
    }
    
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");

//    const int numToTrain = Ntrain;
//    const int batchSize = config.batchSize;
    EasyCL *cl = new EasyCL();
    NeuralNet *net = new NeuralNet( cl );
//    net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer( InputLayerMaker::instance()->numPlanes(numPlanes)->imageSize(imageSize) );
    net->addLayer( NormalizationLayerMaker::instance()->translate(translate)->scale(scale) );
    if( !NetdefToNet::createNetFromNetdef( net, config.netDef ) ) {
        return;
    }
    net->print();
    for( int i = 1; i < net->getNumLayers() - 1; i++ ) {
        Layer *layer = net->getLayer(i);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >(layer);
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >(layer);
        if( fc != 0 ) {
            conv = fc->convolutionalLayer;
        }
        if( conv == 0 ) {
            continue;
        }
        initrand.seed(0);
        int weightsSize = conv->getWeightsSize();
    //int weightsSize = layer->getPersistSize();
        if( weightsSize > 0 ) {
            cout << "weightsSize " << weightsSize << endl;
            float *weights = new float[weightsSize];
            for( int j = 0; j < weightsSize; j++ ) {
                int thisrand = (int)initrand();
                float thisweight = ( thisrand % 100000 ) / 1000000.0f;
                weights[j] = thisweight;
            }        
            conv->initWeights( weights );  // initWeights copies the values in
            delete[] weights;
        }
        if( conv->dim.biased ) {
            initrand.seed(0);
            int biasedSize = conv->getBiasSize();
            float *biasWeights = new float[biasedSize];
            for( int j = 0; j < biasedSize; j++ ) {
                int thisrand = (int)initrand();
                float thisweight = ( thisrand % 100000 ) / 1000000.0f;
                biasWeights[j] = thisweight;
                //biasWeights[j] = 0;
            }        
            conv->initBias( biasWeights );  // initBias copies the values in
            delete[] biasWeights;
        }
    }
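    // note: reseeding initrand with 0 for every layer makes these "random" weights
    // deterministic across runs, so the samples printed below are reproducible;
    // each value is (thisrand % 100000) / 1000000.0f, i.e. within (-0.1, 0.1)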

    cout << "weight samples before learning:" << endl;
    sampleWeights(net);

    bool afterRestart = false;
    int restartEpoch = 0;
//    int restartBatch = 0;
//    float restartAnnealedLearningRate = 0;
//    int restartNumRight = 0;
//    float restartLoss = 0;

    timer.timeCheck("before learning start");
    if( config.dumpTimings ) {
        StatefulTimer::dump( true );
    }
    StatefulTimer::timeCheck("START");

    SGD *sgd = SGD::instance( cl, config.learningRate, 0.0f );
    Trainable *trainable = net;
    NetLearner netLearner( 
        sgd, trainable,
        Ntrain, trainData, trainLabels,
        Ntest, testData, testLabels,
        config.batchSize );
    netLearner.setSchedule( config.numEpochs, afterRestart ? restartEpoch : 1 );
//    netLearner.setBatchSize( config.batchSize );
    netLearner.setDumpTimings( config.dumpTimings );  
//    netLearner.learn( config.learningRate, 1.0f );

    cout << "forward output" << endl;
    for( int layerId = 0; layerId < net->getNumLayers(); layerId++ ) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >( layer );
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >( layer );
        PoolingLayer *pool = dynamic_cast< PoolingLayer * >( layer );
        SoftMaxLayer *softMax = dynamic_cast< SoftMaxLayer * >( layer );
        if( fc != 0 ) {
            conv = fc->convolutionalLayer;
        }
        int planes = 0;
        int imageSize = 0;
        if( conv != 0 ) {
            cout << "convolutional (or conv based, ie fc)" << endl;
            planes = conv->dim.numFilters;
            imageSize = conv->dim.outputSize;
          //  continue;
        } else if( pool != 0 ) {
            cout << "pooling" << endl;
            planes = pool->numPlanes;
            imageSize = pool->outputSize;
        } else if( softMax != 0 ) {
            cout << "softmax" << endl;
            planes = softMax->numPlanes;
            imageSize = softMax->imageSize;
        } else {
            continue;
        }
        cout << "layer " << layerId << endl;
//        conv->getOutput();
        float const*output = layer->getOutput();
//        for( int i = 0; i < 3; i++ ) {
//            cout << conv->getOutput()[i] << endl;
//        }
        initrand.seed(0);
//        LayerDimensions &dim = conv->dim;
        for( int i = 0; i < 10; i++ ) {
            int thisrand = abs( (int)initrand() );
            int seq = thisrand % ( planes * imageSize * imageSize );
            int outPlane = seq / ( imageSize * imageSize );
            int rowcol = seq % ( imageSize * imageSize );
            int row = rowcol / imageSize;
            int col = rowcol % imageSize;
            cout << "out[" << outPlane << "," << row << "," << col << "]=" << output[ seq ] << endl;
        }
    }

    cout << "weight samples after learning:" << endl;
    sampleWeights(net);

    cout << "backprop output" << endl;
    for( int layerId = net->getNumLayers() - 1; layerId >= 0; layerId-- ) {
        Layer *layer = net->getLayer(layerId);
        FullyConnectedLayer *fc = dynamic_cast< FullyConnectedLayer * >( layer );
        ConvolutionalLayer *conv = dynamic_cast< ConvolutionalLayer * >( layer );
        if( fc != 0 ) {
            conv = fc->convolutionalLayer;
        }
        if( conv == 0 ) {
            continue;
        }

        cout << "layer " << layerId << endl;
        float const*weights = conv->getWeights();
        float const*biases = conv->getBias();
        int weightsSize = conv->getWeightsSize() / conv->dim.numFilters;
        for( int i = 0; i < weightsSize; i++ ) {
            cout << " weight " << i << " " << weights[i] << endl;
        }
        for( int i = 0; i < 3 && i < conv->getBiasSize(); i++ ) {
            cout << " bias " << i << " " << biases[i] << endl;
        }
    }
    cout << "done" << endl;

    delete sgd;
    delete net;
    delete cl;

    if( trainData != 0 ) {
        delete[] trainData;
    }
    if( testData != 0 ) {
        delete[] testData;
    }
    if( testLabels != 0 ) {
        delete[] testLabels;
    }
    if( trainLabels != 0 ) {
        delete[] trainLabels;
    }
}