Example #1
TEST( testforward, softmax_byplane ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = NeuralNet::maker(cl)->imageSize(2)->planes(1)->instance();
    net->addLayer( SoftMaxMaker::instance()->perPlane() );
    net->setBatchSize( 1 );
    int imageSizeSquared = net->getLayer(0)->getOutputSize() * net->getLayer(0)->getOutputSize();
    float *input = new float[imageSizeSquared];
    input[0] = 0;
    input[1] = 1;
    input[2] = 3;
    input[3] = 2;
    net->forward( input );
    float const*output = net->getOutput();
    float sum = 0;
    for( int i = 0; i < imageSizeSquared; i++ ) {
        cout << "output[" << i << "]=" << output[i] << endl;
        sum += output[i];
        EXPECT_LE( 0, output[i] );
        EXPECT_GE( 1, output[i] );
    }
    EXPECT_FLOAT_NEAR( 1.0f, sum );
    EXPECT_FLOAT_NEAR( (float)( exp(0.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), output[0] );
    EXPECT_FLOAT_NEAR( (float)( exp(1.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), output[1] );
    EXPECT_FLOAT_NEAR( (float)( exp(3.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), output[2] );
    EXPECT_FLOAT_NEAR( (float)( exp(2.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), output[3] );

    float *expected = new float[imageSizeSquared];
    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[2] = 1;
    float loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(output[2]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[0] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(output[0]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[1] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(output[1]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[3] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(output[3]), loss );

    delete[] input;
    delete[] expected;
    delete net;
    delete cl;
}
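For reference, the expected values asserted above can be checked by hand: the softmax denominator is exp(0)+exp(1)+exp(3)+exp(2) ≈ 31.19, so the four outputs are roughly 0.032, 0.087, 0.644 and 0.237, and the loss with expected[2] == 1 is -log(0.644) ≈ 0.44. A small standalone check (plain C++, not part of DeepCL):

// Standalone sanity check of the values the test above expects (not DeepCL code):
// softmax over the inputs {0, 1, 3, 2} and the cross-entropy loss for target index 2.
#include <cmath>
#include <cstdio>

int main() {
    float in[4] = {0, 1, 3, 2};
    float sum = 0;
    for(int i = 0; i < 4; i++) sum += std::exp(in[i]);          // ~31.19
    float out[4];
    for(int i = 0; i < 4; i++) out[i] = std::exp(in[i]) / sum;  // ~0.032, 0.087, 0.644, 0.237
    float loss = -std::log(out[2]);                             // ~0.44, matching -log(output[2]) above
    std::printf("out[2]=%f loss=%f\n", out[2], loss);
    return 0;
}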
Example #2
TEST( testpropagate, softmax_byplane ) {
    NeuralNet *net = NeuralNet::maker()->imageSize(2)->planes(1)->instance();
    net->addLayer( SoftMaxMaker::instance()->perPlane() );
    net->setBatchSize( 1 );
    int imageSizeSquared = net->layers[0]->getOutputImageSize() * net->layers[0]->getOutputImageSize();
    float *input = new float[imageSizeSquared];
    input[0] = 0;
    input[1] = 1;
    input[2] = 3;
    input[3] = 2;
    net->propagate( input );
    float const*results = net->getResults();
    float sum = 0;
    for( int i = 0; i < imageSizeSquared; i++ ) {
        cout << "results[" << i << "]=" << results[i] << endl;
        sum += results[i];
        EXPECT_LE( 0, results[i] );
        EXPECT_GE( 1, results[i] );
    }
    EXPECT_FLOAT_NEAR( 1.0f, sum );
    EXPECT_FLOAT_NEAR( (float)( exp(0.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), results[0] );
    EXPECT_FLOAT_NEAR( (float)( exp(1.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), results[1] );
    EXPECT_FLOAT_NEAR( (float)( exp(3.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), results[2] );
    EXPECT_FLOAT_NEAR( (float)( exp(2.0f)/(exp(0.0f)+exp(1.0f)+exp(3.0f)+exp(2.0f)) ), results[3] );

    float *expected = new float[imageSizeSquared];
    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[2] = 1;
    float loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(results[2]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[0] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(results[0]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[1] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(results[1]), loss );

    memset( expected, 0, sizeof(float) * imageSizeSquared );
    expected[3] = 1;
    loss = net->calcLoss( expected );
    cout << "loss " << loss << endl;
    EXPECT_LT( 0, loss );
    EXPECT_FLOAT_NEAR( - log(results[3]), loss );

    delete[] input;
    delete[] expected;
    delete net;
}
Example #3
TEST(testbackward, softmax2) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 5, 1);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SoftMaxMaker::instance());
    cout << net->asString() << endl;

//    int batchSize = ;
    net->setBatchSize(2);

    checkLayer(net, 2);
    delete net;
    delete cl;
}
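checkLayer is a helper defined elsewhere in DeepCL's testbackward.cpp and is not shown in these excerpts; it presumably verifies the backward pass of the layer at the given index. The general idea behind such a test is a finite-difference gradient check. A minimal, self-contained sketch of that idea (hypothetical code, not the actual DeepCL helper):

// Hypothetical finite-difference gradient check (not DeepCL code): compares the analytic
// gradient of softmax + cross-entropy w.r.t. its logits against the numerical estimate
// (loss(x + eps) - loss(x - eps)) / (2 * eps).
#include <cmath>
#include <cstdio>

static float crossEntropy(const float *logits, int n, int label) {
    float sum = 0;
    for(int i = 0; i < n; i++) sum += std::exp(logits[i]);
    return -std::log(std::exp(logits[label]) / sum);
}

int main() {
    const int n = 5;
    float logits[n] = {0.1f, -0.3f, 0.7f, 0.2f, -0.5f};
    const int label = 2;
    const float eps = 1e-3f;
    for(int i = 0; i < n; i++) {
        // analytic gradient: softmax(logits)[i] - (i == label ? 1 : 0)
        float sum = 0;
        for(int j = 0; j < n; j++) sum += std::exp(logits[j]);
        float analytic = std::exp(logits[i]) / sum - (i == label ? 1.0f : 0.0f);
        // numerical gradient via central differences
        float saved = logits[i];
        logits[i] = saved + eps; float lossPlus = crossEntropy(logits, n, label);
        logits[i] = saved - eps; float lossMinus = crossEntropy(logits, n, label);
        logits[i] = saved;
        float numeric = (lossPlus - lossMinus) / (2 * eps);
        std::printf("i=%d analytic=%f numeric=%f\n", i, analytic, numeric);
    }
    return 0;
}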
Example #4
TEST(testbackward, act1) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 1, 2);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(ActivationMaker::instance()->relu());
    net->addLayer(SquareLossMaker::instance());
//    net->addLayer(SoftMaxMaker::instance()); // maybe should use square loss maker, or cross entropy,
                          // so that we don't have to make filtersize == input image size?
    cout << net->asString() << endl;

    net->setBatchSize(1);

    checkLayer(net, 2);
    delete net;
    delete cl;
}
Example #5
TEST(testbackward, fc1) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    ClBlasInstance blasInstance;
    NeuralNet *net = new NeuralNet(cl, 2, 4);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(FullyConnectedMaker::instance()->numPlanes(4)->imageSize(1)->biased(0));
    net->addLayer(SquareLossMaker::instance());
//    net->addLayer(SoftMaxMaker::instance()); // maybe should use square loss maker, or cross entropy,
                          // so that we don't have to make filtersize == input image size?
    cout << net->asString() << endl;

    net->setBatchSize(4);

    checkLayer(net, 2);
    delete net;
    delete cl;
}
Example #6
void testNumerically(float learningRate, int batchSize, int imageSize, int filterSize, int numPlanes, ActivationFunction *fn, bool padZeros, int its = 20) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    ClBlasInstance clblasInstance;
    NeuralNet *net = NeuralNet::maker(cl)->planes(numPlanes)->imageSize(imageSize)->instance();
    net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros));
    net->addLayer(ActivationMaker::instance()->fn(fn));
    net->addLayer(ConvolutionalMaker::instance()->numFilters(1)->filterSize(filterSize)->biased(0)->padZeros(padZeros));
    net->addLayer(ActivationMaker::instance()->fn(fn));
    net->addLayer(SquareLossMaker::instance());
    net->setBatchSize(batchSize);

    int inputNumElements = net->getLayer(0)->getOutputNumElements();
    int outputNumElements = net->getLastLayer()->getOutputNumElements();
    int weightsSize1 = net->getLayer(1)->getWeightsSize();
    int weightsSize2 = net->getLayer(3)->getWeightsSize();

    float *inputData = new float[std::max<int>(10000, inputNumElements)];
    float *expectedOutput = new float[std::max<int>(10000, outputNumElements)];
    memset(inputData, 0, sizeof(float) * std::max<int>(10000, inputNumElements));
    memset(expectedOutput, 0, sizeof(float) * std::max<int>(10000, outputNumElements));
//    int seed = 0;
    std::mt19937 random = WeightRandomizer::randomize(inputData, std::max<int>(10000, inputNumElements), -2.0f, 2.0f);
    WeightRandomizer::randomize(random, expectedOutput, std::max<int>(10000, outputNumElements), -2.0f, 2.0f);
    WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weights, weightsSize1, -2.0f, 2.0f);
    dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToDevice();
    WeightRandomizer::randomize(random, dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weights, weightsSize2, -2.0f, 2.0f);
    dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice();

    SGD *sgd = SGD::instance(cl, learningRate, 0.0f);
    for(int it = 0; it < its; it++) {
        float *weightsBefore1 = new float[weightsSize1];
        float *currentWeights = net->getLayer(1)->getWeights();
        for(int i = 0; i < weightsSize1; i++) {
            weightsBefore1[i] = currentWeights[i];
        }
        float *weightsBefore2 = new float[weightsSize2];
        currentWeights = net->getLayer(3)->getWeights();
        for(int i = 0; i < weightsSize2; i++) {
            weightsBefore2[i] = currentWeights[i];
        }

        net->forward(inputData);
    //    net->print();
        float loss = net->calcLoss(expectedOutput);
        dynamic_cast<LossLayer*>(net->getLayer(5))->calcLoss(expectedOutput);
//        net->backward(expectedOutput);
        TrainingContext context(0, 0);
        sgd->train(net, &context, inputData, expectedOutput);
        dynamic_cast<ConvolutionalLayer*>(net->getLayer(1))->weightsWrapper->copyToHost();
        // restore the 2nd conv layer's weights (layer 3) to their pre-update values,
        // so that only layer 1's weight change affects the measured loss
        for(int i = 0; i < weightsSize2; i++) {
            dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weights[i] = weightsBefore2[i];
        }
        dynamic_cast<ConvolutionalLayer*>(net->getLayer(3))->weightsWrapper->copyToDevice();
        net->forward(inputData);

        float loss2 = net->calcLoss(expectedOutput);
        float lossChange = loss - loss2;
        cout << " loss " << loss << " loss2 " << loss2 << " change: " << lossChange << endl;

        float *newWeights = net->getLayer(1)->getWeights();
        float sumWeightDiff = 0;
        float sumWeightDiffSquared = 0;
        for(int i = 0; i < weightsSize1; i++) {
            float diff = newWeights[i] - weightsBefore1[i];
            sumWeightDiff += diff;
            sumWeightDiffSquared += diff * diff;
        }
        newWeights = net->getLayer(3)->getWeights();
        for(int i = 0; i < weightsSize2; i++) {
            float diff = newWeights[i] - weightsBefore2[i];
            sumWeightDiff += diff;
            sumWeightDiffSquared += diff * diff;
        }
        cout << "sumweightsdiff " << sumWeightDiff << endl;
    //    cout << "sumweightsdiff / learningrate " << (sumWeightDiff / learningRate) << endl;
    //    cout << "sum weightsdiffsquared " << (sumWeightDiffSquared/ learningRate / learningRate * imageSize) << endl;

        float estimatedLossChangeFromW = sumWeightDiffSquared/ learningRate; // / filterSize;
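        // Rationale: with plain SGD and zero momentum, each weight update is
        // dw = -learningRate * dLoss/dw, so dLoss/dw = -dw / learningRate and the
        // first-order loss change is sum_i (dLoss/dw_i * dw_i) = -sum_i dw_i^2 / learningRate.
        // estimatedLossChangeFromW is the magnitude of that estimate, which the assertions
        // below compare against the measured lossChange.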

        cout << " loss change              " << lossChange << endl;
        cout << " estimatedLossChangeFromW " << estimatedLossChangeFromW << endl;
    //    cout << abs(estimatedLossChangeFromW - lossChange) / lossChange << endl;    
    //    cout << abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW << endl;    
        EXPECT_GT(0.01f * imageSize * imageSize, abs(estimatedLossChangeFromW - lossChange) / lossChange); 
        EXPECT_GT(0.01f * imageSize * imageSize, abs(estimatedLossChangeFromW - lossChange) / estimatedLossChangeFromW); 
        delete[] weightsBefore1;
        delete[] weightsBefore2;
    }
//    delete[] weights1;
//    delete[] errors;
//    delete[] output;
    delete sgd;
    delete[] inputData;
    delete[] expectedOutput;
    delete net;
    delete cl;
}
Example #7
TEST(testbackward, squareloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 3, 5);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SquareLossMaker::instance());
    cout << net->asString() << endl;

    int batchSize = 32;
    net->setBatchSize(batchSize);

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    WeightRandomizer::randomize(0, input, inputTotalSize, -2.0f, 2.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, -2.0f, 2.0f);
    
    // now, forward prop
//    net->input(input);
    net->forward(input);
    net->print();
//    net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
//        cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
//        cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.01f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
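        // First-order Taylor: loss(input + delta) ~= loss(input) + delta * dLoss/dInput,
        // so for a small tweak the predictedLossChange should roughly match the
        // measured lossChange printed below.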
        input[inputIndex] = newValue;
//        cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
//        net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
Example #8
TEST(testbackward, softmaxloss) {
    // here's the plan:
    // generate some input, randomly
    // generate some expected output, randomly
    // forward propagate
    // calculate loss
    // calculate gradInput
    // change some of the inputs, forward prop, recalculate loss, check corresponds
    // to the gradient
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    NeuralNet *net = new NeuralNet(cl, 5, 1);
    net->addLayer(ForceBackpropLayerMaker::instance());
    net->addLayer(SoftMaxMaker::instance());
    cout << net->asString() << endl;

    const int batchSize = 2;
    net->setBatchSize(batchSize);
    const int outputPlanes = net->getOutputPlanes();

    int inputCubeSize = net->getInputCubeSize();
    int outputCubeSize = net->getOutputCubeSize();

    int inputTotalSize = inputCubeSize * batchSize;
    int outputTotalSize = outputCubeSize * batchSize;

    cout << "inputtotalsize=" << inputTotalSize << " outputTotalSize=" << outputTotalSize << endl;

    float *input = new float[inputTotalSize];
    float *expectedOutput = new float[outputTotalSize];

    WeightRandomizer::randomize(0, input, inputTotalSize, 0.0f, 1.0f);
    WeightRandomizer::randomize(1, expectedOutput, outputTotalSize, 0.0f, 1.0f);

    // we should make the input and output a probability distribution I think
    // so: add up the input, and divide each by that.  do same for expectedoutput (?)
//    normalizeAsProbabilityDistribution(input, inputTotalSize);
    normalizeAsProbabilityDistribution(outputPlanes, expectedOutput, outputTotalSize);

    // set all to zero, and one to 1, ie like labelled data
//    for(int i = 0; i < outputTotalSize; i++) {
//        expectedOutput[i] = 0;
//    }
//    for(int n = 0; n < batchSize; n++) {
//        int chosenLabel = 0;
//        WeightRandomizer::randomizeInts(n, &chosenLabel, 1, 0, net->getOutputPlanes());
//        expectedOutput[ n * outputPlanes + chosenLabel ] = 1;
//    }
//    for(int i = 0; i < outputTotalSize; i++) {
//        cout << "expected[" << i << "]=" << expectedOutput[i] << endl;
//    }
//        
    // now, forward prop
//    net->input(input);
    net->forward(input);
    net->print();
//    net->printOutput();

    // calculate loss
    float lossBefore = net->calcLoss(expectedOutput);

    // calculate gradInput
    net->backward(expectedOutput);

    // modify input slightly
    mt19937 random;
    const int numSamples = 10;
    for(int i = 0; i < numSamples; i++) {
        int inputIndex;
        WeightRandomizer::randomizeInts(i, &inputIndex, 1, 0, inputTotalSize);
//        cout << "i=" << i << " index " << inputIndex << endl;
        float oldValue = input[inputIndex];
        // grad for this index is....
        float grad = net->getLayer(2)->getGradInput()[inputIndex];
//        cout << "grad=" << grad << endl;
        // tweak slightly
        float newValue = oldValue * 1.001f;
        float inputDelta = newValue - oldValue;
        float predictedLossChange = inputDelta * grad;
        input[inputIndex] = newValue;
//        cout << "oldvalue=" << oldValue << " newvalue=" << newValue << endl;
        // forwardProp
        net->forward(input);
        input[inputIndex] = oldValue;
//        net->printOutput();
        float lossAfter = net->calcLoss(expectedOutput);
        float lossChange = lossAfter - lossBefore;
        cout << "idx=" << inputIndex << " predicted losschange=" << predictedLossChange << " actual=" << lossChange << endl;
    }

    delete[] expectedOutput;
    delete[] input;

    delete net;
    delete cl;
}
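normalizeAsProbabilityDistribution is another helper from the test suite that is not shown in these excerpts; from its usage above it presumably rescales each example's block of outputPlanes values so that they sum to 1. A plausible sketch (an assumption based on that usage, not the DeepCL source):

// Hypothetical sketch of normalizeAsProbabilityDistribution (assumption based on its usage above):
// rescale each consecutive group of `planes` values so that the group sums to 1.
void normalizeAsProbabilityDistribution(int planes, float *values, int totalSize) {
    for(int start = 0; start < totalSize; start += planes) {
        float sum = 0;
        for(int i = 0; i < planes; i++) sum += values[start + i];
        if(sum != 0) {
            for(int i = 0; i < planes; i++) values[start + i] /= sum;
        }
    }
}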
Example #9
void go(Config config) {
    Timer timer;

    int Ntrain;
    int Ntest;
    int numPlanes;
    int imageSize;

    float *trainData = 0;
    float *testData = 0;
    int *trainLabels = 0;
    int *testLabels = 0;

    int trainAllocateN = 0;
    int testAllocateN = 0;

//    int totalLinearSize;
    GenericLoaderv2 trainLoader( config.dataDir + "/" + config.trainFile );
    Ntrain = trainLoader.getN();
    numPlanes = trainLoader.getPlanes();
    imageSize = trainLoader.getImageSize();
    // GenericLoader::getDimensions( , &Ntrain, &numPlanes, &imageSize );
    Ntrain = config.numTrain == -1 ? Ntrain : config.numTrain;
//    long allocateSize = (long)Ntrain * numPlanes * imageSize * imageSize;
    cout << "Ntrain " << Ntrain << " numPlanes " << numPlanes << " imageSize " << imageSize << endl;
    if( config.loadOnDemand ) {
        trainAllocateN = config.batchSize; // can improve this later
    } else {
        trainAllocateN = Ntrain;
    }
    trainData = new float[ (long)trainAllocateN * numPlanes * imageSize * imageSize ];
    trainLabels = new int[trainAllocateN];
    if( !config.loadOnDemand && Ntrain > 0 ) {
        trainLoader.load( trainData, trainLabels, 0, Ntrain );
    }

    GenericLoaderv2 testLoader( config.dataDir + "/" + config.validateFile );
    Ntest = testLoader.getN();
    numPlanes = testLoader.getPlanes();
    imageSize = testLoader.getImageSize();
    Ntest = config.numTest == -1 ? Ntest : config.numTest;
    if( config.loadOnDemand ) {
        testAllocateN = config.batchSize; // can improve this later
    } else {
        testAllocateN = Ntest;
    }
    testData = new float[ (long)testAllocateN * numPlanes * imageSize * imageSize ];
    testLabels = new int[testAllocateN]; 
    if( !config.loadOnDemand && Ntest > 0 ) {
        testLoader.load( testData, testLabels, 0, Ntest );
    }
    cout << "Ntest " << Ntest << " Ntest" << endl;
    
    timer.timeCheck("after load images");

    const int inputCubeSize = numPlanes * imageSize * imageSize;
    float translate;
    float scale;
    int normalizationExamples = config.normalizationExamples > Ntrain ? Ntrain : config.normalizationExamples;
    if( !config.loadOnDemand ) {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizationHelper::getMeanAndStdDev( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            float mean, stdDev;
            NormalizationHelper::getMinMax( trainData, normalizationExamples * inputCubeSize, &mean, &stdDev );
            translate = - mean;
            scale = 1.0f / stdDev;
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    } else {
        if( config.normalization == "stddev" ) {
            float mean, stdDev;
            NormalizeGetStdDev normalizeGetStdDev( trainData, trainLabels ); 
            BatchProcessv2::run( &trainLoader, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetStdDev );
            normalizeGetStdDev.calcMeanStdDev( &mean, &stdDev );
            cout << " image stats mean " << mean << " stdDev " << stdDev << endl;
            translate = - mean;
            scale = 1.0f / stdDev / config.normalizationNumStds;
        } else if( config.normalization == "maxmin" ) {
            NormalizeGetMinMax normalizeGetMinMax( trainData, trainLabels );
            BatchProcessv2::run( &trainLoader, 0, config.batchSize, normalizationExamples, inputCubeSize, &normalizeGetMinMax );
            normalizeGetMinMax.calcMinMaxTransform( &translate, &scale );
        } else {
            cout << "Error: Unknown normalization: " << config.normalization << endl;
            return;
        }
    }
    cout << " image norm translate " << translate << " scale " << scale << endl;
    timer.timeCheck("after getting stats");

//    const int numToTrain = Ntrain;
//    const int batchSize = config.batchSize;

    EasyCL *cl = 0;
    if( config.gpuIndex >= 0 ) {
        cl = EasyCL::createForIndexedGpu( config.gpuIndex );
    } else {
        cl = EasyCL::createForFirstGpuOtherwiseCpu();
    }

    NeuralNet *net;
    net = new NeuralNet(cl);

    WeightsInitializer *weightsInitializer = 0;
    if( toLower( config.weightsInitializer ) == "original" ) {
        weightsInitializer = new OriginalInitializer();
    } else if( toLower( config.weightsInitializer ) == "uniform" ) {
        weightsInitializer = new UniformInitializer( config.initialWeights );
    } else {
        cout << "Unknown weights initializer " << config.weightsInitializer << endl;
        return;
    }

//    net->inputMaker<unsigned char>()->numPlanes(numPlanes)->imageSize(imageSize)->insert();
    net->addLayer( InputLayerMaker::instance()->numPlanes(numPlanes)->imageSize(imageSize) );
    net->addLayer( NormalizationLayerMaker::instance()->translate(translate)->scale(scale) );
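    // NormalizationLayer applies a fixed elementwise transform to every input value,
    // presumably output = (input + translate) * scale, so with translate = -mean and
    // scale = 1/stdDev (the "stddev" branch above) inputs become roughly zero-mean, unit-variance.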
    if( !NetdefToNet::createNetFromNetdef( net, config.netDef, weightsInitializer ) ) {
        return;
    }
    // apply the trainer
    Trainer *trainer = 0;
    if( toLower( config.trainer ) == "sgd" ) {
        SGD *sgd = new SGD( cl );
        sgd->setLearningRate( config.learningRate );
        sgd->setMomentum( config.momentum );
        sgd->setWeightDecay( config.weightDecay );
        trainer = sgd;
    } else if( toLower( config.trainer ) == "anneal" ) {
        Annealer *annealer = new Annealer( cl );
        annealer->setLearningRate( config.learningRate );
        annealer->setAnneal( config.anneal );
        trainer = annealer;
    } else if( toLower( config.trainer ) == "nesterov" ) {
        Nesterov *nesterov = new Nesterov( cl );
        nesterov->setLearningRate( config.learningRate );
        nesterov->setMomentum( config.momentum );
        trainer = nesterov;
    } else if( toLower( config.trainer ) == "adagrad" ) {
        Adagrad *adagrad = new Adagrad( cl );
        adagrad->setLearningRate( config.learningRate );
        trainer = adagrad;
    } else if( toLower( config.trainer ) == "rmsprop" ) {
        Rmsprop *rmsprop = new Rmsprop( cl );
        rmsprop->setLearningRate( config.learningRate );
        trainer = rmsprop;
    } else if( toLower( config.trainer ) == "adadelta" ) {
        Adadelta *adadelta = new Adadelta( cl, config.rho );
        trainer = adadelta;
    } else {
        cout << "trainer " << config.trainer << " unknown." << endl;
        return;
    }
    cout << "Using trainer " << trainer->asString() << endl;
//    trainer->bindTo( net );
//    net->setTrainer( trainer );
    net->setBatchSize( config.batchSize );
    net->print();

    bool afterRestart = false;
    int restartEpoch = 0;
    int restartBatch = 0;
    float restartAnnealedLearningRate = 0;
    int restartNumRight = 0;
    float restartLoss = 0;
    if( config.loadWeights && config.weightsFile != "" ) {
        cout << "loadingweights" << endl;
        afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            // try old trainingstring
            afterRestart = WeightsPersister::loadWeights( config.weightsFile, config.getOldTrainingString(), net, &restartEpoch, &restartBatch, &restartAnnealedLearningRate, &restartNumRight, &restartLoss );
        }
        if( !afterRestart && FileHelper::exists( config.weightsFile ) ) {
            cout << "Weights file " << config.weightsFile << " exists, but doesnt match training options provided." << endl;
            cout << "Continue loading anyway (might crash, or weights might be completely inappropriate)? (y/n)" << endl;
            string response;
            cin >> response;
            if( response != "y" ) {
                cout << "Please either check the training options, or choose a weights file that doesnt exist yet" << endl;
                return;
            }
        }