#include <iostream>
#include <vector>
#include <cstdlib>

// NeuralNet, NeuralTanhLayer, NeuralSoftmaxLayer, the SCALAR/PROB constants
// and the XOR_training()/softmax_XOR_training() helpers come from the
// project's own headers, which are not shown in this listing.

int main(int argc, char *argv[])
{
  
  // create network
  NeuralNet network;
  
  NeuralLayer * pHiddenLayer1 = new NeuralTanhLayer(2,16);
  network.addLayer( pHiddenLayer1 );
  //NeuralLayer * pHiddenLayer2 = new NeuralTanhLayer(16,16);
  //network.addLayer( pHiddenLayer2 );

  NeuralLayer * pOutputLayer = new NeuralSoftmaxLayer(16,2);
  network.addLayer( pOutputLayer );

  // set learning rate, momentum, decay rate, output type
  // SCALAR = tanh or sigmoid output layer (use one output neuron)
  // PROB = softmax output layer, 1-of-C output encoding (use two output neurons)
  const unsigned int outType = PROB;
  network.setParams(0.2, 0, 0, outType);

  const unsigned int iters = 1000;
  const unsigned int testers = 2500;
  int rightCount = 0;

  for(int i=0;i<iters+testers;++i)
  {
  
    double error = 0.0;
    std::vector<double> exor, training;

    // generate one training example; the trailing element(s) of the returned
    // vector are the target value(s), which are moved into 'training'
    switch(outType)
    {
      case SCALAR:
        exor = XOR_training();
        training.push_back(exor[2]);
        exor.pop_back();
        break;
      case PROB:
        exor = softmax_XOR_training();
        training.push_back(exor[2]);
        training.push_back(exor[3]);
        exor.pop_back();
        exor.pop_back();
        break;
    }

    // training
    if( i<iters )
    {
      std::vector<double> outputs = network.runNet(exor);
      error = network.trainNet(exor,training,outType);
      switch(outType)
      {
        case SCALAR:
          std::cout << exor[0] << "\t\t" << exor[1] << "\t\t" << outputs[0] << "\t\t" << error << std::endl;
          break;
        case PROB:
          std::cout << exor[0] << "\t\t" << exor[1] << "\t\t" << outputs[0] << "\t\t" << outputs[1] << "\t\t" << error << std::endl;
          break;
      }
    }

    // testing
    if( i>=iters )
    {
      std::vector<double> outputs = network.runNet(exor);
      unsigned int out = 0;
      switch(outType)
      {
        case SCALAR:
          out = ( (outputs[0]>0.5) ? 1 : 0 );
          if( out == (int)training[0] )
          {
            ++rightCount;
          }
          break;
        case PROB:
        {
          // predicted class is the index of the larger softmax output
          int classLabel = 0;
          if( outputs[0] < outputs[1] )
          {
            classLabel = 1;
          }
          if( 1 == (int)training[classLabel] )
          {
            ++rightCount;
          }
          break;
        }
      }
    }
  }  
  
  std::cout << std::endl << "accuracy: " << 100.0 * rightCount/testers << "%" << std::endl;

  return 0;

}
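
// XOR_training() and softmax_XOR_training() are not shown in this listing.
// Below is a minimal sketch of what they are assumed to return, based on how
// main() consumes them: XOR_training() yields {x0, x1, target} and
// softmax_XOR_training() yields {x0, x1, target for class 0, target for class 1}.
// The 0/1 input encoding and the *_sketch names are assumptions for illustration.
std::vector<double> XOR_training_sketch()
{
  std::vector<double> sample;
  int x0 = rand() % 2;                       // random binary inputs
  int x1 = rand() % 2;
  sample.push_back((double)x0);
  sample.push_back((double)x1);
  sample.push_back((double)(x0 ^ x1));       // scalar XOR target
  return sample;
}

std::vector<double> softmax_XOR_training_sketch()
{
  std::vector<double> sample;
  int x0 = rand() % 2;
  int x1 = rand() % 2;
  int label = x0 ^ x1;                       // class index, 0 or 1
  sample.push_back((double)x0);
  sample.push_back((double)x1);
  sample.push_back(label == 0 ? 1.0 : 0.0);  // 1-of-2 target encoding
  sample.push_back(label == 1 ? 1.0 : 0.0);
  return sample;
}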

// ---------------------------------------------------------------------------
// Second example program: digit classification (the DigitNet example). It is
// a separate translation unit from the XOR demo above (each program has its
// own main). INPUT, HIDDEN, OUTPUT, DATA_SIZE, TRAIN_SIZE, TEST_SIZE and the
// helpers loadFromFile(), buildData(), encode() and decode() come from the
// project's own headers, which are not shown in this listing.
// ---------------------------------------------------------------------------

#include <iostream>
#include <vector>
#include <cstdlib>
#include <ctime>

int main()
{

  // init variables
  double error = 0.;
  int truecnt = 0;
  int times,timed;
  
  // print useful info for reference
  std::cout << "\n" << "hidden neurons: " << "\t \t" << HIDDEN << std::endl;
  // init random number generator
  srand((int)time(NULL));  

  // create network
  std::cout << "initializing network..." << "\t \t";
  NeuralNet DigitNet;

  NeuralLayer * pHiddenLayer1 = new NeuralTanhLayer(INPUT,HIDDEN);
  DigitNet.addLayer( pHiddenLayer1 );
  NeuralLayer * pOutputLayer = new NeuralSoftmaxLayer(HIDDEN,OUTPUT);
  DigitNet.addLayer( pOutputLayer );

  // set output type:
  // SCALAR = tanh or sigmoid output layer (use one output neuron)
  // PROB = softmax output layer, 1-of-N output encoding (use one output neuron per class)
  const unsigned int outType = PROB;

  // set learning rate, momentum, decay rate
  const double learningRate = 0.15;
  const double momentum =     0.0;
  const double decayRate =    0.0;
  DigitNet.setParams(learningRate,momentum,decayRate,outType);

  std::cout << "done" << std::endl;
  
  // load training and test data
  // (each row holds a class label followed by the INPUT feature values)
  std::cout << "loading data..." << "\t \t \t";
  std::vector< std::vector<double> > bigData( DATA_SIZE,std::vector<double>(INPUT+1,0.0) );
  loadFromFile(bigData,"train.txt");

  std::vector< std::vector<double> > trainData( TRAIN_SIZE,std::vector<double>(INPUT+1,0.0) );
  std::vector< std::vector<double> > testData( TEST_SIZE,std::vector<double>(INPUT+1,0.0) );
  
  buildData(bigData,trainData,TRAIN_SIZE,testData,TEST_SIZE);
  std::cout << "done" << std::endl;
  
  // loop over training data points and train net
  // slice off the first column of each row (the class label)
  times=(int)time(NULL);   // init time counter
  std::cout << "\n" << "training examples: " << "\t \t" << TRAIN_SIZE << std::endl;
  std::cout << "learning rate: " << "\t \t \t" << learningRate << std::endl;
  std::cout << "momentum: " << "\t \t \t" << momentum << std::endl;
  std::cout << "weight decay: " << "\t \t \t" << decayRate << std::endl;
  std::cout << "training network..." << "\t \t";
  for(int i=0;i<TRAIN_SIZE;++i)
  {
    std::vector<double> data = trainData[i];            // extract data point
    double label = data[0];                             // extract point label
    data.erase(data.begin());
    std::vector<double> nLabel = encode((int)label);    // encode to 1-of-N   
    
    std::vector<double> outputs = DigitNet.runNet(data);
    error = DigitNet.trainNet(data,nLabel,outType);    // train net, return MSE

    // decode output and compare to correct output 
    if( decode(outputs) == (int)label )
        truecnt++;    
  }

  // stop timer and print out useful info
  timed=(int)time(NULL);
  times=timed-times;
  std::cout << "done" << std::endl;
  std::cout << "training time: " << "\t \t \t" << times << " seconds " << std::endl;
  std::cout << "training accuracy: " << "\t \t" << truecnt*100./TRAIN_SIZE << "%" << std::endl;
  
  // test net on test data
  times=(int)time(NULL);   // init time counter
  std::cout << "\n" << "test points: " << "\t \t \t" << TEST_SIZE << std::endl;
  std::cout << "testing network..." << "\t \t";
  truecnt = 0;
  for(int i=0;i<TEST_SIZE;++i)
  {
    
    std::vector<double> data = testData[i];     // extract data point 
    double label = data[0];                     // extract label
    data.erase(data.begin());
   
    std::vector<double> outputs = DigitNet.runNet(data);    // run net

    // decode output and compare to correct output 
    if( decode(outputs) == (int)label )
        truecnt++;    
    
  }

  // stop timer and print out useful info
  timed=(int)time(NULL);
  times=timed-times;
  std::cout << "done" << std::endl;
  std::cout << "testing time: " << "\t \t \t" << times << " seconds " << std::endl;
  std::cout << "test accuracy: " << "\t \t \t" << truecnt*100./TEST_SIZE << "% " << std::endl;
  
  // save weights to reuse net in the future
  DigitNet.saveNet();
  
  return 0;

}
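
// encode() and decode() are likewise not shown here. These sketches reflect
// the assumed behaviour, inferred from how they are used above: encode() turns
// an integer class label into a 1-of-N target vector of length OUTPUT, and
// decode() maps the network's output vector back to the index of its largest
// entry (argmax). The *_sketch names are hypothetical placeholders.
std::vector<double> encode_sketch(int label)
{
  std::vector<double> target(OUTPUT, 0.0);   // one slot per class
  target[label] = 1.0;                       // mark the correct class
  return target;
}

int decode_sketch(const std::vector<double> &outputs)
{
  int best = 0;
  for(unsigned int i = 1; i < outputs.size(); ++i)
  {
    if( outputs[i] > outputs[best] )
    {
      best = (int)i;
    }
  }
  return best;                               // index of the largest output
}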