tnn_error tnn_module_bprop_linear(tnn_module *m){
  tnn_error ret;
  gsl_matrix w;
  gsl_matrix dw;

  //Routine check
  if(m->t != TNN_MODULE_TYPE_LINEAR){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true || m->w.valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //Transform the matrix
  TNN_MACRO_ERRORTEST(tnn_numeric_v2m(&m->w.x, &w, m->output->size, m->input->size),ret);
  TNN_MACRO_ERRORTEST(tnn_numeric_v2m(&m->w.dx, &dw, m->output->size, m->input->size), ret);

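  //Gradient sketch, assuming fprop computes output = W*input (as in
  //tnn_module_fprop_linear below) and that tnn_numeric_v2m wraps the flat
  //parameter vectors as output_size-by-input_size matrix views:
  //  dE/dinput = W^T * dE/doutput   -> the dgemv(CblasTrans, ...) call
  //  dE/dW     = dE/doutput * input^T -> the rank-1 update via dger
  //Note that dw is zeroed first, so the weight gradient is overwritten here
  //rather than accumulated.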
  //bprop to input
  TNN_MACRO_GSLTEST(gsl_blas_dgemv(CblasTrans, 1.0, &w, &m->output->dx, 0.0, &m->input->dx));

  //bprop to dw
  gsl_matrix_set_zero(&dw);
  TNN_MACRO_GSLTEST(gsl_blas_dger(1.0, &m->output->dx, &m->input->x, &dw));

  return TNN_ERROR_SUCCESS;
}
tnn_error tnn_module_fprop_bias(tnn_module *m){
  //Routine check
  if(m->t != TNN_MODULE_TYPE_BIAS){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true || m->w.valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //fprop to output
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&m->input->x, &m->output->x));
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(1.0, &m->w.x, &m->output->x));

  return TNN_ERROR_SUCCESS;
}
tnn_error tnn_loss_bprop_euclidean(tnn_loss *l){
  //Routine check
  if(l->t != TNN_LOSS_TYPE_EUCLIDEAN){
    return TNN_ERROR_LOSS_MISTYPE;
  }
  if(l->input1->valid != true || l->input2->valid != true || l->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //bprop to input1 and input2: dx = dl * 2*(x - y); dy = dl * 2*(y - x) = -dx
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->x, &l->input1->dx));
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, &l->input2->x, &l->input1->dx));
  gsl_blas_dscal(2.0*gsl_vector_get(&l->output->dx, 0), &l->input1->dx);
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->dx, &l->input2->dx));
  gsl_blas_dscal(-1.0, &l->input2->dx);

  return TNN_ERROR_SUCCESS;
}
//Learn one sample using naive stochastic gradient descent
tnn_error tnn_trainer_class_learn_nsgd(tnn_trainer_class *t, gsl_vector *input, size_t label){
  tnn_error ret;
  tnn_state *sin;
  tnn_param *p;
  gsl_vector_view lb;

  //Routine check
  if(t->t != TNN_TRAINER_CLASS_TYPE_NSGD){
    return TNN_ERROR_TRAINER_CLASS_MISTYPE;
  }

  //Check the input and label
  TNN_MACRO_ERRORTEST(tnn_machine_get_sin(&t->m, &sin),ret);
  if(label >= t->lset->size1 || input->size != sin->size){
    return TNN_ERROR_STATE_INCOMP;
  }
  lb = gsl_matrix_row(t->lset, label);

  //Set the loss output dx (dE/dE) to 1 to seed backpropagation
  gsl_vector_set(&t->l.output->dx, 0, 1.0);

  //Copy the data into the input/label and do forward and backward propagation
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(input, &sin->x));
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&lb.vector, &t->label->x));
  TNN_MACRO_ERRORTEST(tnn_machine_fprop(&t->m), ret);
  TNN_MACRO_ERRORTEST(tnn_loss_fprop(&t->l), ret);
  TNN_MACRO_ERRORTEST(tnn_loss_bprop(&t->l), ret);
  TNN_MACRO_ERRORTEST(tnn_machine_bprop(&t->m), ret);

  //Accumulate the regularization gradient into the parameter gradient
  TNN_MACRO_ERRORTEST(tnn_machine_get_param(&t->m, &p), ret);
  TNN_MACRO_ERRORTEST(tnn_reg_addd(&t->r, p->x, p->dx, t->lambda), ret);

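  //Update sketch: assuming tnn_reg_addd adds lambda * dR/dw into p->dx
  //(mirroring the tnn_reg_d + daxpy pair used in tnn_trainer_class_train_nsgd
  //below), the daxpy below performs one plain SGD step:
  //  w <- w - eta * (dE/dw + lambda * dR/dw)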
  //Compute the parameter update
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-((tnn_trainer_class_nsgd*)t->c)->eta, p->dx, p->x));

  //Set the titer parameter
  ((tnn_trainer_class_nsgd*)t->c)->titer = 1;

  return TNN_ERROR_SUCCESS;
}
tnn_error tnn_module_fprop_sum(tnn_module *m){
  tnn_state **t;

  //Routine check
  if(m->t != TNN_MODULE_TYPE_SUM){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

  //fprop to output
  gsl_vector_set_zero(&m->output->x);
  for(t = (tnn_state **)utarray_front(((tnn_module_sum*)m->c)->sarray);
      t != NULL;
      t = (tnn_state **)utarray_next(((tnn_module_sum*)m->c)->sarray, t)){
    TNN_MACRO_GSLTEST(gsl_blas_daxpy(1.0, &(*t)->x, &m->output->x));
  }

  return TNN_ERROR_SUCCESS;
}
tnn_error tnn_loss_fprop_euclidean(tnn_loss *l){
  gsl_vector *diff;
  double loss;

  //Routine check
  if(l->t != TNN_LOSS_TYPE_EUCLIDEAN){
    return TNN_ERROR_LOSS_MISTYPE;
  }
  if(l->input1->valid != true || l->input2->valid != true || l->output->valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

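  //Worked form of the loss: with x = input1 and y = input2,
  //  E = ||x - y||_2^2 = sum_i (x_i - y_i)^2,
  //computed below as the square of gsl_blas_dnrm2(x - y).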
  //Do the forward propagation
  if((diff = gsl_vector_alloc(l->input1->size)) == NULL){
    return TNN_ERROR_GSL;
  }
  TNN_MACRO_GSLTEST(gsl_blas_dcopy(&l->input1->x, diff));
  TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, &l->input2->x, diff));
  loss = gsl_blas_dnrm2(diff);
  gsl_vector_set(&l->output->x, 0, loss*loss);
  gsl_vector_free(diff);

  return TNN_ERROR_SUCCESS;
}
tnn_error tnn_module_fprop_linear(tnn_module *m){
  tnn_error ret;
  gsl_matrix w;

  //Routine check
  if(m->t != TNN_MODULE_TYPE_LINEAR){
    return TNN_ERROR_MODULE_MISTYPE;
  }
  if(m->input->valid != true || m->output->valid != true || m->w.valid != true){
    return TNN_ERROR_STATE_INVALID;
  }

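  //Layout sketch: the weights live in the flat parameter vector w.x;
  //tnn_numeric_v2m is assumed to expose it as an output_size-by-input_size
  //matrix view so that dgemv can compute output = W * input.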
  //Transform the matrix
  TNN_MACRO_ERRORTEST(tnn_numeric_v2m(&m->w.x, &w, m->output->size, m->input->size),ret);

  //Compute the result using BLAS
  TNN_MACRO_GSLTEST(gsl_blas_dgemv(CblasNoTrans, 1.0, &w, &m->input->x, 0.0, &m->output->x));

  return TNN_ERROR_SUCCESS;
}
//Train all the samples using naive stochastic gradient descent
tnn_error tnn_trainer_class_train_nsgd(tnn_trainer_class *t, gsl_matrix *inputs, size_t *labels){
  tnn_error ret;
  tnn_state *sin;
  tnn_param *p;
  gsl_vector *rd;
  gsl_vector *pw;
  gsl_vector_view in;
  gsl_vector_view lb;
  double eps;
  size_t i,j;

  //Routine check
  if(t->t != TNN_TRAINER_CLASS_TYPE_NSGD){
    return TNN_ERROR_TRAINER_CLASS_MISTYPE;
  }

  //Check the input
  TNN_MACRO_ERRORTEST(tnn_machine_get_sin(&t->m, &sin),ret);
  if(inputs->size2 != sin->size){
    return TNN_ERROR_STATE_INCOMP;
  }

  //Set the loss output dx (dE/dE) to 1 to seed backpropagation
  gsl_vector_set(&t->l.output->dx, 0, 1.0);

  //Get the parameter and allocate rd and pw
  TNN_MACRO_ERRORTEST(tnn_machine_get_param(&t->m, &p), ret);
  rd = gsl_vector_alloc(p->size);
  pw = gsl_vector_alloc(p->size);
  if(rd == NULL || pw == NULL){
    if(rd != NULL) gsl_vector_free(rd);
    if(pw != NULL) gsl_vector_free(pw);
    return TNN_ERROR_GSL;
  }

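  //Loop sketch: each outer pass runs eiter single-sample SGD steps (cycling
  //through the samples via the modulo index below), then measures eps as the
  //2-norm of the parameter change over that pass. Training stops once eps
  //no longer exceeds epsilon or titer reaches niter.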
  //Into the main loop
  for(eps = DBL_MAX, ((tnn_trainer_class_nsgd*)t->c)->titer = 0;
      eps > ((tnn_trainer_class_nsgd*)t->c)->epsilon && ((tnn_trainer_class_nsgd*)t->c)->titer < ((tnn_trainer_class_nsgd*)t->c)->niter;
      ((tnn_trainer_class_nsgd*)t->c)->titer = ((tnn_trainer_class_nsgd*)t->c)->titer + ((tnn_trainer_class_nsgd*)t->c)->eiter){

    //Copy the previous pw
    TNN_MACRO_GSLTEST(gsl_blas_dcopy(p->x, pw));

    for(i = 0; i < ((tnn_trainer_class_nsgd*)t->c)->eiter; i = i + 1){

      j = (((tnn_trainer_class_nsgd*)t->c)->titer + i)%inputs->size1;

      //Check the label
      if(labels[j] >= t->lset->size1){
        return TNN_ERROR_STATE_INCOMP;
      }

      //Get the inputs and label vector
      lb = gsl_matrix_row(t->lset, labels[j]);
      in = gsl_matrix_row(inputs, j);

      //Copy the data into the input/label and do forward and backward propagation
      TNN_MACRO_GSLTEST(gsl_blas_dcopy(&in.vector, &sin->x));
      TNN_MACRO_GSLTEST(gsl_blas_dcopy(&lb.vector, &t->label->x));
      TNN_MACRO_ERRORTEST(tnn_machine_fprop(&t->m), ret);
      TNN_MACRO_ERRORTEST(tnn_loss_fprop(&t->l), ret);
      TNN_MACRO_ERRORTEST(tnn_loss_bprop(&t->l), ret);
      TNN_MACRO_ERRORTEST(tnn_machine_bprop(&t->m), ret);

      //Accumulate the regularization gradient into the parameter gradient
      TNN_MACRO_ERRORTEST(tnn_reg_d(&t->r, p->x, rd), ret);
      TNN_MACRO_GSLTEST(gsl_blas_daxpy(t->lambda, rd, p->dx));

      //Compute the parameter update
      TNN_MACRO_GSLTEST(gsl_blas_daxpy(-((tnn_trainer_class_nsgd*)t->c)->eta, p->dx, p->x));
    }

    //Compute the 2-norm of the parameter change over this pass as eps
    TNN_MACRO_GSLTEST(gsl_blas_daxpy(-1.0, p->x, pw));
    eps = gsl_blas_dnrm2(pw);
  }

  //Free the temporary vectors
  gsl_vector_free(rd);
  gsl_vector_free(pw);

  return TNN_ERROR_SUCCESS;
}