Пример #1
0
List objectivex(const arma::mat& transition, const arma::cube& emission,
  const arma::vec& init, const arma::ucube& obs, const arma::umat& ANZ,
  const arma::ucube& BNZ, const arma::uvec& INZ, const arma::uvec& nSymbols,
  const arma::mat& coef, const arma::mat& X, arma::uvec& numberOfStates,
  unsigned int threads) {

  unsigned int q = coef.n_rows;
  arma::vec grad(
      arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ) + (numberOfStates.n_elem- 1) * q,
      arma::fill::zeros);
  arma::mat weights = exp(X * coef).t();
  if (!weights.is_finite()) {
    grad.fill(-arma::datum::inf);
    return List::create(Named("objective") = arma::datum::inf, Named("gradient") = wrap(grad));
  }

  weights.each_row() /= sum(weights, 0);

  arma::mat initk(emission.n_rows, obs.n_slices);

  for (unsigned int k = 0; k < obs.n_slices; k++) {
    initk.col(k) = init % reparma(weights.col(k), numberOfStates);
  }
  
  arma::uvec cumsumstate = arma::cumsum(numberOfStates);
  
  unsigned int error = 0;
  double ll = 0;
#pragma omp parallel for if(obs.n_slices >= threads) schedule(static) reduction(+:ll) num_threads(threads)       \
  default(none) shared(q, grad, nSymbols, ANZ, BNZ, INZ,                                                         \
    numberOfStates, cumsumstate, obs, init, initk, X, weights, transition, emission, error)
    for (unsigned int k = 0; k < obs.n_slices; k++) {
      if (error == 0) {
        arma::mat alpha(emission.n_rows, obs.n_cols); //m,n
        arma::vec scales(obs.n_cols); //n
        arma::sp_mat sp_trans(transition);
        uvForward(sp_trans.t(), emission, initk.col(k), obs.slice(k), alpha, scales);
        arma::mat beta(emission.n_rows, obs.n_cols); //m,n
        uvBackward(sp_trans, emission, obs.slice(k), beta, scales);

        int countgrad = 0;
        arma::vec grad_k(grad.n_elem, arma::fill::zeros);
        // transitionMatrix
        if (arma::accu(ANZ) > 0) {

          for (unsigned int jj = 0; jj < numberOfStates.n_elem; jj++) {
            arma::vec gradArow(numberOfStates(jj));
            arma::mat gradA(numberOfStates(jj), numberOfStates(jj));
            int ind_jj = cumsumstate(jj) - numberOfStates(jj);

            for (unsigned int i = 0; i < numberOfStates(jj); i++) {
              arma::uvec ind = arma::find(ANZ.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1));

              if (ind.n_elem > 0) {
                gradArow.zeros();
                gradA.eye();
                gradA.each_row() -= transition.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1);
                gradA.each_col() %= transition.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1).t();


                for (unsigned int j = 0; j < numberOfStates(jj); j++) {
                  for (unsigned int t = 0; t < (obs.n_cols - 1); t++) {
                    double tmp = alpha(ind_jj + i, t);
                    for (unsigned int r = 0; r < obs.n_rows; r++) {
                      tmp *= emission(ind_jj + j, obs(r, t + 1, k), r);
                    }
                    gradArow(j) += tmp * beta(ind_jj + j, t + 1);
                  }

                }

                gradArow = gradA * gradArow;
                grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradArow.rows(ind);
                countgrad += ind.n_elem;
              }
            }
          }
        }
        if (arma::accu(BNZ) > 0) {
          // emissionMatrix
          for (unsigned int r = 0; r < obs.n_rows; r++) {
            arma::vec gradBrow(nSymbols(r));
            arma::mat gradB(nSymbols(r), nSymbols(r));
            for (unsigned int i = 0; i < emission.n_rows; i++) {
              arma::uvec ind = arma::find(BNZ.slice(r).row(i));
              if (ind.n_elem > 0) {
                gradBrow.zeros();
                gradB.eye();
                gradB.each_row() -= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1);
                gradB.each_col() %= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1).t();
                for (unsigned int j = 0; j < nSymbols(r); j++) {
                  if (obs(r, 0, k) == j) {
                    double tmp = initk(i, k);
                    for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) {
                      if (r2 != r) {
                        tmp *= emission(i, obs(r2, 0, k), r2);
                      }
                    }
                    gradBrow(j) += tmp * beta(i, 0);
                  }
                  for (unsigned int t = 0; t < (obs.n_cols - 1); t++) {
                    if (obs(r, t + 1, k) == j) {
                      double tmp = beta(i, t + 1);
                      for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) {
                        if (r2 != r) {
                          tmp *= emission(i, obs(r2, t + 1, k), r2);
                        }
                      }
                      gradBrow(j) += arma::dot(alpha.col(t), transition.col(i)) * tmp;
                    }
                  }

                }
                gradBrow = gradB * gradBrow;
                grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradBrow.rows(ind);
                countgrad += ind.n_elem;

              }
            }
          }
        }
        if (arma::accu(INZ) > 0) {
          for (unsigned int i = 0; i < numberOfStates.n_elem; i++) {
            int ind_i = cumsumstate(i) - numberOfStates(i);
            arma::uvec ind = arma::find(
              INZ.subvec(ind_i, cumsumstate(i) - 1));
            if (ind.n_elem > 0) {
              arma::vec gradIrow(numberOfStates(i), arma::fill::zeros);
              for (unsigned int j = 0; j < numberOfStates(i); j++) {
                double tmp = weights(i, k);
                for (unsigned int r = 0; r < obs.n_rows; r++) {
                  tmp *= emission(ind_i + j, obs(r, 0, k), r);
                }
                gradIrow(j) += tmp * beta(ind_i + j, 0);

              }
              arma::mat gradI(numberOfStates(i), numberOfStates(i), arma::fill::zeros);
              gradI.eye();
              gradI.each_row() -= init.subvec(ind_i, cumsumstate(i) - 1).t();
              gradI.each_col() %= init.subvec(ind_i, cumsumstate(i) - 1);
              gradIrow = gradI * gradIrow;
              grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradIrow.rows(ind);
              countgrad += ind.n_elem;
            }
          }
        }
        for (unsigned int jj = 1; jj < numberOfStates.n_elem; jj++) {
          unsigned int ind_jj = (cumsumstate(jj) - numberOfStates(jj));

          for (unsigned int j = 0; j < emission.n_rows; j++) {
            double tmp = 1.0;
            for (unsigned int r = 0; r < obs.n_rows; r++) {
              tmp *= emission(j, obs(r, 0, k), r);
            }
            if ((j >= ind_jj) & (j < cumsumstate(jj))) {
              grad_k.subvec(countgrad + q * (jj - 1), countgrad + q * jj - 1) += tmp
              * beta(j, 0) * initk(j, k) * X.row(k).t() * (1.0 - weights(jj, k));
            } else {
              grad_k.subvec(countgrad + q * (jj - 1), countgrad + q * jj - 1) -= tmp
              * beta(j, 0) * initk(j, k) * X.row(k).t() * weights(jj, k);
            }
          }

        }
        if (!scales.is_finite() || !beta.is_finite()) {
#pragma omp atomic
          error++;
        } else {
          ll -= arma::sum(log(scales));
#pragma omp critical
          grad += grad_k;
        }
      }
    }
    if(error > 0){
      ll = -arma::datum::inf;
      grad.fill(-arma::datum::inf);
    }
    return List::create(Named("objective") = -ll, Named("gradient") = wrap(-grad));
}
Пример #2
0
List objective(const arma::mat& transition, NumericVector emissionArray,
  const arma::vec& init, IntegerVector obsArray, const arma::imat& ANZ,
  IntegerVector emissNZ, const arma::ivec& INZ, const arma::ivec& nSymbols, int threads) {

  IntegerVector eDims = emissionArray.attr("dim"); //m,p,r
  IntegerVector oDims = obsArray.attr("dim"); //k,n,r

  arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true);
  arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true);
  arma::icube BNZ(emissNZ.begin(), emission.n_rows, emission.n_cols - 1, emission.n_slices, false, true);

  arma::vec grad(arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ), arma::fill::zeros);

  // arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k
  // arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k
  // arma::mat scales(obs.n_cols, obs.n_slices); //m,n,k
  //
  // internalForward(transition, emission, init, obs, alpha, scales, threads);
  // if (!scales.is_finite()) {
  //   grad.fill(-arma::math::inf());
  //   return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad));
  // }
  //
  // internalBackward(transition, emission, obs, beta, scales, threads);
  // if (!beta.is_finite()) {
  //   grad.fill(-arma::math::inf());
  //   return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad));
  // }

  //use this instead of local vectors with grad += grad_k;, uses more memory but gives bit-identical results
  //arma::mat gradmat(arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ), obs.n_slices);

  unsigned int error = 0;
  double ll = 0;
#pragma omp parallel for if(obs.n_slices >= threads) schedule(static) reduction(+:ll) num_threads(threads) \
  default(none) shared(grad, nSymbols, ANZ, BNZ, INZ, obs, init, transition, emission, error)
    for (int k = 0; k < obs.n_slices; k++) {
      if (error == 0) {
        arma::mat alpha(emission.n_rows, obs.n_cols); //m,n
        arma::vec scales(obs.n_cols); //n
        arma::sp_mat sp_trans(transition);
        uvForward(sp_trans.t(), emission, init, obs.slice(k), alpha, scales);
        arma::mat beta(emission.n_rows, obs.n_cols); //m,n
        uvBackward(sp_trans, emission, obs.slice(k), beta, scales);

        int countgrad = 0;
        arma::vec grad_k(grad.n_elem, arma::fill::zeros);
        // transitionMatrix
        arma::vec gradArow(emission.n_rows);
        arma::mat gradA(emission.n_rows, emission.n_rows);

        for (unsigned int i = 0; i < emission.n_rows; i++) {
          arma::uvec ind = arma::find(ANZ.row(i));

          if (ind.n_elem > 0) {
            gradArow.zeros();
            gradA.eye();
            gradA.each_row() -= transition.row(i);
            gradA.each_col() %= transition.row(i).t();

            for (unsigned int t = 0; t < (obs.n_cols - 1); t++) {
              for (unsigned int j = 0; j < emission.n_rows; j++) {
                double tmp = 1.0;
                for (unsigned int r = 0; r < obs.n_rows; r++) {
                  tmp *= emission(j, obs(r, t + 1, k), r);
                }
                gradArow(j) += alpha(i, t) * tmp * beta(j, t + 1) / scales(t + 1);
              }

            }

            gradArow = gradA * gradArow;
            grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradArow.rows(ind);
            countgrad += ind.n_elem;
          }
        }
        // emissionMatrix
        for (unsigned int r = 0; r < obs.n_rows; r++) {
          arma::vec gradBrow(nSymbols(r));
          arma::mat gradB(nSymbols(r), nSymbols(r));
          for (unsigned int i = 0; i < emission.n_rows; i++) {
            arma::uvec ind = arma::find(BNZ.slice(r).row(i));
            if (ind.n_elem > 0) {
              gradBrow.zeros();
              gradB.eye();
              gradB.each_row() -= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1);
              gradB.each_col() %= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1).t();
              for (int j = 0; j < nSymbols(r); j++) {
                if (obs(r, 0, k) == j) {
                  double tmp = 1.0;
                  for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) {
                    if (r2 != r) {
                      tmp *= emission(i, obs(r2, 0, k), r2);
                    }
                  }
                  gradBrow(j) += init(i) * tmp * beta(i, 0) / scales(0);
                }
                for (unsigned int t = 0; t < (obs.n_cols - 1); t++) {
                  if (obs(r, t + 1, k) == j) {
                    double tmp = 1.0;
                    for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) {
                      if (r2 != r) {
                        tmp *= emission(i, obs(r2, t + 1, k), r2);
                      }
                    }
                    gradBrow(j) += arma::dot(alpha.col(t), transition.col(i)) * tmp
                      * beta(i, t + 1) / scales(t + 1);
                  }
                }

              }
              gradBrow = gradB * gradBrow;
              grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradBrow.rows(ind);
              countgrad += ind.n_elem;

            }
          }
        }
        // InitProbs
        arma::uvec ind = arma::find(INZ);
        if (ind.n_elem > 0) {
          arma::vec gradIrow(emission.n_rows);
          arma::mat gradI(emission.n_rows, emission.n_rows);

          gradIrow.zeros();
          gradI.zeros();
          gradI.eye();
          gradI.each_row() -= init.t();
          gradI.each_col() %= init;
          for (unsigned int j = 0; j < emission.n_rows; j++) {
            double tmp = 1.0;
            for (unsigned int r = 0; r < obs.n_rows; r++) {
              tmp *= emission(j, obs(r, 0, k), r);
            }
            gradIrow(j) += tmp * beta(j, 0) / scales(0);
          }

          gradIrow = gradI * gradIrow;
          grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradIrow.rows(ind);
          countgrad += ind.n_elem;
        }
        if (!scales.is_finite() || !beta.is_finite()) {
#pragma omp atomic
          error++;
        } else {
          ll += arma::sum(log(scales));
#pragma omp critical
          grad += grad_k;
         // gradmat.col(k) = grad_k;
        }
//           for (unsigned int ii = 0; ii < grad_k.n_elem; ii++) {
// #pragma omp atomic
//             grad(ii) += grad_k(ii);
//         }

      }
    }
    if(error > 0){
      ll = -arma::math::inf();
      grad.fill(-arma::math::inf());
    }
    // } else {
    //   grad = sum(gradmat, 1);
    // }
    return List::create(Named("objective") = -ll, Named("gradient") = wrap(-grad));
}
Пример #3
0
// [[Rcpp::export]]
Rcpp::List objective(const arma::mat& transition, const arma::cube& emission,
  const arma::vec& init, arma::ucube& obs, const arma::umat& ANZ,
  const arma::ucube& BNZ, const arma::uvec& INZ, const arma::uvec& nSymbols, unsigned int threads) {

  arma::vec grad(arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ), arma::fill::zeros);

  unsigned int error = 0;
  double ll = 0;
#pragma omp parallel for if(obs.n_slices >= threads) schedule(static) reduction(+:ll) num_threads(threads) \
  default(shared) //shared(grad, nSymbols, ANZ, BNZ, INZ, obs, init, transition, emission, error, arma::fill::zeros)
    for (unsigned int k = 0; k < obs.n_slices; k++) {
      if (error == 0) {
        arma::mat alpha(emission.n_rows, obs.n_cols); //m,n
        arma::vec scales(obs.n_cols); //n
        arma::sp_mat sp_trans(transition);
        uvForward(sp_trans.t(), emission, init, obs.slice(k), alpha, scales);
        arma::mat beta(emission.n_rows, obs.n_cols); //m,n
        uvBackward(sp_trans, emission, obs.slice(k), beta, scales);

        int countgrad = 0;
        arma::vec grad_k(grad.n_elem, arma::fill::zeros);
        // transitionMatrix
        arma::vec gradArow(emission.n_rows);
        arma::mat gradA(emission.n_rows, emission.n_rows);

        for (unsigned int i = 0; i < emission.n_rows; i++) {
          arma::uvec ind = arma::find(ANZ.row(i));

          if (ind.n_elem > 0) {
            gradArow.zeros();
            gradA.eye();
            gradA.each_row() -= transition.row(i);
            gradA.each_col() %= transition.row(i).t();

            for (unsigned int t = 0; t < (obs.n_cols - 1); t++) {
              for (unsigned int j = 0; j < emission.n_rows; j++) {
                double tmp = 1.0;
                for (unsigned int r = 0; r < obs.n_rows; r++) {
                  tmp *= emission(j, obs(r, t + 1, k), r);
                }
                gradArow(j) += alpha(i, t) * tmp * beta(j, t + 1);
              }

            }

            gradArow = gradA * gradArow;
            grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradArow.rows(ind);
            countgrad += ind.n_elem;
          }
        }
        // emissionMatrix
        for (unsigned int r = 0; r < obs.n_rows; r++) {
          arma::vec gradBrow(nSymbols(r));
          arma::mat gradB(nSymbols(r), nSymbols(r));
          for (unsigned int i = 0; i < emission.n_rows; i++) {
            arma::uvec ind = arma::find(BNZ.slice(r).row(i));
            if (ind.n_elem > 0) {
              gradBrow.zeros();
              gradB.eye();
              gradB.each_row() -= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1);
              gradB.each_col() %= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1).t();
              for (unsigned int j = 0; j < nSymbols(r); j++) {
                if (obs(r, 0, k) == j) {
                  double tmp = 1.0;
                  for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) {
                    if (r2 != r) {
                      tmp *= emission(i, obs(r2, 0, k), r2);
                    }
                  }
                  gradBrow(j) += init(i) * tmp * beta(i, 0);
                }
                for (unsigned int t = 0; t < (obs.n_cols - 1); t++) {
                  if (obs(r, t + 1, k) == j) {
                    double tmp = 1.0;
                    for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) {
                      if (r2 != r) {
                        tmp *= emission(i, obs(r2, t + 1, k), r2);
                      }
                    }
                    gradBrow(j) += arma::dot(alpha.col(t), transition.col(i)) * tmp
                      * beta(i, t + 1);
                  }
                }

              }
              gradBrow = gradB * gradBrow;
              grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradBrow.rows(ind);
              countgrad += ind.n_elem;

            }
          }
        }
        // InitProbs
        arma::uvec ind = arma::find(INZ);
        if (ind.n_elem > 0) {
          arma::vec gradIrow(emission.n_rows);
          arma::mat gradI(emission.n_rows, emission.n_rows);

          gradIrow.zeros();
          gradI.zeros();
          gradI.eye();
          gradI.each_row() -= init.t();
          gradI.each_col() %= init;
          for (unsigned int j = 0; j < emission.n_rows; j++) {
            double tmp = 1.0;
            for (unsigned int r = 0; r < obs.n_rows; r++) {
              tmp *= emission(j, obs(r, 0, k), r);
            }
            gradIrow(j) += tmp * beta(j, 0);
          }

          gradIrow = gradI * gradIrow;
          grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradIrow.rows(ind);
          countgrad += ind.n_elem;
        }
        if (!scales.is_finite() || !beta.is_finite()) {
#pragma omp atomic
          error++;
        } else {
          ll -= arma::sum(log(scales));
#pragma omp critical
          grad += grad_k;
         // gradmat.col(k) = grad_k;
        }
//           for (unsigned int ii = 0; ii < grad_k.n_elem; ii++) {
// #pragma omp atomic
//             grad(ii) += grad_k(ii);
//         }

      }
    }
    if(error > 0){
      ll = -arma::datum::inf;
      grad.fill(-arma::datum::inf);
    }
    // } else {
    //   grad = sum(gradmat, 1);
    // }
    return Rcpp::List::create(Rcpp::Named("objective") = -ll, Rcpp::Named("gradient") = Rcpp::wrap(-grad));
}
Пример #4
0
int main(int argc, char *argv[]){
  
	Params params;
  
	std::map<std::string, std::string> args;
	readArgs(argc, argv, args);
	if(args.find("algo")!=args.end()){
		params.algo = args["algo"];
	}else{
		params.algo = "qdMCNat";
	}

	if(args.find("inst_file")!=args.end())
		setParamsFromFile(args["inst_file"], args, params);
	else   
		setParams(params.algo, args, params);
  
	createLogDir(params.dir_path);
  
	gen.seed(params.seed);

	// Load the dataset
	MyMatrix X_train, X_valid;
	VectorXd Y_train, Y_valid;
	loadMnist(params.ratio_train, X_train, X_valid, Y_train, Y_valid);
	//loadCIFAR10(params.ratio_train, X_train, X_valid, Y_train, Y_valid);
	//loadLightCIFAR10(params.ratio_train, X_train, X_valid, Y_train, Y_valid);
  
	// ConvNet parameters
	std::vector<ConvLayerParams> conv_params;
	ConvLayerParams conv_params1;
	conv_params1.Hf = 5;
	conv_params1.stride = 1;
	conv_params1.n_filter = 20;
	conv_params1.padding = 0;
	conv_params.push_back(conv_params1);
  
	ConvLayerParams conv_params2;
	conv_params2.Hf = 5;
	conv_params2.stride = 1;
	conv_params2.n_filter = 50;
	conv_params2.padding = 0;
	conv_params.push_back(conv_params2);

	std::vector<PoolLayerParams> pool_params;
	PoolLayerParams pool_params1;
	pool_params1.Hf = 2;
	pool_params1.stride = 2;
	pool_params.push_back(pool_params1);

	PoolLayerParams pool_params2;
	pool_params2.Hf = 2;
	pool_params2.stride = 2;
	pool_params.push_back(pool_params2);
  
	const unsigned n_conv_layer = conv_params.size();
  
	for(unsigned l = 0; l < conv_params.size(); l++){

		if(l==0){
			conv_params[l].filter_size = conv_params[l].Hf * conv_params[l].Hf * params.img_depth;
			conv_params[l].N = (params.img_width - conv_params[l].Hf + 2*conv_params[l].padding)/conv_params[l].stride + 1;
		}
		else{
			conv_params[l].filter_size = conv_params[l].Hf * conv_params[l].Hf * conv_params[l-1].n_filter;
			conv_params[l].N = (pool_params[l-1].N - conv_params[l].Hf + 2*conv_params[l].padding)/conv_params[l].stride + 1;
		}
		pool_params[l].N = (conv_params[l].N - pool_params[l].Hf)/pool_params[l].stride + 1;
	}
  
	// Neural Network parameters
	const unsigned n_training = X_train.rows();
	const unsigned n_valid = X_valid.rows();
	const unsigned n_feature = X_train.cols();
	const unsigned n_label = Y_train.maxCoeff() + 1;
  
	params.nn_arch.insert(params.nn_arch.begin(),conv_params[n_conv_layer-1].n_filter * pool_params[n_conv_layer-1].N * pool_params[n_conv_layer-1].N);
	params.nn_arch.push_back(n_label);
	const unsigned n_layers = params.nn_arch.size();
  
	// Optimization parameter
	const int n_train_batch = ceil(n_training/(float)params.train_minibatch_size);
	const int n_valid_batch = ceil(n_valid/(float)params.valid_minibatch_size);
	double prev_loss = std::numeric_limits<double>::max();
	double eta = params.eta;

	// Create the convolutional layer
	std::vector<MyMatrix> conv_W(n_conv_layer);
	std::vector<MyMatrix> conv_W_T(n_conv_layer);
	std::vector<MyVector> conv_B(n_conv_layer);
  
	// Create the neural network
	MyMatrix W_out(params.nn_arch[n_layers-2],n_label);
	std::vector<MySpMatrix> W(n_layers-2);
	std::vector<MySpMatrix> Wt(n_layers-2);
	std::vector<MyVector> B(n_layers-1);

	double init_sigma = 0.;
	ActivationFunction act_func;
	ActivationFunction eval_act_func;
	if(params.act_func_name=="sigmoid"){
		init_sigma = 4.0;
		act_func = std::bind(logistic,true,_1,_2,_3);
		eval_act_func = std::bind(logistic,false,_1,_2,_3);
	}else if(params.act_func_name=="tanh"){
		init_sigma = 1.0;
		act_func = std::bind(my_tanh,true,_1,_2,_3);
		eval_act_func = std::bind(my_tanh,false,_1,_2,_3);
	}else if(params.act_func_name=="relu"){
		init_sigma = 1.0; // TODO: Find the good value
		act_func = std::bind(relu,true,_1,_2,_3);
		eval_act_func = std::bind(relu,false,_1,_2,_3);
	}else{
		std::cout << "Not implemented yet!" << std::endl;
		assert(false);
	}

	std::cout << "Initializing the network... ";
	params.n_params = initNetwork(params.nn_arch, params.act_func_name, params.sparsity, conv_params, pool_params, W_out, W, Wt, B, conv_W, conv_W_T, conv_B); // TODO: Init the conv bias

	// Deep copy of parameters for the adaptive rule
	std::vector<MyMatrix> mu_dW(n_layers-1);
	std::vector<MyVector> mu_dB(n_layers-1);

	MyMatrix pW_out = W_out;
	std::vector<MySpMatrix> pW = W;
	std::vector<MySpMatrix> pWt = Wt;
	std::vector<MyVector> pB = B;

	MyMatrix ppMii_out, ppM0i_out;
	MyVector ppM00_out;
  
	std::vector<MySpMatrix> ppMii,ppM0i;
	std::vector<MyVector> ppM00;

	MyMatrix pMii_out,pM0i_out;
	MyVector pM00_out;
  
	std::vector<MySpMatrix> pMii,pM0i;
	std::vector<MyVector> pM00;

	std::vector<MyMatrix> conv_ppMii, conv_ppM0i;
	std::vector<MyVector> conv_ppM00;

	std::vector<MyMatrix> conv_pMii, conv_pM0i;
	std::vector<MyVector> conv_pM00;
  
	// Convert the labels to one-hot vector
	MyMatrix one_hot = MyMatrix::Zero(n_training, n_label);
	labels2oneHot(Y_train,one_hot);
  
	// Configure the logger 
	std::ostream* logger;
	if(args.find("verbose")!=args.end()){
		getOutput("",logger);
	}else{
		getOutput(params.file_path,logger);
	}

	double cumul_time = 0.;
  
	printDesc(params, logger);
	printConvDesc(params, conv_params, pool_params, logger);
	std::cout << "Starting the learning phase... " << std::endl;
	*logger << "Epoch Time(s) train_loss train_accuracy valid_loss valid_accuracy eta" << std::endl;
  
	for(unsigned i = 0; i < params.n_epoch; i++){
		for(unsigned j = 0; j < n_train_batch; j++){
      
			// Mini-batch creation
			unsigned curr_batch_size = 0;
			MyMatrix X_batch, one_hot_batch;
			getMiniBatch(j, params.train_minibatch_size, X_train, one_hot, params, conv_params[0], curr_batch_size, X_batch, one_hot_batch);
      
			double prev_time = gettime();

			// Forward propagation for conv layer
			std::vector<std::vector<unsigned>> poolIdxX1(n_conv_layer);
			std::vector<std::vector<unsigned>> poolIdxY1(n_conv_layer);
      
			MyMatrix z0;
			std::vector<MyMatrix> conv_A(conv_W.size());
			std::vector<MyMatrix> conv_Ap(conv_W.size());
			convFprop(curr_batch_size, conv_params, pool_params, act_func, conv_W, conv_B, X_batch, conv_A, conv_Ap, z0, poolIdxX1, poolIdxY1);
            
			// Forward propagation
			std::vector<MyMatrix> Z(n_layers-1);
			std::vector<MyMatrix> A(n_layers-2);
			std::vector<MyMatrix> Ap(n_layers-2);
			fprop(params.dropout_flag, act_func, W, W_out, B, z0, Z, A, Ap);
      
			// Compute the output and the error
			MyMatrix out;
			softmax(Z[n_layers-2], out);
      
			std::vector<MyMatrix> gradB(n_layers-1);
			gradB[n_layers-2] = out - one_hot_batch;

			// Backpropagation
			bprop(Wt, W_out, Ap, gradB);

			// Backpropagation for conv layer
			std::vector<MyMatrix> conv_gradB(conv_W.size());
			MyMatrix layer_gradB = (gradB[0] * W[0].transpose());
			MyMatrix pool_gradB;
			layer2pool(curr_batch_size, pool_params[conv_W.size()-1].N, conv_params[conv_W.size()-1].n_filter, layer_gradB, pool_gradB);
      
			convBprop(curr_batch_size, conv_params, pool_params, conv_W_T, conv_Ap, pool_gradB, conv_gradB, poolIdxX1, poolIdxY1);
      
			if(params.algo == "bprop"){
				update(eta, gradB, A, z0, params.regularizer, params.lambda, W_out, W, Wt, B);
				convUpdate(curr_batch_size, eta, conv_params, conv_gradB, conv_A, X_batch, "", 0., conv_W, conv_W_T, conv_B);
	
			}else{

				// Compute the metric
				std::vector<MyMatrix> metric_gradB(n_layers-1);
				std::vector<MyMatrix> metric_conv_gradB(conv_params.size());

				if(params.algo=="qdMCNat"){

					// Monte-Carlo Approximation of the metric
					std::vector<MyMatrix> mc_gradB(n_layers-1);
					computeMcError(out, mc_gradB[n_layers-2]);

					// Backpropagation
					bprop(Wt, W_out, Ap, mc_gradB);

					for(unsigned k = 0; k < gradB.size(); k++){
						metric_gradB[k] = mc_gradB[k].array().square();
					}

					// Backpropagation for conv layer
					std::vector<MyMatrix> mc_conv_gradB(conv_W.size());
					MyMatrix mc_layer_gradB = (mc_gradB[0] * W[0].transpose());
					MyMatrix mc_pool_gradB;
					layer2pool(curr_batch_size, pool_params[conv_W.size()-1].N, conv_params[conv_W.size()-1].n_filter, mc_layer_gradB, mc_pool_gradB);
	  
					convBprop(curr_batch_size, conv_params, pool_params, conv_W_T, conv_Ap, mc_pool_gradB, mc_conv_gradB, poolIdxX1, poolIdxY1);
	  
					for(unsigned k = 0; k < conv_params.size(); k++){
						metric_conv_gradB[k] = mc_conv_gradB[k].array().square();
					}
				}	
				else if(params.algo=="qdop"){

					for(unsigned k = 0; k < conv_params.size(); k++){
						metric_conv_gradB[k] = conv_gradB[k].array().square();
					}
					for(unsigned k = 0; k < gradB.size(); k++){
						metric_gradB[k] = gradB[k].array().square();
					}
				}
				else if(params.algo=="qdNat"){
	  
					for(unsigned k = 0; k < conv_params.size(); k++){
						metric_conv_gradB[k] = conv_gradB[k].array().square();
					}

					for(unsigned k = 0; k < metric_gradB.size(); k++){
						metric_gradB[k] = MyMatrix::Zero(gradB[k].rows(),gradB[k].cols());
					}

					for(unsigned l = 0; l < n_label; l++){
						MyMatrix fisher_ohbatch = MyMatrix::Zero(curr_batch_size, n_label);
						fisher_ohbatch.col(l).setOnes();

						std::vector<MyMatrix> fgradB(n_layers-1);
						fgradB[n_layers-2] = out - fisher_ohbatch;
						bprop(Wt, W_out, Ap, fgradB);

						// Backpropagation for conv layer
						std::vector<MyMatrix> fisher_conv_gradB(conv_W.size());
						MyMatrix fisher_layer_gradB = (fgradB[0] * W[0].transpose());
						MyMatrix fisher_pool_gradB;
						layer2pool(curr_batch_size, pool_params[conv_W.size()-1].N, conv_params[conv_W.size()-1].n_filter, fisher_layer_gradB, fisher_pool_gradB);
	    
						convBprop(curr_batch_size, conv_params, pool_params, conv_W_T, conv_Ap, fisher_pool_gradB, fisher_conv_gradB, poolIdxX1, poolIdxY1);

						for(unsigned k = 0; k < conv_params.size(); k++){
							MyMatrix fisher_conv_gradB_sq = fisher_conv_gradB[k].array().square();
							for(unsigned m = 0; m < out.rows(); m++){
								for(unsigned f = 0; f < conv_params[k].n_filter; f++){
									for(unsigned n = 0; n < conv_params[k].N * conv_params[k].N; n++){
										fisher_conv_gradB_sq(f,m*conv_params[k].N*conv_params[k].N+n) *= out(m,l);
									}
								}
							}
							metric_conv_gradB[k] += fisher_conv_gradB_sq;
						}
	    
						for(unsigned k = 0; k < W.size(); k++){
							const unsigned rev_k = n_layers - k - 2;
							metric_gradB[rev_k] += (fgradB[rev_k].array().square().array().colwise() * out.array().col(l)).matrix();
						}
					}
				}
	
				bool init_flag = false;
				if(i == 0 && j == 0 && !params.init_metric_id){
					init_flag = true;
				}

				std::vector<MyMatrix> conv_Mii(conv_params.size());
				std::vector<MyMatrix> conv_M0i(conv_params.size());
				std::vector<MyVector> conv_M00(conv_params.size());
	
				buildConvQDMetric(curr_batch_size, metric_conv_gradB, conv_A, X_batch, conv_W, params.matrix_reg, conv_Mii, conv_M0i, conv_M00);

				updateConvMetric(init_flag, params.metric_gamma, conv_pMii, conv_pM0i, conv_pM00, conv_Mii, conv_M0i, conv_M00);

				MyMatrix Mii_out, M0i_out;
				MyVector M00_out;
				std::vector<MySpMatrix> Mii(W.size());
				std::vector<MySpMatrix> M0i(W.size());
				std::vector<MyVector> M00(W.size());

				buildQDMetric(metric_gradB, A, z0, W_out, W, params.matrix_reg, Mii_out, M0i_out, M00_out, Mii, M0i, M00);

				updateMetric(init_flag, params.metric_gamma, Mii_out, M0i_out, M00_out, Mii, M0i, M00, pMii_out, pM0i_out, pM00_out, pMii, pM0i, pM00);
				update(eta, gradB, A, z0, params.regularizer, params.lambda, W_out, W, Wt, B, Mii_out, M0i_out, M00_out, Mii, M0i, M00);
			}
      
			double curr_time = gettime();
			cumul_time += curr_time - prev_time;      
      
			if(params.minilog_flag){
	
				double train_loss = 0.;
				double train_accuracy = 0.;
				double valid_loss = 0.;
				double valid_accuracy = 0.;
				evalModel(eval_act_func, params, n_train_batch, n_training, X_train, Y_train, conv_params, pool_params, conv_W, conv_B, W_out, W, B, train_loss, train_accuracy);
				evalModel(eval_act_func, params, n_valid_batch, n_valid, X_valid, Y_valid, conv_params, pool_params, conv_W, conv_B, W_out, W, B, valid_loss, valid_accuracy);
	
				// Logging
				*logger << i + float(j)/n_train_batch << " " << cumul_time << " " << train_loss <<  " " << train_accuracy << " " << valid_loss <<  " " << valid_accuracy << " " << eta << std::endl;
	
			}
		}
		if(!params.minilog_flag || params.adaptive_flag){
			double train_loss = 0.;
			double train_accuracy = 0.;
			double valid_loss = 0.;
			double valid_accuracy = 0.;
			evalModel(eval_act_func, params, n_train_batch, n_training, X_train, Y_train, conv_params, pool_params, conv_W, conv_B, W_out, W, B, train_loss, train_accuracy);
			evalModel(eval_act_func, params, n_valid_batch, n_valid, X_valid, Y_valid, conv_params, pool_params, conv_W, conv_B, W_out, W, B, valid_loss, valid_accuracy);
      
			// if(params.adaptive_flag)
			// 	adaptiveRule(train_loss, prev_loss, eta, W, B, pMii, pM0i, pM00, pW, pB, ppMii, ppM0i, ppM00);
      
			// Logging
			if(!params.minilog_flag){
				*logger << i  << " " << cumul_time << " " << train_loss <<  " " << train_accuracy << " " << valid_loss <<  " " << valid_accuracy << " " << eta << std::endl;
			}
		}
	}
}
Пример #5
0
List objectivex(const arma::mat& transition, NumericVector emissionArray,
                const arma::vec& init, IntegerVector obsArray, const arma::imat& ANZ,
                IntegerVector emissNZ, const arma::ivec& INZ, const arma::ivec& nSymbols,
                const arma::mat& coef, const arma::mat& X, arma::ivec& numberOfStates,
                int threads) {


  IntegerVector eDims = emissionArray.attr("dim"); //m,p,r
  IntegerVector oDims = obsArray.attr("dim"); //k,n,r

  arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true);
  arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true);

  arma::icube BNZ(emissNZ.begin(), emission.n_rows, emission.n_cols - 1, emission.n_slices, false, true);

  unsigned int q = coef.n_rows;
  arma::vec grad(
      arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ) + (numberOfStates.n_elem- 1) * q,
      arma::fill::zeros);
  arma::mat weights = exp(X * coef).t();
  if (!weights.is_finite()) {
    grad.fill(-arma::math::inf());
    return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad));
  }

  weights.each_row() /= sum(weights, 0);

  arma::mat initk(emission.n_rows, obs.n_slices);

  for (unsigned int k = 0; k < obs.n_slices; k++) {
    initk.col(k) = init % reparma(weights.col(k), numberOfStates);
  }

  arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k
  arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k
  arma::mat scales(obs.n_cols, obs.n_slices); //m,n,k

  arma::sp_mat sp_trans(transition);
  internalForwardx(sp_trans.t(), emission, initk, obs, alpha, scales, threads);
  if (!scales.is_finite()) {
    grad.fill(-arma::math::inf());
    return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad));
  }

  internalBackwardx(sp_trans, emission, obs, beta, scales, threads);
  if (!beta.is_finite()) {
    grad.fill(-arma::math::inf());
    return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad));
  }

  arma::ivec cumsumstate = arma::cumsum(numberOfStates);

  arma::mat gradmat(
      arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ) + (numberOfStates.n_elem- 1) * q,
      obs.n_slices, arma::fill::zeros);

#pragma omp parallel for if(obs.n_slices >= threads) schedule(static) num_threads(threads)       \
  default(none) shared(q, alpha, beta, scales, gradmat, nSymbols, ANZ, BNZ, INZ,          \
          numberOfStates, cumsumstate, obs, init, initk, X, weights, transition, emission)
    for (int k = 0; k < obs.n_slices; k++) {
      int countgrad = 0;
      // transitionMatrix
      if (arma::accu(ANZ) > 0) {

        for (int jj = 0; jj < numberOfStates.n_elem; jj++) {
          arma::vec gradArow(numberOfStates(jj));
          arma::mat gradA(numberOfStates(jj), numberOfStates(jj));
          int ind_jj = cumsumstate(jj) - numberOfStates(jj);

          for (int i = 0; i < numberOfStates(jj); i++) {
            arma::uvec ind = arma::find(ANZ.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1));

            if (ind.n_elem > 0) {
              gradArow.zeros();
              gradA.eye();
              gradA.each_row() -= transition.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1);
              gradA.each_col() %= transition.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1).t();


              for (int j = 0; j < numberOfStates(jj); j++) {
                for (unsigned int t = 0; t < (obs.n_cols - 1); t++) {
                  double tmp = alpha(ind_jj + i, t, k);
                  for (unsigned int r = 0; r < obs.n_rows; r++) {
                    tmp *= emission(ind_jj + j, obs(r, t + 1, k), r);
                  }
                  gradArow(j) += tmp * beta(ind_jj + j, t + 1, k) / scales(t + 1, k);
                }

              }

              gradArow = gradA * gradArow;
              gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradArow.rows(ind);
              countgrad += ind.n_elem;
            }
          }
        }
      }
      if (arma::accu(BNZ) > 0) {
        // emissionMatrix
        for (unsigned int r = 0; r < obs.n_rows; r++) {
          arma::vec gradBrow(nSymbols(r));
          arma::mat gradB(nSymbols(r), nSymbols(r));
          for (unsigned int i = 0; i < emission.n_rows; i++) {
            arma::uvec ind = arma::find(BNZ.slice(r).row(i));
            if (ind.n_elem > 0) {
              gradBrow.zeros();
              gradB.eye();
              gradB.each_row() -= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1);
              gradB.each_col() %= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1).t();
              for (int j = 0; j < nSymbols(r); j++) {
                if (obs(r, 0, k) == j) {
                  double tmp = initk(i, k);
                  for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) {
                    if (r2 != r) {
                      tmp *= emission(i, obs(r2, 0, k), r2);
                    }
                  }
                  gradBrow(j) += tmp * beta(i, 0, k) / scales(0, k);
                }
                for (unsigned int t = 0; t < (obs.n_cols - 1); t++) {
                  if (obs(r, t + 1, k) == j) {
                    double tmp = beta(i, t + 1, k) / scales(t + 1, k);
                    for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) {
                      if (r2 != r) {
                        tmp *= emission(i, obs(r2, t + 1, k), r2);
                      }
                    }
                    gradBrow(j) += arma::dot(alpha.slice(k).col(t), transition.col(i)) * tmp;
                  }
                }

              }
              gradBrow = gradB * gradBrow;
              gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradBrow.rows(ind);
              countgrad += ind.n_elem;

            }
          }
        }
      }
      if (arma::accu(INZ) > 0) {
        for (int i = 0; i < numberOfStates.n_elem; i++) {
          int ind_i = cumsumstate(i) - numberOfStates(i);
          arma::uvec ind = arma::find(
            INZ.subvec(ind_i, cumsumstate(i) - 1));
          if (ind.n_elem > 0) {
            arma::vec gradIrow(numberOfStates(i), arma::fill::zeros);
            for (int j = 0; j < numberOfStates(i); j++) {
              double tmp = weights(i, k);
              for (unsigned int r = 0; r < obs.n_rows; r++) {
                tmp *= emission(ind_i + j, obs(r, 0, k), r);
              }
              gradIrow(j) += tmp * beta(ind_i + j, 0, k) / scales(0, k);

            }
            arma::mat gradI(numberOfStates(i), numberOfStates(i), arma::fill::zeros);
            gradI.eye();
            gradI.each_row() -= init.subvec(ind_i, cumsumstate(i) - 1).t();
            gradI.each_col() %= init.subvec(ind_i, cumsumstate(i) - 1);
            gradIrow = gradI * gradIrow;
            gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradIrow.rows(ind);
            countgrad += ind.n_elem;
          }
        }
      }
      for (int jj = 1; jj < numberOfStates.n_elem; jj++) {
        int ind_jj = (cumsumstate(jj) - numberOfStates(jj));

        for (int j = 0; j < emission.n_rows; j++) {
          double tmp = 1.0;
          for (unsigned int r = 0; r < obs.n_rows; r++) {
            tmp *= emission(j, obs(r, 0, k), r);
          }
          if ((j >= ind_jj) & (j < cumsumstate(jj))) {
            gradmat.col(k).subvec(countgrad + q * (jj - 1), countgrad + q * jj - 1) += tmp
            * beta(j, 0, k) / scales(0, k) * initk(j, k) * X.row(k).t() * (1.0 - weights(jj, k));
          } else {
            gradmat.col(k).subvec(countgrad + q * (jj - 1), countgrad + q * jj - 1) -= tmp
            * beta(j, 0, k) / scales(0, k) * initk(j, k) * X.row(k).t() * weights(jj, k);
          }
        }

      }
    }
    return List::create(Named("objective") = -arma::accu(log(scales)),
                        Named("gradient") = wrap(-sum(gradmat, 1)));
}