IntegerVector cCreateFactor(NumericVector x, CharacterVector levels) { IntegerVector y = (IntegerVector)x; y.attr("levels") = levels; y.attr("class") = "factor"; return y; }
// Optimized factor routine for the case where we want to make // a factor from a vector of names -- used for generating the // 'variable' column in the melted data.frame IntegerVector make_variable_column(CharacterVector x, int nrow) { IntegerVector fact = seq(1, x.size()); IntegerVector output = rep_each_(fact, nrow); output.attr("levels") = x; output.attr("class") = "factor"; return output; }
// Optimized factor routine for the case where we want to make // a factor from a vector of names -- used for generating the // 'variable' column in the melted data.frame IntegerVector make_variable_column_factor(CharacterVector x, int nrow) { IntegerVector output = no_init(x.size() * nrow); int idx = 0; for (int i = 0; i < x.size(); ++i) for (int j = 0; j < nrow; ++j) output[idx++] = i + 1; output.attr("levels") = x; output.attr("class") = "factor"; return output; }
//' @export // [[Rcpp::export]] List forward_backward_fast(NumericVector pi, NumericMatrix A, NumericMatrix B, IntegerVector y, int k, int n, bool marginal_distr){ List PP(n), QQ(n); for(int t=0; t<n; t++){ PP[t] = NumericMatrix(k, k); QQ[t] = NumericMatrix(k, k); } ListOf<NumericMatrix> P(PP), Q(QQ); double loglik=0.0; NumericMatrix emission_probs = emission_probs_mat_discrete(y, B, k, n); forward_step(pi, A, emission_probs, P, loglik, k, n); // now backward sampling arma::ivec x(n); IntegerVector possible_values = seq_len(k)-1; backward_sampling(x, P, possible_values, k, n); // and backward recursion to obtain marginal distributions if(marginal_distr) backward_step(P, Q, k, n); IntegerVector xx = as<IntegerVector>(wrap(x)); xx.attr("dim") = R_NilValue; return List::create(Rcpp::Named("x_draw") = xx, Rcpp::Named("P") = P, Rcpp::Named("Q") = Q, Rcpp::Named("log_posterior") = loglik); }
NumericVector logLikMixHMM(NumericVector transitionMatrix, NumericVector emissionArray, NumericVector initialProbs, IntegerVector obsArray, NumericMatrix coefs, NumericMatrix X_, IntegerVector numberOfStates) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r int q = coefs.nrow(); arma::mat coef(coefs.begin(),q,coefs.ncol()); coef.col(0).zeros(); arma::mat X(X_.begin(),oDims[0],q); arma::mat lweights = exp(X*coef).t(); if(!lweights.is_finite()){ return wrap(-std::numeric_limits<double>::max()); } lweights.each_row() /= sum(lweights,0); arma::colvec init(initialProbs.begin(),eDims[0], true); arma::mat transition(transitionMatrix.begin(),eDims[0],eDims[0], true); arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false); arma::vec alpha(eDims[0]); NumericVector ll(oDims[0]); double tmp; arma::vec initk(eDims[0]); for(int k = 0; k < oDims[0]; k++){ initk = init % reparma(lweights.col(k),numberOfStates); for(int i=0; i < eDims[0]; i++){ alpha(i) = initk(i); for(int r = 0; r < oDims[2]; r++){ alpha(i) *= emission(i,obs(k,0,r),r); } } tmp = sum(alpha); ll(k) = log(tmp); alpha /= tmp; arma::vec alphatmp(eDims[0]); for(int t = 1; t < oDims[1]; t++){ for(int i = 0; i < eDims[0]; i++){ alphatmp(i) = arma::dot(transition.col(i), alpha); for(int r = 0; r < oDims[2]; r++){ alphatmp(i) *= emission(i,obs(k,t,r),r); } } tmp = sum(alphatmp); ll(k) += log(tmp); alpha = alphatmp/tmp; } } return ll; }
RleIter(RObject& rle): rlens(as<IntegerVector>(rle.slot("lengths"))), values(as<IntegerVector>(rle.slot("values"))), names(as<CharacterVector>(values.attr("levels"))), run(0), rpos(-1) { next(); }
Rcpp::IntegerVector reTrms::nlevs() const { int nfac = d_flist.size(); IntegerVector ans(nfac); int *ap = ans.begin(); for (int i = 0; i < nfac; i++) { IntegerVector fi = d_flist(i); CharacterVector ll = fi.attr("levels"); ap[i] = ll.size(); } return ans; }
void defineVariable(IntegerVector x, std::string name) { readstat_label_set_t* labelSet = NULL; if (rClass(x) == "factor") { labelSet = readstat_add_label_set(writer_, READSTAT_TYPE_INT32, name.c_str()); CharacterVector levels = as<CharacterVector>(x.attr("levels")); for (int i = 0; i < levels.size(); ++i) readstat_label_int32_value(labelSet, i + 1, std::string(levels[i]).c_str()); } else if (rClass(x) == "labelled") { labelSet = readstat_add_label_set(writer_, READSTAT_TYPE_INT32, name.c_str()); IntegerVector values = as<IntegerVector>(x.attr("labels")); CharacterVector labels = as<CharacterVector>(values.attr("names")); for (int i = 0; i < values.size(); ++i) readstat_label_int32_value(labelSet, values[i], std::string(labels[i]).c_str()); } readstat_add_variable(writer_, READSTAT_TYPE_INT32, 0, name.c_str(), var_label(x), NULL, labelSet); }
// Based on table() // dataframe { // for each column get the num of dims.tfm // get size of the resulting talbe // finally tabulate by the bins meaning how many are there // each value will correspond to its index in the dim array. // this is just indexing by a set of values, then you go to there and find it. // } // [[Rcpp::export]] Rcpp::IntegerVector table_cpp(const RObject & input, const RObject & columns) { if(!is<DataFrame>(input)) stop("Must be a data frame."); DataFrame data = as<DataFrame>(input); if(!is<CharacterVector>(columns)) stop("Must be character vector."); CharacterVector cols = as<CharacterVector>(columns); data = data[cols]; const R_xlen_t ncols = data.ncol(); if (ncols == 0) stop("No columns in data frame."); const IntegerVector & column = data.at(0); // There is a single entry for each row IntegerVector to_tabulate = no_init(column.size()); to_tabulate.fill(1); // The product of dimensions. R_xlen_t pd = 1; IntegerVector dims(ncols); List dimnames(ncols); dimnames.names() = data.names(); for (R_xlen_t i = 0; i < ncols; i++) { const IntegerVector & a = data.at(i); if(!Rf_isFactor(a)) stop("Not a factor."); const CharacterVector & factorLevels = a.attr("levels"); R_xlen_t nl = factorLevels.size(); to_tabulate = to_tabulate + pd * (a - 1L); pd = pd * nl ; dims.at(i) = nl; dimnames.at(i) = factorLevels; } to_tabulate = na_omit(to_tabulate); IntegerVector tbl = tabulate_cpp(to_tabulate, pd); tbl.attr("dim") = dims; tbl.attr("dimnames") = dimnames; tbl.attr("class") = "table"; return tbl; }
List viterbi(NumericVector transitionMatrix, NumericVector emissionArray, NumericVector initialProbs, IntegerVector obsArray) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::vec init(initialProbs.begin(), eDims[0], false); arma::mat transition(transitionMatrix.begin(), eDims[0], eDims[0], false); arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false); arma::umat q(oDims[0], oDims[1]); arma::vec logp(oDims[0]); arma::mat delta(eDims[0],oDims[1]); arma::umat phi(eDims[0],oDims[1]); for(int k=0; k<oDims[0]; k++){ delta.col(0) = init; for(int r=0; r<eDims[2]; r++){ delta.col(0) += emission.slice(r).col(obs(k,0,r)); } phi.col(0).zeros(); for(int t=1; t<oDims[1]; t++){ for(int j=0; j<eDims[0]; j++){ (delta.col(t-1)+transition.col(j)).max(phi(j,t)); delta(j,t) = delta(phi(j,t),t-1)+transition(phi(j,t),j); for(int r=0; r<eDims[2]; r++){ delta(j,t) += emission(j,obs(k,t,r),r); } } } delta.col(oDims[1]-1).max(q(k,oDims[1]-1)); for(int t=(oDims[1]-2); t>=0; t--){ q(k,t) = phi(q(k,t+1),t+1); } logp(k) = delta.col(oDims[1]-1).max(); } return List::create(Named("q") = wrap(q),Named("logp") = wrap(logp)); }
NumericVector logLikHMM(NumericVector transitionMatrix, NumericVector emissionArray, NumericVector initialProbs, IntegerVector obsArray) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::colvec init(initialProbs.begin(),eDims[0], false); arma::mat transition(transitionMatrix.begin(),eDims[0],eDims[0], false); arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false); arma::vec alpha(eDims[0]); NumericVector ll(oDims[0]); double tmp; for(int k = 0; k < oDims[0]; k++){ for(int i=0; i < eDims[0]; i++){ alpha(i) = init(i); for(int r = 0; r < oDims[2]; r++){ alpha(i) *= emission(i,obs(k,0,r),r); } } tmp = sum(alpha); ll(k) = log(tmp); alpha /= tmp; arma::vec alphatmp(eDims[0]); for(int t = 1; t < oDims[1]; t++){ for(int i = 0; i < eDims[0]; i++){ alphatmp(i) = arma::dot(transition.col(i), alpha); for(int r = 0; r < oDims[2]; r++){ alphatmp(i) *= emission(i,obs(k,t,r),r); } } tmp = sum(alphatmp); ll(k) += log(tmp); alpha = alphatmp/tmp; } } return ll; }
// [[Rcpp::export]] Rcpp::IntegerMatrix CreateStartBoard() { using namespace Rcpp; // initalize the start board unsigned int nrow = gol_board_nrow; unsigned int ncol = gol_board_ncol; IntegerMatrix start_board(nrow, ncol); // loop until you get a board that isn't all dead unsigned int count_alive_cells = 0; while(count_alive_cells == 0) { // compute board's initial probability of the cell's being on double u = R::runif(0.0, 1.0); // fill cells of start board IntegerVector rv = as<IntegerVector>(rbinom(nrow*ncol, 1, u)); rv.attr("dim") = Dimension(nrow, ncol); Rcpp::IntegerMatrix fresh_board = as<IntegerMatrix>(rv); count_alive_cells = GetNumAlive(fresh_board); if( count_alive_cells == 0) { //std::cout << "Dead before evolve" << std::endl; continue; } // evolve board start_board = fresh_board; for (int delta = 0; delta != 5; delta++) { start_board = GetEvolvedBoard(start_board); } // check the evoled board is not all dead count_alive_cells = GetNumAlive(start_board); if( count_alive_cells == 0) { //std::cout << "Dead after evolve" << std::endl; } } // end while loop return start_board; }
List viterbi(const arma::mat& transition, NumericVector emissionArray, const arma::vec& init, IntegerVector obsArray) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::umat q(obs.n_slices, obs.n_cols); arma::vec logp(obs.n_slices); arma::mat delta(emission.n_rows, obs.n_cols); arma::umat phi(emission.n_rows, obs.n_cols); for (unsigned int k = 0; k < obs.n_slices; k++) { delta.col(0) = init; for (unsigned int r = 0; r < emission.n_slices; r++) { delta.col(0) += emission.slice(r).col(obs(r, 0, k)); } phi.col(0).zeros(); for (unsigned int t = 1; t < obs.n_cols; t++) { for (unsigned int j = 0; j < emission.n_rows; j++) { (delta.col(t - 1) + transition.col(j)).max(phi(j, t)); delta(j, t) = delta(phi(j, t), t - 1) + transition(phi(j, t), j); for (unsigned int r = 0; r < emission.n_slices; r++) { delta(j, t) += emission(j, obs(r, t, k), r); } } } delta.col(obs.n_cols - 1).max(q(k, obs.n_cols - 1)); for (int t = (obs.n_cols - 2); t >= 0; t--) { q(k, t) = phi(q(k, t + 1), t + 1); } logp(k) = delta.col(obs.n_cols - 1).max(); } return List::create(Named("q") = wrap(q), Named("logp") = wrap(logp)); }
List forwardbackward(const arma::mat& transition, NumericVector emissionArray, const arma::vec& init, IntegerVector obsArray, bool forwardonly, int threads) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::mat scales(obs.n_cols, obs.n_slices); //n,k internalForward(transition, emission, init, obs, alpha, scales, threads); if(!scales.is_finite()) { Rcpp::stop("Scaling factors contain non-finite values. \n Check the model or try using the log-space version of the algorithm."); } double min_sf = scales.min(); if (min_sf < 1e-150) { Rcpp::warning("Smallest scaling factor was %e, results can be numerically unstable.", min_sf); } if (forwardonly) { return List::create(Named("forward_probs") = wrap(alpha), Named("scaling_factors") = wrap(scales)); } else { arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k internalBackward(transition, emission, obs, beta, scales, threads); if(!beta.is_finite()) { Rcpp::stop("Backward probabilities contain non-finite values. Check the model or try using the log-space version of the algorithm."); } return List::create(Named("forward_probs") = wrap(alpha), Named("backward_probs") = wrap(beta), Named("scaling_factors") = wrap(scales)); } }
List objective(const arma::mat& transition, NumericVector emissionArray, const arma::vec& init, IntegerVector obsArray, const arma::imat& ANZ, IntegerVector emissNZ, const arma::ivec& INZ, const arma::ivec& nSymbols, int threads) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::icube BNZ(emissNZ.begin(), emission.n_rows, emission.n_cols - 1, emission.n_slices, false, true); arma::vec grad(arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ), arma::fill::zeros); // arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k // arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k // arma::mat scales(obs.n_cols, obs.n_slices); //m,n,k // // internalForward(transition, emission, init, obs, alpha, scales, threads); // if (!scales.is_finite()) { // grad.fill(-arma::math::inf()); // return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); // } // // internalBackward(transition, emission, obs, beta, scales, threads); // if (!beta.is_finite()) { // grad.fill(-arma::math::inf()); // return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); // } //use this instead of local vectors with grad += grad_k;, uses more memory but gives bit-identical results //arma::mat gradmat(arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ), obs.n_slices); unsigned int error = 0; double ll = 0; #pragma omp parallel for if(obs.n_slices >= threads) schedule(static) reduction(+:ll) num_threads(threads) \ default(none) shared(grad, nSymbols, ANZ, BNZ, INZ, obs, init, transition, emission, error) for (int k = 0; k < obs.n_slices; k++) { if (error == 0) { arma::mat alpha(emission.n_rows, obs.n_cols); //m,n arma::vec scales(obs.n_cols); //n arma::sp_mat sp_trans(transition); uvForward(sp_trans.t(), emission, init, obs.slice(k), alpha, scales); arma::mat beta(emission.n_rows, obs.n_cols); //m,n uvBackward(sp_trans, emission, obs.slice(k), beta, scales); int countgrad = 0; arma::vec grad_k(grad.n_elem, arma::fill::zeros); // transitionMatrix arma::vec gradArow(emission.n_rows); arma::mat gradA(emission.n_rows, emission.n_rows); for (unsigned int i = 0; i < emission.n_rows; i++) { arma::uvec ind = arma::find(ANZ.row(i)); if (ind.n_elem > 0) { gradArow.zeros(); gradA.eye(); gradA.each_row() -= transition.row(i); gradA.each_col() %= transition.row(i).t(); for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { for (unsigned int j = 0; j < emission.n_rows; j++) { double tmp = 1.0; for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(j, obs(r, t + 1, k), r); } gradArow(j) += alpha(i, t) * tmp * beta(j, t + 1) / scales(t + 1); } } gradArow = gradA * gradArow; grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradArow.rows(ind); countgrad += ind.n_elem; } } // emissionMatrix for (unsigned int r = 0; r < obs.n_rows; r++) { arma::vec gradBrow(nSymbols(r)); arma::mat gradB(nSymbols(r), nSymbols(r)); for (unsigned int i = 0; i < emission.n_rows; i++) { arma::uvec ind = arma::find(BNZ.slice(r).row(i)); if (ind.n_elem > 0) { gradBrow.zeros(); gradB.eye(); gradB.each_row() -= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1); gradB.each_col() %= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1).t(); for (int j = 0; j < nSymbols(r); j++) { if (obs(r, 0, k) == j) { double tmp = 1.0; for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) { if (r2 != r) { tmp *= emission(i, obs(r2, 0, k), r2); } } gradBrow(j) += init(i) * tmp * beta(i, 0) / scales(0); } for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { if (obs(r, t + 1, k) == j) { double tmp = 1.0; for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) { if (r2 != r) { tmp *= emission(i, obs(r2, t + 1, k), r2); } } gradBrow(j) += arma::dot(alpha.col(t), transition.col(i)) * tmp * beta(i, t + 1) / scales(t + 1); } } } gradBrow = gradB * gradBrow; grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradBrow.rows(ind); countgrad += ind.n_elem; } } } // InitProbs arma::uvec ind = arma::find(INZ); if (ind.n_elem > 0) { arma::vec gradIrow(emission.n_rows); arma::mat gradI(emission.n_rows, emission.n_rows); gradIrow.zeros(); gradI.zeros(); gradI.eye(); gradI.each_row() -= init.t(); gradI.each_col() %= init; for (unsigned int j = 0; j < emission.n_rows; j++) { double tmp = 1.0; for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(j, obs(r, 0, k), r); } gradIrow(j) += tmp * beta(j, 0) / scales(0); } gradIrow = gradI * gradIrow; grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradIrow.rows(ind); countgrad += ind.n_elem; } if (!scales.is_finite() || !beta.is_finite()) { #pragma omp atomic error++; } else { ll += arma::sum(log(scales)); #pragma omp critical grad += grad_k; // gradmat.col(k) = grad_k; } // for (unsigned int ii = 0; ii < grad_k.n_elem; ii++) { // #pragma omp atomic // grad(ii) += grad_k(ii); // } } } if(error > 0){ ll = -arma::math::inf(); grad.fill(-arma::math::inf()); } // } else { // grad = sum(gradmat, 1); // } return List::create(Named("objective") = -ll, Named("gradient") = wrap(-grad)); }
List objectivex(const arma::mat& transition, NumericVector emissionArray, const arma::vec& init, IntegerVector obsArray, const arma::imat& ANZ, IntegerVector emissNZ, const arma::ivec& INZ, const arma::ivec& nSymbols, const arma::mat& coef, const arma::mat& X, arma::ivec& numberOfStates, int threads) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::icube BNZ(emissNZ.begin(), emission.n_rows, emission.n_cols - 1, emission.n_slices, false, true); unsigned int q = coef.n_rows; arma::vec grad( arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ) + (numberOfStates.n_elem- 1) * q, arma::fill::zeros); arma::mat weights = exp(X * coef).t(); if (!weights.is_finite()) { grad.fill(-arma::math::inf()); return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); } weights.each_row() /= sum(weights, 0); arma::mat initk(emission.n_rows, obs.n_slices); for (unsigned int k = 0; k < obs.n_slices; k++) { initk.col(k) = init % reparma(weights.col(k), numberOfStates); } arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::mat scales(obs.n_cols, obs.n_slices); //m,n,k arma::sp_mat sp_trans(transition); internalForwardx(sp_trans.t(), emission, initk, obs, alpha, scales, threads); if (!scales.is_finite()) { grad.fill(-arma::math::inf()); return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); } internalBackwardx(sp_trans, emission, obs, beta, scales, threads); if (!beta.is_finite()) { grad.fill(-arma::math::inf()); return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); } arma::ivec cumsumstate = arma::cumsum(numberOfStates); arma::mat gradmat( arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ) + (numberOfStates.n_elem- 1) * q, obs.n_slices, arma::fill::zeros); #pragma omp parallel for if(obs.n_slices >= threads) schedule(static) num_threads(threads) \ default(none) shared(q, alpha, beta, scales, gradmat, nSymbols, ANZ, BNZ, INZ, \ numberOfStates, cumsumstate, obs, init, initk, X, weights, transition, emission) for (int k = 0; k < obs.n_slices; k++) { int countgrad = 0; // transitionMatrix if (arma::accu(ANZ) > 0) { for (int jj = 0; jj < numberOfStates.n_elem; jj++) { arma::vec gradArow(numberOfStates(jj)); arma::mat gradA(numberOfStates(jj), numberOfStates(jj)); int ind_jj = cumsumstate(jj) - numberOfStates(jj); for (int i = 0; i < numberOfStates(jj); i++) { arma::uvec ind = arma::find(ANZ.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1)); if (ind.n_elem > 0) { gradArow.zeros(); gradA.eye(); gradA.each_row() -= transition.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1); gradA.each_col() %= transition.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1).t(); for (int j = 0; j < numberOfStates(jj); j++) { for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { double tmp = alpha(ind_jj + i, t, k); for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(ind_jj + j, obs(r, t + 1, k), r); } gradArow(j) += tmp * beta(ind_jj + j, t + 1, k) / scales(t + 1, k); } } gradArow = gradA * gradArow; gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradArow.rows(ind); countgrad += ind.n_elem; } } } } if (arma::accu(BNZ) > 0) { // emissionMatrix for (unsigned int r = 0; r < obs.n_rows; r++) { arma::vec gradBrow(nSymbols(r)); arma::mat gradB(nSymbols(r), nSymbols(r)); for (unsigned int i = 0; i < emission.n_rows; i++) { arma::uvec ind = arma::find(BNZ.slice(r).row(i)); if (ind.n_elem > 0) { gradBrow.zeros(); gradB.eye(); gradB.each_row() -= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1); gradB.each_col() %= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1).t(); for (int j = 0; j < nSymbols(r); j++) { if (obs(r, 0, k) == j) { double tmp = initk(i, k); for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) { if (r2 != r) { tmp *= emission(i, obs(r2, 0, k), r2); } } gradBrow(j) += tmp * beta(i, 0, k) / scales(0, k); } for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { if (obs(r, t + 1, k) == j) { double tmp = beta(i, t + 1, k) / scales(t + 1, k); for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) { if (r2 != r) { tmp *= emission(i, obs(r2, t + 1, k), r2); } } gradBrow(j) += arma::dot(alpha.slice(k).col(t), transition.col(i)) * tmp; } } } gradBrow = gradB * gradBrow; gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradBrow.rows(ind); countgrad += ind.n_elem; } } } } if (arma::accu(INZ) > 0) { for (int i = 0; i < numberOfStates.n_elem; i++) { int ind_i = cumsumstate(i) - numberOfStates(i); arma::uvec ind = arma::find( INZ.subvec(ind_i, cumsumstate(i) - 1)); if (ind.n_elem > 0) { arma::vec gradIrow(numberOfStates(i), arma::fill::zeros); for (int j = 0; j < numberOfStates(i); j++) { double tmp = weights(i, k); for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(ind_i + j, obs(r, 0, k), r); } gradIrow(j) += tmp * beta(ind_i + j, 0, k) / scales(0, k); } arma::mat gradI(numberOfStates(i), numberOfStates(i), arma::fill::zeros); gradI.eye(); gradI.each_row() -= init.subvec(ind_i, cumsumstate(i) - 1).t(); gradI.each_col() %= init.subvec(ind_i, cumsumstate(i) - 1); gradIrow = gradI * gradIrow; gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradIrow.rows(ind); countgrad += ind.n_elem; } } } for (int jj = 1; jj < numberOfStates.n_elem; jj++) { int ind_jj = (cumsumstate(jj) - numberOfStates(jj)); for (int j = 0; j < emission.n_rows; j++) { double tmp = 1.0; for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(j, obs(r, 0, k), r); } if ((j >= ind_jj) & (j < cumsumstate(jj))) { gradmat.col(k).subvec(countgrad + q * (jj - 1), countgrad + q * jj - 1) += tmp * beta(j, 0, k) / scales(0, k) * initk(j, k) * X.row(k).t() * (1.0 - weights(jj, k)); } else { gradmat.col(k).subvec(countgrad + q * (jj - 1), countgrad + q * jj - 1) -= tmp * beta(j, 0, k) / scales(0, k) * initk(j, k) * X.row(k).t() * weights(jj, k); } } } } return List::create(Named("objective") = -arma::accu(log(scales)), Named("gradient") = wrap(-sum(gradmat, 1))); }
// [[Rcpp::export]] SEXP combine_vars(CharacterVector vars, ListOf<IntegerVector> xs) { VarList selected(vars.size()); if (xs.size() == 0) return IntegerVector::create(); // Workaround bug in ListOf<>; can't access attributes SEXP raw_names = Rf_getAttrib(xs, Rf_mkString("names")); CharacterVector xs_names; if (raw_names == R_NilValue) { xs_names = CharacterVector(xs.size(), "" ); } else { xs_names = raw_names ; } // If first component is negative, pre-fill with existing vars if (vector_sign(xs[0]) == -1) { for (int j = 0; j < vars.size(); ++j) { selected.add(j + 1, vars[j]); } } for (int i = 0; i < xs.size(); ++i) { IntegerVector x = xs[i]; if (x.size() == 0) continue; int sign = vector_sign(x); if (sign == 0) stop("Each argument must yield either positive or negative integers"); if (sign == 1) { bool group_named = xs_names[i] != ""; bool has_names = x.attr("names") != R_NilValue; if (group_named) { if (x.size() == 1) { selected.update(x[0], xs_names[i]); } else { // If the group is named, children are numbered sequentially for (int j = 0; j < x.size(); ++j) { std::stringstream out; out << xs_names[i] << j + 1; selected.update(x[j], out.str()); } } } else if (has_names) { CharacterVector names = x.names() ; for (int j = 0; j < x.size(); ++j) { selected.update(x[j], names[j]); } } else { for (int j = 0; j < x.size(); ++j) { int pos = x[j]; if (pos < 1 || pos > vars.size()) stop("Position must be between 0 and n"); // Add default name, if not all ready present if (!selected.has(pos)) selected.update(pos, vars[pos - 1]); } } } else { for (int j = 0; j < x.size(); ++j) { selected.remove(-x[j]); } } } return selected; }
List EM(NumericVector transitionMatrix, NumericVector emissionArray, NumericVector initialProbs, IntegerVector obsArray, const arma::ivec& nSymbols, int itermax, double tol, int trace, int threads) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::vec init(initialProbs.begin(), emission.n_rows, true); arma::mat transition(transitionMatrix.begin(), emission.n_rows, emission.n_rows, true); arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::mat scales(obs.n_cols, obs.n_slices); internalForward(transition, emission, init, obs, alpha, scales, threads); if(!scales.is_finite()) { return List::create(Named("error") = 1); } double min_sf = scales.min(); if (min_sf < 1e-150) { Rcpp::warning("Smallest scaling factor was %e, results can be numerically unstable.", min_sf); } internalBackward(transition, emission, obs, beta, scales, threads); if(!beta.is_finite()) { return List::create(Named("error") = 2); } arma::rowvec ll = arma::sum(log(scales)); double sumlogLik = sum(ll); if (trace > 0) { Rcout << "Log-likelihood of initial model: " << sumlogLik << std::endl; } // // //EM-algorithm begins // double change = tol + 1.0; int iter = 0; while ((change > tol) & (iter < itermax)) { iter++; arma::mat ksii(emission.n_rows, emission.n_rows, arma::fill::zeros); arma::cube gamma(emission.n_rows, emission.n_cols, emission.n_slices, arma::fill::zeros); arma::vec delta(emission.n_rows, arma::fill::zeros); for (unsigned int k = 0; k < obs.n_slices; k++) { delta += alpha.slice(k).col(0) % beta.slice(k).col(0); } #pragma omp parallel for if(obs.n_slices>=threads) schedule(static) num_threads(threads) \ default(none) shared(transition, obs, alpha, beta, scales, \ emission, ksii, gamma, nSymbols) for (int k = 0; k < obs.n_slices; k++) { if (obs.n_cols > 1) { for (unsigned int j = 0; j < emission.n_rows; j++) { for (unsigned int i = 0; i < emission.n_rows; i++) { if (transition(i, j) > 0.0) { for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { double tmp = alpha(i, t, k) * transition(i, j) * beta(j, t + 1, k) / scales(t + 1, k); for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(j, obs(r, t + 1, k), r); } #pragma omp atomic ksii(i, j) += tmp; } } } } } for (unsigned int r = 0; r < emission.n_slices; r++) { for (int l = 0; l < nSymbols(r); l++) { for (unsigned int i = 0; i < emission.n_rows; i++) { if (emission(i, l, r) > 0.0) { for (unsigned int t = 0; t < obs.n_cols; t++) { if (l == (obs(r, t, k))) { #pragma omp atomic gamma(i, l, r) += alpha(i, t, k) * beta(i, t, k); } } } } } } } if (obs.n_cols > 1) { ksii.each_col() /= sum(ksii, 1); transition = ksii; } for (unsigned int r = 0; r < emission.n_slices; r++) { gamma.slice(r).cols(0, nSymbols(r) - 1).each_col() /= sum( gamma.slice(r).cols(0, nSymbols(r) - 1), 1); emission.slice(r).cols(0, nSymbols(r) - 1) = gamma.slice(r).cols(0, nSymbols(r) - 1); } delta /= arma::as_scalar(arma::accu(delta)); init = delta; internalForward(transition, emission, init, obs, alpha, scales, threads); if(!scales.is_finite()) { return List::create(Named("error") = 1); } internalBackward(transition, emission, obs, beta, scales, threads); if(!beta.is_finite()) { return List::create(Named("error") = 2); } double min_sf = scales.min(); if (min_sf < 1e-150) { Rcpp::warning("Smallest scaling factor was %e, results can be numerically unstable.", min_sf); } ll = sum(log(scales)); double tmp = sum(ll); change = (tmp - sumlogLik) / (std::abs(sumlogLik) + 0.1); sumlogLik = tmp; if (trace > 1) { Rcout << "iter: " << iter; Rcout << " logLik: " << sumlogLik; Rcout << " relative change: " << change << std::endl; } } if (trace > 0) { if (iter == itermax) { Rcpp::Rcout << "EM algorithm stopped after reaching the maximum number of " << iter << " iterations." << std::endl; } else { Rcpp::Rcout << "EM algorithm stopped after reaching the relative change of " << change; Rcpp::Rcout << " after " << iter << " iterations." << std::endl; } Rcpp::Rcout << "Final log-likelihood: " << sumlogLik << std::endl; } return List::create(Named("initialProbs") = wrap(init), Named("transitionMatrix") = wrap(transition), Named("emissionArray") = wrap(emission), Named("logLik") = sumlogLik, Named("iterations") = iter, Named("change") = change, Named("error") = 0); }