// Scan a single chromosome with interactive covariates // this version should be fast but requires more memory // (since we first expand the genotype probabilities to probs x intcovar) // and this one allows weights for the individuals (the same for all phenotypes) // // genoprobs = 3d array of genotype probabilities (individuals x genotypes x positions) // pheno = matrix of numeric phenotypes (individuals x phenotypes) // (no missing data allowed) // addcovar = additive covariates (an intercept, at least) // intcovar = interactive covariates (should also be included in addcovar) // weights = vector of weights // // output = matrix of residual sums of squares (RSS) (phenotypes x positions) // // [[Rcpp::export]] NumericMatrix scan_binary_onechr_intcovar_weighted_highmem(const NumericVector& genoprobs, const NumericMatrix& pheno, const NumericMatrix& addcovar, const NumericMatrix& intcovar, const NumericVector& weights, const int maxit=100, const double tol=1e-6, const double qr_tol=1e-12) { const int n_ind = pheno.rows(); if(Rf_isNull(genoprobs.attr("dim"))) throw std::invalid_argument("genoprobs should be a 3d array but has no dim attribute"); const Dimension d = genoprobs.attr("dim"); if(d.size() != 3) throw std::invalid_argument("genoprobs should be a 3d array"); if(n_ind != d[0]) throw std::range_error("nrow(pheno) != nrow(genoprobs)"); if(n_ind != addcovar.rows()) throw std::range_error("nrow(pheno) != nrow(addcovar)"); if(n_ind != intcovar.rows()) throw std::range_error("nrow(pheno) != nrow(intcovar)"); if(n_ind != weights.size()) throw std::range_error("nrow(pheno) != length(weights)"); // expand genotype probabilities to include geno x interactive covariate NumericVector genoprobs_rev = expand_genoprobs_intcovar(genoprobs, intcovar); // genotype can return scan_binary_onechr_weighted(genoprobs_rev, pheno, addcovar, weights, maxit, tol, qr_tol); }
// [[Rcpp::export]] NumericVector attribs() { NumericVector out = NumericVector::create(1, 2, 3); out.names() = CharacterVector::create("a", "b", "c"); out.attr("my-attr") = "my-value"; out.attr("class") = "my-class"; return out; }
void defineVariable(NumericVector x, std::string name) { readstat_label_set_t* labelSet = NULL; if (rClass(x) == "labelled") { labelSet = readstat_add_label_set(writer_, READSTAT_TYPE_DOUBLE, name.c_str()); NumericVector values = as<NumericVector>(x.attr("labels")); CharacterVector labels = as<CharacterVector>(values.attr("names")); for (int i = 0; i < values.size(); ++i) readstat_label_double_value(labelSet, values[i], std::string(labels[i]).c_str()); } readstat_add_variable(writer_, READSTAT_TYPE_DOUBLE, 0, name.c_str(), var_label(x), NULL, labelSet); }
// [[Rcpp::export]] arma::cube gibbsCPP(NumericVector p2, int T, int M, double rho, double x0, double y0) { IntegerVector dim_p2=p2.attr("dim"); arma::cube p(p2.begin(), dim_p2[0], dim_p2[1], dim_p2[2]); double x, y; arma::vec n1 = rnormC(76543,M*T); //arma::vec n2 = rnormC(76543,M*T); for(int m=0;m<M;m++) { x = x0; y = y0; p(0,0,m) = x; p(1,0,m) = y; for(int t=1;t<T;t++) { //x = rnormC(1,M*T)[m*t]; //x = x * sqrt(1-pow(rho,2)) + rho * y; //y = rnormC(76543,M*T)[(m*t)+1]; //y = y * sqrt(1-pow(rho,2)) + rho * x; x = n1[m*T]; x = x * sqrt(1-pow(rho,2)) + rho * y; y = n1[m*T+1]; y = y * sqrt(1-pow(rho,2)) + rho * x; p(0,t,m) = x; p(1,t,m) = y; } } return(p); }
// [[Rcpp::export]] NumericVector row_weights(NumericMatrix x, NumericVector weight) { int nX = x.ncol(); int nY = x.nrow(); NumericVector v = no_init(nY); #pragma omp parallel for schedule(static) for (int i=0; i < nY; i++) { NumericMatrix::Row row = x(i, _); double w = 0; for (int j=0; j < nX; j++) { if(row[j]!=0 && !R_IsNA(row[j])) { w += weight[j]; } } double o = 0; if (w!=0) { for(int j=0; j < nX; j++) { if(row[j]!=0 && !R_IsNA(row[j])) { o += row[j]*weight[j] / w; } } } v[i] = o; } v.attr("names") = rownames(x); return v; }
// [[Rcpp::export]] List pwTabMerge(List hsum, NumericMatrix pw) { int n = pw.ncol() ; List out(n) ; for(int k = 0; k < n; k++){ RCPP_UNORDERED_MAP<std::string,double> preout ; List ip(2) ; ip[0] = hsum[pw(0,k)] ; ip[1] = hsum[pw(1,k)] ; for(int i = 0; i < 2; i++){ NumericVector x = ip[i] ; CharacterVector names = x.attr("names") ; int m = x.size() ; for(int j = 0; j < m; j++){ String name = names[j] ; preout[ name ] += x[j] ; } } out[k] = preout ; } return wrap(out) ; }
//' Do values in a numeric vector fall in specified range? //' //' This is a shortcut for `x >= left & x <= right`, implemented //' efficiently in C++ for local values, and translated to the //' appropriate SQL for remote tables. //' //' @param x A numeric vector of values //' @param left,right Boundary values //' @export //' @examples //' between(1:12, 7, 9) //' //' x <- rnorm(1e2) //' x[between(x, -1, 1)] // [[Rcpp::export]] LogicalVector between(NumericVector x, double left, double right) { int n = x.size(); LogicalVector out(no_init(n)); // Assume users know what they're doing with date/times. In the future // should ensure that left and right are the correct class too. if (x.attr("class") != R_NilValue && !Rf_inherits(x, "Date") && !Rf_inherits(x, "POSIXct")) { warningcall(R_NilValue, "between() called on numeric vector with S3 class"); } if (NumericVector::is_na(left) || NumericVector::is_na(right)) { for (int i = 0; i < n; ++i) out[i] = NA_LOGICAL; return out; } for (int i = 0; i < n; ++i) { if (NumericVector::is_na(x[i])) { out[i] = NA_LOGICAL; } else if ((x[i] >= left) && (x[i] <= right)) { out[i] = true; } else { out[i] = false; } } return out; }
// Scan a single chromosome with additive covariates and weights // // genoprobs = 3d array of genotype probabilities (individuals x genotypes x positions) // pheno = matrix of numeric phenotypes (individuals x phenotypes) // (no missing data allowed, values should be in [0,1]) // addcovar = additive covariates (an intercept, at least) // weights = vector of weights // // output = matrix of (weighted) residual sums of squares (RSS) (phenotypes x positions) // // [[Rcpp::export]] NumericMatrix scan_binary_onechr_weighted(const NumericVector& genoprobs, const NumericMatrix& pheno, const NumericMatrix& addcovar, const NumericVector& weights, const int maxit=100, const double tol=1e-6, const double qr_tol=1e-12, const double eta_max=30.0) { const int n_ind = pheno.rows(); if(Rf_isNull(genoprobs.attr("dim"))) throw std::invalid_argument("genoprobs should be a 3d array but has no dim attribute"); const Dimension d = genoprobs.attr("dim"); if(d.size() != 3) throw std::invalid_argument("genoprobs should be a 3d array"); if(n_ind != d[0]) throw std::range_error("nrow(pheno) != nrow(genoprobs)"); if(n_ind != addcovar.rows()) throw std::range_error("nrow(pheno) != nrow(addcovar)"); if(n_ind != weights.size()) throw std::range_error("nrow(pheno) != length(weights)"); const int n_pos = d[2]; const int n_gen = d[1]; const int n_add = addcovar.cols(); const int g_size = n_ind * n_gen; const int n_phe = pheno.cols(); NumericMatrix result(n_phe, n_pos); NumericMatrix X(n_ind, n_gen+n_add); if(n_add > 0) // paste in covariates, if present std::copy(addcovar.begin(), addcovar.end(), X.begin() + g_size); for(int pos=0, offset=0; pos<n_pos; pos++, offset += g_size) { Rcpp::checkUserInterrupt(); // check for ^C from user // copy genoprobs for this pos into a matrix std::copy(genoprobs.begin() + offset, genoprobs.begin() + offset + g_size, X.begin()); for(int phe=0; phe<n_phe; phe++) { // calc rss and paste into ith column of result result(phe,pos) = calc_ll_binreg_weighted(X, pheno(_,phe), weights, maxit, tol, qr_tol, eta_max); } } return result; }
NumericVector logLikMixHMM(NumericVector transitionMatrix, NumericVector emissionArray, NumericVector initialProbs, IntegerVector obsArray, NumericMatrix coefs, NumericMatrix X_, IntegerVector numberOfStates) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r int q = coefs.nrow(); arma::mat coef(coefs.begin(),q,coefs.ncol()); coef.col(0).zeros(); arma::mat X(X_.begin(),oDims[0],q); arma::mat lweights = exp(X*coef).t(); if(!lweights.is_finite()){ return wrap(-std::numeric_limits<double>::max()); } lweights.each_row() /= sum(lweights,0); arma::colvec init(initialProbs.begin(),eDims[0], true); arma::mat transition(transitionMatrix.begin(),eDims[0],eDims[0], true); arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false); arma::vec alpha(eDims[0]); NumericVector ll(oDims[0]); double tmp; arma::vec initk(eDims[0]); for(int k = 0; k < oDims[0]; k++){ initk = init % reparma(lweights.col(k),numberOfStates); for(int i=0; i < eDims[0]; i++){ alpha(i) = initk(i); for(int r = 0; r < oDims[2]; r++){ alpha(i) *= emission(i,obs(k,0,r),r); } } tmp = sum(alpha); ll(k) = log(tmp); alpha /= tmp; arma::vec alphatmp(eDims[0]); for(int t = 1; t < oDims[1]; t++){ for(int i = 0; i < eDims[0]; i++){ alphatmp(i) = arma::dot(transition.col(i), alpha); for(int r = 0; r < oDims[2]; r++){ alphatmp(i) *= emission(i,obs(k,t,r),r); } } tmp = sum(alphatmp); ll(k) += log(tmp); alpha = alphatmp/tmp; } } return ll; }
// LMM scan of a single chromosome with interactive covariates // this version should be fast but requires more memory // (since we first expand the genotype probabilities to probs x intcovar) // and this one allows weights for the individuals (the same for all phenotypes) // // genoprobs = 3d array of genotype probabilities (individuals x genotypes x positions) // pheno = matrix with one column of numeric phenotypes // (no missing data allowed) // addcovar = additive covariates (an intercept, at least) // intcovar = interactive covariates (should also be included in addcovar) // eigenvec = matrix of transposed eigenvectors of variance matrix // weights = vector of weights (really the SQUARE ROOT of the weights) // // output = vector of log likelihood values // // [[Rcpp::export]] NumericVector scan_pg_onechr_intcovar_highmem(const NumericVector& genoprobs, const NumericMatrix& pheno, const NumericMatrix& addcovar, const NumericMatrix& intcovar, const NumericMatrix& eigenvec, const NumericVector& weights, const double tol=1e-12) { const unsigned int n_ind = pheno.rows(); if(pheno.cols() != 1) throw std::range_error("ncol(pheno) != 1"); const Dimension d = genoprobs.attr("dim"); const unsigned int n_pos = d[2]; if(n_ind != d[0]) throw std::range_error("nrow(pheno) != nrow(genoprobs)"); if(n_ind != addcovar.rows()) throw std::range_error("nrow(pheno) != nrow(addcovar)"); if(n_ind != intcovar.rows()) throw std::range_error("nrow(pheno) != nrow(intcovar)"); if(n_ind != weights.size()) throw std::range_error("nrow(pheno) != length(weights)"); if(n_ind != eigenvec.rows()) throw std::range_error("ncol(pheno) != nrow(eigenvec)"); if(n_ind != eigenvec.cols()) throw std::range_error("ncol(pheno) != ncol(eigenvec)"); // expand genotype probabilities to include geno x interactive covariate NumericVector genoprobs_rev = expand_genoprobs_intcovar(genoprobs, intcovar); // pre-multiply everything by eigenvectors genoprobs_rev = matrix_x_3darray(eigenvec, genoprobs_rev); NumericMatrix addcovar_rev = matrix_x_matrix(eigenvec, addcovar); NumericMatrix pheno_rev = matrix_x_matrix(eigenvec, pheno); // multiply everything by the (square root) of the weights // (weights should ALREADY be the square-root of the real weights) addcovar_rev = weighted_matrix(addcovar_rev, weights); pheno_rev = weighted_matrix(pheno_rev, weights); genoprobs_rev = weighted_3darray(genoprobs_rev, weights); // regress out the additive covariates genoprobs_rev = calc_resid_linreg_3d(addcovar_rev, genoprobs_rev, tol); pheno_rev = calc_resid_linreg(addcovar_rev, pheno_rev, tol); // now the scan, return RSS NumericMatrix rss = scan_hk_onechr_nocovar(genoprobs_rev, pheno_rev, tol); // 0.5*sum(log(weights)) [since these are sqrt(weights)] double sum_logweights = sum(log(weights)); // calculate log likelihood NumericVector result(n_pos); for(unsigned int pos=0; pos<n_pos; pos++) result[pos] = -(double)n_ind/2.0*log(rss[pos]) + sum_logweights; return result; }
// use calc_resid_linreg for a 3-dim array // [[Rcpp::export]] NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P, const double tol=1e-12) { const int nrowx = X.rows(); if(Rf_isNull(P.attr("dim"))) throw std::invalid_argument("P should be a 3d array but has no dim attribute"); const Dimension d = P.attr("dim"); if(d.size() != 3) throw std::invalid_argument("P should be a 3d array"); if(d[0] != nrowx) throw std::range_error("nrow(X) != nrow(P)"); NumericMatrix pr(nrowx, d[1]*d[2]); std::copy(P.begin(), P.end(), pr.begin()); // FIXME I shouldn't need to copy NumericMatrix result = calc_resid_eigenqr(X, pr, tol); result.attr("dim") = d; return result; }
// LMM scan of a single chromosome with interactive covariates // this version uses less memory but will be slower // (since we need to work with each position, one at a time) // and this one allows weights for the individuals (the same for all phenotypes) // // genoprobs = 3d array of genotype probabilities (individuals x genotypes x positions) // pheno = matrix with one column of numeric phenotypes // (no missing data allowed) // addcovar = additive covariates (an intercept, at least) // intcovar = interactive covariates (should also be included in addcovar) // eigenvec = matrix of transposed eigenvectors of variance matrix // weights = vector of weights (really the SQUARE ROOT of the weights) // // output = vector of log likelihood values // // [[Rcpp::export]] NumericVector scan_pg_onechr_intcovar_lowmem(const NumericVector& genoprobs, const NumericMatrix& pheno, const NumericMatrix& addcovar, const NumericMatrix& intcovar, const NumericMatrix& eigenvec, const NumericVector& weights, const double tol=1e-12) { const unsigned int n_ind = pheno.rows(); if(pheno.cols() != 1) throw std::range_error("ncol(pheno) != 1"); const Dimension d = genoprobs.attr("dim"); const unsigned int n_pos = d[2]; if(n_ind != d[0]) throw std::range_error("nrow(pheno) != nrow(genoprobs)"); if(n_ind != addcovar.rows()) throw std::range_error("nrow(pheno) != nrow(addcovar)"); if(n_ind != intcovar.rows()) throw std::range_error("nrow(pheno) != nrow(intcovar)"); if(n_ind != weights.size()) throw std::range_error("ncol(pheno) != length(weights)"); if(n_ind != eigenvec.rows()) throw std::range_error("ncol(pheno) != nrow(eigenvec)"); if(n_ind != eigenvec.cols()) throw std::range_error("ncol(pheno) != ncol(eigenvec)"); NumericVector result(n_pos); // multiply pheno by eigenvectors and then by weights NumericMatrix pheno_rev = matrix_x_matrix(eigenvec, pheno); pheno_rev = weighted_matrix(pheno_rev, weights); // 0.5*sum(log(weights)) [since these are sqrt(weights)] double sum_logweights = sum(log(weights)); for(unsigned int pos=0; pos<n_pos; pos++) { Rcpp::checkUserInterrupt(); // check for ^C from user // form X matrix NumericMatrix X = formX_intcovar(genoprobs, addcovar, intcovar, pos, true); // multiply by eigenvectors X = matrix_x_matrix(eigenvec, X); // multiply by weights X = weighted_matrix(X, weights); // do regression NumericVector rss = calc_rss_linreg(X, pheno_rev, tol); result[pos] = -(double)n_ind/2.0*log(rss[0]) + sum_logweights; } return result; }
// Scan a single chromosome with interactive covariates // this version uses less memory but will be slower // (since we need to work with each position, one at a time) // and this one allows weights for the individuals (the same for all phenotypes) // // genoprobs = 3d array of genotype probabilities (individuals x genotypes x positions) // pheno = matrix of numeric phenotypes (individuals x phenotypes) // (no missing data allowed) // addcovar = additive covariates (an intercept, at least) // intcovar = interactive covariates (should also be included in addcovar) // weights = vector of weights // // output = matrix of residual sums of squares (RSS) (phenotypes x positions) // // [[Rcpp::export]] NumericMatrix scan_binary_onechr_intcovar_weighted_lowmem(const NumericVector& genoprobs, const NumericMatrix& pheno, const NumericMatrix& addcovar, const NumericMatrix& intcovar, const NumericVector& weights, const int maxit=100, const double tol=1e-6, const double qr_tol=1e-12, const double eta_max=30.0) { const int n_ind = pheno.rows(); if(Rf_isNull(genoprobs.attr("dim"))) throw std::invalid_argument("genoprobs should be a 3d array but has no dim attribute"); const Dimension d = genoprobs.attr("dim"); if(d.size() != 3) throw std::invalid_argument("genoprobs should be a 3d array"); const int n_pos = d[2]; const int n_phe = pheno.cols(); if(n_ind != d[0]) throw std::range_error("nrow(pheno) != nrow(genoprobs)"); if(n_ind != addcovar.rows()) throw std::range_error("nrow(pheno) != nrow(addcovar)"); if(n_ind != intcovar.rows()) throw std::range_error("nrow(pheno) != nrow(intcovar)"); NumericMatrix result(n_phe, n_pos); for(int pos=0; pos<n_pos; pos++) { Rcpp::checkUserInterrupt(); // check for ^C from user // form X matrix NumericMatrix X = formX_intcovar(genoprobs, addcovar, intcovar, pos, true); for(int phe=0; phe<n_phe; phe++) { // do regression result(phe,pos) = calc_ll_binreg_weighted(X, pheno(_,phe), weights, maxit, tol, qr_tol, eta_max); } } return result; }
// use calc_resid_linreg for a 3-dim array // [[Rcpp::export]] NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P) { int nrowx = X.rows(); int sizep = P.size(); NumericMatrix pr(nrowx, sizep/nrowx); std::copy(P.begin(), P.end(), pr.begin()); // FIXME I shouldn't need to copy NumericMatrix result = calc_resid_linreg(X, pr); result.attr("dim") = P.attr("dim"); return result; }
//' FITS image writer //' //' Writes a vector, matrix or 3D array to a FITS file as an image. //' The data is written to the primary HDU. //' // [[Rcpp::export]] int gv_writefits_img(NumericVector img, CharacterVector fits_name, CharacterVector hdu_name = "") { IntegerVector dim; if (!img.hasAttribute("dim")) { REprintf("ERROR: image has not been dimensioned.\n"); return 1; } dim = img.attr("dim"); if (dim.length() > 3) { REprintf("ERROR: dimension of more than 3 unsupported.\n"); return 1; } fitsfile *pfits=NULL; int err=0; std::string fname = as<std::string>(fits_name[0]); fits_create_file(&pfits, (char *) fname.c_str(), &err); if (err) { gv_print_fits_err(err); return err; } #ifdef GV_DEBUG Rcout << "Number of dim: " << dim.length() << std::endl; for (int i=0; i<dim.length(); i++) { Rcout << "Dim[" << i << "]: " << dim[i] << std::endl; } Rcout << "Number of elements: " << img.length() << std::endl; double *p = &(*img.begin()); for (int i=0; i<img.length(); i++) { Rcout << "*(p+" << i << ") = " << *(p+i) << std::endl; } #endif long longdim[3], startpix[3] = {1,1,1}; // default start for (int i=0; i<dim.length(); i++) longdim[i] = (long) dim[i]; // start writing to file fits_create_img(pfits, DOUBLE_IMG, dim.length(), longdim, &err); fits_write_pix(pfits, TDOUBLE, startpix, img.length(), &(*img.begin()), &err); fits_close_file(pfits, &err); return err; }
// use calc_resid_linreg for a 3-dim array // [[Rcpp::export]] NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P, const double tol=1e-12) { const unsigned int nrowx = X.rows(); const Dimension d = P.attr("dim"); if(d[0] != nrowx) throw std::range_error("nrow(X) != nrow(P)"); NumericMatrix pr(nrowx, d[1]*d[2]); std::copy(P.begin(), P.end(), pr.begin()); // FIXME I shouldn't need to copy NumericMatrix result = calc_resid_eigenqr(X, pr, tol); result.attr("dim") = d; return result; }
List viterbi(NumericVector transitionMatrix, NumericVector emissionArray, NumericVector initialProbs, IntegerVector obsArray) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::vec init(initialProbs.begin(), eDims[0], false); arma::mat transition(transitionMatrix.begin(), eDims[0], eDims[0], false); arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false); arma::umat q(oDims[0], oDims[1]); arma::vec logp(oDims[0]); arma::mat delta(eDims[0],oDims[1]); arma::umat phi(eDims[0],oDims[1]); for(int k=0; k<oDims[0]; k++){ delta.col(0) = init; for(int r=0; r<eDims[2]; r++){ delta.col(0) += emission.slice(r).col(obs(k,0,r)); } phi.col(0).zeros(); for(int t=1; t<oDims[1]; t++){ for(int j=0; j<eDims[0]; j++){ (delta.col(t-1)+transition.col(j)).max(phi(j,t)); delta(j,t) = delta(phi(j,t),t-1)+transition(phi(j,t),j); for(int r=0; r<eDims[2]; r++){ delta(j,t) += emission(j,obs(k,t,r),r); } } } delta.col(oDims[1]-1).max(q(k,oDims[1]-1)); for(int t=(oDims[1]-2); t>=0; t--){ q(k,t) = phi(q(k,t+1),t+1); } logp(k) = delta.col(oDims[1]-1).max(); } return List::create(Named("q") = wrap(q),Named("logp") = wrap(logp)); }
NumericVector logLikHMM(NumericVector transitionMatrix, NumericVector emissionArray, NumericVector initialProbs, IntegerVector obsArray) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::colvec init(initialProbs.begin(),eDims[0], false); arma::mat transition(transitionMatrix.begin(),eDims[0],eDims[0], false); arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false); arma::vec alpha(eDims[0]); NumericVector ll(oDims[0]); double tmp; for(int k = 0; k < oDims[0]; k++){ for(int i=0; i < eDims[0]; i++){ alpha(i) = init(i); for(int r = 0; r < oDims[2]; r++){ alpha(i) *= emission(i,obs(k,0,r),r); } } tmp = sum(alpha); ll(k) = log(tmp); alpha /= tmp; arma::vec alphatmp(eDims[0]); for(int t = 1; t < oDims[1]; t++){ for(int i = 0; i < eDims[0]; i++){ alphatmp(i) = arma::dot(transition.col(i), alpha); for(int r = 0; r < oDims[2]; r++){ alphatmp(i) *= emission(i,obs(k,t,r),r); } } tmp = sum(alphatmp); ll(k) += log(tmp); alpha = alphatmp/tmp; } } return ll; }
List viterbi(const arma::mat& transition, NumericVector emissionArray, const arma::vec& init, IntegerVector obsArray) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::umat q(obs.n_slices, obs.n_cols); arma::vec logp(obs.n_slices); arma::mat delta(emission.n_rows, obs.n_cols); arma::umat phi(emission.n_rows, obs.n_cols); for (unsigned int k = 0; k < obs.n_slices; k++) { delta.col(0) = init; for (unsigned int r = 0; r < emission.n_slices; r++) { delta.col(0) += emission.slice(r).col(obs(r, 0, k)); } phi.col(0).zeros(); for (unsigned int t = 1; t < obs.n_cols; t++) { for (unsigned int j = 0; j < emission.n_rows; j++) { (delta.col(t - 1) + transition.col(j)).max(phi(j, t)); delta(j, t) = delta(phi(j, t), t - 1) + transition(phi(j, t), j); for (unsigned int r = 0; r < emission.n_slices; r++) { delta(j, t) += emission(j, obs(r, t, k), r); } } } delta.col(obs.n_cols - 1).max(q(k, obs.n_cols - 1)); for (int t = (obs.n_cols - 2); t >= 0; t--) { q(k, t) = phi(q(k, t + 1), t + 1); } logp(k) = delta.col(obs.n_cols - 1).max(); } return List::create(Named("q") = wrap(q), Named("logp") = wrap(logp)); }
double nlsResp::updateMu(const VectorXd& gamma) { int n = d_y.size(); if (gamma.size() != d_gamma.size()) throw invalid_argument("size mismatch in updateMu"); std::copy(gamma.data(), gamma.data() + gamma.size(), d_gamma.data()); const VectorXd lp(d_gamma + d_offset); // linear predictor const double *gg = lp.data(); for (int p = 0; p < d_pnames.size(); ++p) { std::string pn(d_pnames[p]); NumericVector pp = d_nlenv.get(pn); std::copy(gg + n * p, gg + n * (p + 1), pp.begin()); } NumericVector rr = d_nlmod.eval(SEXP(d_nlenv)); if (rr.size() != n) throw invalid_argument("dimension mismatch"); std::copy(rr.begin(), rr.end(), d_mu.data()); NumericMatrix gr = rr.attr("gradient"); std::copy(gr.begin(), gr.end(), d_sqrtXwt.data()); return updateWrss(); }
List forwardbackward(const arma::mat& transition, NumericVector emissionArray, const arma::vec& init, IntegerVector obsArray, bool forwardonly, int threads) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::mat scales(obs.n_cols, obs.n_slices); //n,k internalForward(transition, emission, init, obs, alpha, scales, threads); if(!scales.is_finite()) { Rcpp::stop("Scaling factors contain non-finite values. \n Check the model or try using the log-space version of the algorithm."); } double min_sf = scales.min(); if (min_sf < 1e-150) { Rcpp::warning("Smallest scaling factor was %e, results can be numerically unstable.", min_sf); } if (forwardonly) { return List::create(Named("forward_probs") = wrap(alpha), Named("scaling_factors") = wrap(scales)); } else { arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k internalBackward(transition, emission, obs, beta, scales, threads); if(!beta.is_finite()) { Rcpp::stop("Backward probabilities contain non-finite values. Check the model or try using the log-space version of the algorithm."); } return List::create(Named("forward_probs") = wrap(alpha), Named("backward_probs") = wrap(beta), Named("scaling_factors") = wrap(scales)); } }
double nlmerResp::updateMu(Rcpp::NumericVector const &gamma) throw(std::runtime_error) { int n = d_y.size(); #ifdef USE_RCPP_SUGAR Rcpp::NumericVector gam = gamma + d_offset; #else NumericVector gam(d_offset.size()); std::transform(gamma.begin(), gamma.end(), d_offset.begin(), gam.begin(), std::plus<double>()); #endif double *gg = gam.begin(); for (int p = 0; p < d_pnames.size(); p++) { std::string pn(d_pnames[p]); Rcpp::NumericVector pp = d_nlenv.get(pn); std::copy(gg + n * p, gg + n * (p + 1), pp.begin()); } NumericVector rr = d_nlmod.eval(SEXP(d_nlenv)); if (rr.size() != n) throw std::runtime_error("dimension mismatch"); std::copy(rr.begin(), rr.end(), d_mu.begin()); NumericMatrix rrg = rr.attr("gradient"); std::copy(rrg.begin(), rrg.end(), d_sqrtXwt.begin()); return updateWrss(); }
List EM(NumericVector transitionMatrix, NumericVector emissionArray, NumericVector initialProbs, IntegerVector obsArray, const arma::ivec& nSymbols, int itermax, double tol, int trace, int threads) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::vec init(initialProbs.begin(), emission.n_rows, true); arma::mat transition(transitionMatrix.begin(), emission.n_rows, emission.n_rows, true); arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::mat scales(obs.n_cols, obs.n_slices); internalForward(transition, emission, init, obs, alpha, scales, threads); if(!scales.is_finite()) { return List::create(Named("error") = 1); } double min_sf = scales.min(); if (min_sf < 1e-150) { Rcpp::warning("Smallest scaling factor was %e, results can be numerically unstable.", min_sf); } internalBackward(transition, emission, obs, beta, scales, threads); if(!beta.is_finite()) { return List::create(Named("error") = 2); } arma::rowvec ll = arma::sum(log(scales)); double sumlogLik = sum(ll); if (trace > 0) { Rcout << "Log-likelihood of initial model: " << sumlogLik << std::endl; } // // //EM-algorithm begins // double change = tol + 1.0; int iter = 0; while ((change > tol) & (iter < itermax)) { iter++; arma::mat ksii(emission.n_rows, emission.n_rows, arma::fill::zeros); arma::cube gamma(emission.n_rows, emission.n_cols, emission.n_slices, arma::fill::zeros); arma::vec delta(emission.n_rows, arma::fill::zeros); for (unsigned int k = 0; k < obs.n_slices; k++) { delta += alpha.slice(k).col(0) % beta.slice(k).col(0); } #pragma omp parallel for if(obs.n_slices>=threads) schedule(static) num_threads(threads) \ default(none) shared(transition, obs, alpha, beta, scales, \ emission, ksii, gamma, nSymbols) for (int k = 0; k < obs.n_slices; k++) { if (obs.n_cols > 1) { for (unsigned int j = 0; j < emission.n_rows; j++) { for (unsigned int i = 0; i < emission.n_rows; i++) { if (transition(i, j) > 0.0) { for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { double tmp = alpha(i, t, k) * transition(i, j) * beta(j, t + 1, k) / scales(t + 1, k); for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(j, obs(r, t + 1, k), r); } #pragma omp atomic ksii(i, j) += tmp; } } } } } for (unsigned int r = 0; r < emission.n_slices; r++) { for (int l = 0; l < nSymbols(r); l++) { for (unsigned int i = 0; i < emission.n_rows; i++) { if (emission(i, l, r) > 0.0) { for (unsigned int t = 0; t < obs.n_cols; t++) { if (l == (obs(r, t, k))) { #pragma omp atomic gamma(i, l, r) += alpha(i, t, k) * beta(i, t, k); } } } } } } } if (obs.n_cols > 1) { ksii.each_col() /= sum(ksii, 1); transition = ksii; } for (unsigned int r = 0; r < emission.n_slices; r++) { gamma.slice(r).cols(0, nSymbols(r) - 1).each_col() /= sum( gamma.slice(r).cols(0, nSymbols(r) - 1), 1); emission.slice(r).cols(0, nSymbols(r) - 1) = gamma.slice(r).cols(0, nSymbols(r) - 1); } delta /= arma::as_scalar(arma::accu(delta)); init = delta; internalForward(transition, emission, init, obs, alpha, scales, threads); if(!scales.is_finite()) { return List::create(Named("error") = 1); } internalBackward(transition, emission, obs, beta, scales, threads); if(!beta.is_finite()) { return List::create(Named("error") = 2); } double min_sf = scales.min(); if (min_sf < 1e-150) { Rcpp::warning("Smallest scaling factor was %e, results can be numerically unstable.", min_sf); } ll = sum(log(scales)); double tmp = sum(ll); change = (tmp - sumlogLik) / (std::abs(sumlogLik) + 0.1); sumlogLik = tmp; if (trace > 1) { Rcout << "iter: " << iter; Rcout << " logLik: " << sumlogLik; Rcout << " relative change: " << change << std::endl; } } if (trace > 0) { if (iter == itermax) { Rcpp::Rcout << "EM algorithm stopped after reaching the maximum number of " << iter << " iterations." << std::endl; } else { Rcpp::Rcout << "EM algorithm stopped after reaching the relative change of " << change; Rcpp::Rcout << " after " << iter << " iterations." << std::endl; } Rcpp::Rcout << "Final log-likelihood: " << sumlogLik << std::endl; } return List::create(Named("initialProbs") = wrap(init), Named("transitionMatrix") = wrap(transition), Named("emissionArray") = wrap(emission), Named("logLik") = sumlogLik, Named("iterations") = iter, Named("change") = change, Named("error") = 0); }
// [[Rcpp::depends(RcppArmadillo)]] //' @export // [[Rcpp::export]] DataFrame triadTable(NumericVector edgeDistance, IntegerMatrix shortPaths, IntegerMatrix triads, CharacterVector vertices, NumericVector edgevalence){ IntegerVector edgeDistanceDims = edgeDistance.attr("dim"); arma::cube cubeEdgeDistance(edgeDistance.begin(), edgeDistanceDims[0], edgeDistanceDims[1], edgeDistanceDims[2], false); DataFrame triadtable; CharacterVector triadID(0); NumericVector nodeID(0); CharacterVector direction(0); NumericVector valence(0); NumericVector distance(0); int node1, node2, node3; String triname, edge1_2, edge1_3, edge2_3; int ed, dis1, dis2, dis3; String ref; int truev, diff; for(int i = 0; i < triads.nrow(); i++) { node1 = triads(i,0); node2 = triads(i,1); node3 = triads(i,2); std::string snode1; std::string snode2; std::string snode3; std::stringstream out; out << node1; snode1 = out.str(); out.str(std::string()); out << node2; snode2 = out.str(); out.str(std::string()); out << node3; snode3 = out.str(); triname = snode1 + "-" + snode2 + "-" + snode3; edge1_2 = snode1 + "-" + snode2; edge1_3 = snode1 + "-" + snode3; edge2_3 = snode2 + "-" + snode3; for(int j = 0; j < vertices.length(); j++){ dis1 = cubeEdgeDistance((node1 - 1), (node2 - 1), j); dis2 = cubeEdgeDistance((node1 - 1), (node3 - 1), j); dis3 = cubeEdgeDistance((node2 - 1), (node3 - 1), j); if (dis1 == dis2 & dis1 == dis3) { int nodedis1_2 = shortPaths[node1, node2]; int nodedis1_3 = shortPaths[node1, node3]; int nodedis2_3 = shortPaths[node2, node3]; if (nodedis1_2 != nodedis1_3) { if (nodedis1_2 != nodedis2_3) { ref = edge1_2; }else{ ref = edge1_3; } }else{ ref = edge2_3; } //NumericVector valence_set = edgevalence[triads(i, 0)]; if (ref == edge1_2) { truev = edgevalence[triads(i, 0)]; } if (ref == edge1_3) { truev = edgevalence[triads(i, 2)]; } if (ref == edge2_3) { truev = edgevalence[triads(i, 1)]; } triadID.push_back(triname); nodeID.push_back(j+1); direction.push_back("IN "); valence.push_back(truev); distance.push_back(dis1); } else{ if (dis1 != dis2) { if (dis1 != dis3) { diff = dis1; }else{ diff = dis2; } }else{ diff = dis3; } //NumericVector valence_set = edgevalence[triads(i)]; if (diff == dis1) { truev = edgevalence[triads(i, 0)]; } if (diff == dis2) { truev = edgevalence[triads(i, 2)]; } if (diff == dis3) { truev = edgevalence[triads(i, 1)]; } triadID.push_back(triname); nodeID.push_back(j+1); direction.push_back("OUT"); valence.push_back(truev); distance.push_back(diff); } } } triadtable = DataFrame::create(_["triadID"]= triadID, _["nodeID"]= nodeID, _["direction"]= direction, _["valence"]= valence, _["distance"]= distance); return triadtable; }
List objective(const arma::mat& transition, NumericVector emissionArray, const arma::vec& init, IntegerVector obsArray, const arma::imat& ANZ, IntegerVector emissNZ, const arma::ivec& INZ, const arma::ivec& nSymbols, int threads) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::icube BNZ(emissNZ.begin(), emission.n_rows, emission.n_cols - 1, emission.n_slices, false, true); arma::vec grad(arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ), arma::fill::zeros); // arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k // arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k // arma::mat scales(obs.n_cols, obs.n_slices); //m,n,k // // internalForward(transition, emission, init, obs, alpha, scales, threads); // if (!scales.is_finite()) { // grad.fill(-arma::math::inf()); // return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); // } // // internalBackward(transition, emission, obs, beta, scales, threads); // if (!beta.is_finite()) { // grad.fill(-arma::math::inf()); // return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); // } //use this instead of local vectors with grad += grad_k;, uses more memory but gives bit-identical results //arma::mat gradmat(arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ), obs.n_slices); unsigned int error = 0; double ll = 0; #pragma omp parallel for if(obs.n_slices >= threads) schedule(static) reduction(+:ll) num_threads(threads) \ default(none) shared(grad, nSymbols, ANZ, BNZ, INZ, obs, init, transition, emission, error) for (int k = 0; k < obs.n_slices; k++) { if (error == 0) { arma::mat alpha(emission.n_rows, obs.n_cols); //m,n arma::vec scales(obs.n_cols); //n arma::sp_mat sp_trans(transition); uvForward(sp_trans.t(), emission, init, obs.slice(k), alpha, scales); arma::mat beta(emission.n_rows, obs.n_cols); //m,n uvBackward(sp_trans, emission, obs.slice(k), beta, scales); int countgrad = 0; arma::vec grad_k(grad.n_elem, arma::fill::zeros); // transitionMatrix arma::vec gradArow(emission.n_rows); arma::mat gradA(emission.n_rows, emission.n_rows); for (unsigned int i = 0; i < emission.n_rows; i++) { arma::uvec ind = arma::find(ANZ.row(i)); if (ind.n_elem > 0) { gradArow.zeros(); gradA.eye(); gradA.each_row() -= transition.row(i); gradA.each_col() %= transition.row(i).t(); for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { for (unsigned int j = 0; j < emission.n_rows; j++) { double tmp = 1.0; for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(j, obs(r, t + 1, k), r); } gradArow(j) += alpha(i, t) * tmp * beta(j, t + 1) / scales(t + 1); } } gradArow = gradA * gradArow; grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradArow.rows(ind); countgrad += ind.n_elem; } } // emissionMatrix for (unsigned int r = 0; r < obs.n_rows; r++) { arma::vec gradBrow(nSymbols(r)); arma::mat gradB(nSymbols(r), nSymbols(r)); for (unsigned int i = 0; i < emission.n_rows; i++) { arma::uvec ind = arma::find(BNZ.slice(r).row(i)); if (ind.n_elem > 0) { gradBrow.zeros(); gradB.eye(); gradB.each_row() -= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1); gradB.each_col() %= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1).t(); for (int j = 0; j < nSymbols(r); j++) { if (obs(r, 0, k) == j) { double tmp = 1.0; for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) { if (r2 != r) { tmp *= emission(i, obs(r2, 0, k), r2); } } gradBrow(j) += init(i) * tmp * beta(i, 0) / scales(0); } for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { if (obs(r, t + 1, k) == j) { double tmp = 1.0; for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) { if (r2 != r) { tmp *= emission(i, obs(r2, t + 1, k), r2); } } gradBrow(j) += arma::dot(alpha.col(t), transition.col(i)) * tmp * beta(i, t + 1) / scales(t + 1); } } } gradBrow = gradB * gradBrow; grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradBrow.rows(ind); countgrad += ind.n_elem; } } } // InitProbs arma::uvec ind = arma::find(INZ); if (ind.n_elem > 0) { arma::vec gradIrow(emission.n_rows); arma::mat gradI(emission.n_rows, emission.n_rows); gradIrow.zeros(); gradI.zeros(); gradI.eye(); gradI.each_row() -= init.t(); gradI.each_col() %= init; for (unsigned int j = 0; j < emission.n_rows; j++) { double tmp = 1.0; for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(j, obs(r, 0, k), r); } gradIrow(j) += tmp * beta(j, 0) / scales(0); } gradIrow = gradI * gradIrow; grad_k.subvec(countgrad, countgrad + ind.n_elem - 1) = gradIrow.rows(ind); countgrad += ind.n_elem; } if (!scales.is_finite() || !beta.is_finite()) { #pragma omp atomic error++; } else { ll += arma::sum(log(scales)); #pragma omp critical grad += grad_k; // gradmat.col(k) = grad_k; } // for (unsigned int ii = 0; ii < grad_k.n_elem; ii++) { // #pragma omp atomic // grad(ii) += grad_k(ii); // } } } if(error > 0){ ll = -arma::math::inf(); grad.fill(-arma::math::inf()); } // } else { // grad = sum(gradmat, 1); // } return List::create(Named("objective") = -ll, Named("gradient") = wrap(-grad)); }
List objectivex(const arma::mat& transition, NumericVector emissionArray, const arma::vec& init, IntegerVector obsArray, const arma::imat& ANZ, IntegerVector emissNZ, const arma::ivec& INZ, const arma::ivec& nSymbols, const arma::mat& coef, const arma::mat& X, arma::ivec& numberOfStates, int threads) { IntegerVector eDims = emissionArray.attr("dim"); //m,p,r IntegerVector oDims = obsArray.attr("dim"); //k,n,r arma::cube emission(emissionArray.begin(), eDims[0], eDims[1], eDims[2], false, true); arma::icube obs(obsArray.begin(), oDims[0], oDims[1], oDims[2], false, true); arma::icube BNZ(emissNZ.begin(), emission.n_rows, emission.n_cols - 1, emission.n_slices, false, true); unsigned int q = coef.n_rows; arma::vec grad( arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ) + (numberOfStates.n_elem- 1) * q, arma::fill::zeros); arma::mat weights = exp(X * coef).t(); if (!weights.is_finite()) { grad.fill(-arma::math::inf()); return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); } weights.each_row() /= sum(weights, 0); arma::mat initk(emission.n_rows, obs.n_slices); for (unsigned int k = 0; k < obs.n_slices; k++) { initk.col(k) = init % reparma(weights.col(k), numberOfStates); } arma::cube alpha(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::cube beta(emission.n_rows, obs.n_cols, obs.n_slices); //m,n,k arma::mat scales(obs.n_cols, obs.n_slices); //m,n,k arma::sp_mat sp_trans(transition); internalForwardx(sp_trans.t(), emission, initk, obs, alpha, scales, threads); if (!scales.is_finite()) { grad.fill(-arma::math::inf()); return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); } internalBackwardx(sp_trans, emission, obs, beta, scales, threads); if (!beta.is_finite()) { grad.fill(-arma::math::inf()); return List::create(Named("objective") = arma::math::inf(), Named("gradient") = wrap(grad)); } arma::ivec cumsumstate = arma::cumsum(numberOfStates); arma::mat gradmat( arma::accu(ANZ) + arma::accu(BNZ) + arma::accu(INZ) + (numberOfStates.n_elem- 1) * q, obs.n_slices, arma::fill::zeros); #pragma omp parallel for if(obs.n_slices >= threads) schedule(static) num_threads(threads) \ default(none) shared(q, alpha, beta, scales, gradmat, nSymbols, ANZ, BNZ, INZ, \ numberOfStates, cumsumstate, obs, init, initk, X, weights, transition, emission) for (int k = 0; k < obs.n_slices; k++) { int countgrad = 0; // transitionMatrix if (arma::accu(ANZ) > 0) { for (int jj = 0; jj < numberOfStates.n_elem; jj++) { arma::vec gradArow(numberOfStates(jj)); arma::mat gradA(numberOfStates(jj), numberOfStates(jj)); int ind_jj = cumsumstate(jj) - numberOfStates(jj); for (int i = 0; i < numberOfStates(jj); i++) { arma::uvec ind = arma::find(ANZ.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1)); if (ind.n_elem > 0) { gradArow.zeros(); gradA.eye(); gradA.each_row() -= transition.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1); gradA.each_col() %= transition.row(ind_jj + i).subvec(ind_jj, cumsumstate(jj) - 1).t(); for (int j = 0; j < numberOfStates(jj); j++) { for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { double tmp = alpha(ind_jj + i, t, k); for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(ind_jj + j, obs(r, t + 1, k), r); } gradArow(j) += tmp * beta(ind_jj + j, t + 1, k) / scales(t + 1, k); } } gradArow = gradA * gradArow; gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradArow.rows(ind); countgrad += ind.n_elem; } } } } if (arma::accu(BNZ) > 0) { // emissionMatrix for (unsigned int r = 0; r < obs.n_rows; r++) { arma::vec gradBrow(nSymbols(r)); arma::mat gradB(nSymbols(r), nSymbols(r)); for (unsigned int i = 0; i < emission.n_rows; i++) { arma::uvec ind = arma::find(BNZ.slice(r).row(i)); if (ind.n_elem > 0) { gradBrow.zeros(); gradB.eye(); gradB.each_row() -= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1); gradB.each_col() %= emission.slice(r).row(i).subvec(0, nSymbols(r) - 1).t(); for (int j = 0; j < nSymbols(r); j++) { if (obs(r, 0, k) == j) { double tmp = initk(i, k); for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) { if (r2 != r) { tmp *= emission(i, obs(r2, 0, k), r2); } } gradBrow(j) += tmp * beta(i, 0, k) / scales(0, k); } for (unsigned int t = 0; t < (obs.n_cols - 1); t++) { if (obs(r, t + 1, k) == j) { double tmp = beta(i, t + 1, k) / scales(t + 1, k); for (unsigned int r2 = 0; r2 < obs.n_rows; r2++) { if (r2 != r) { tmp *= emission(i, obs(r2, t + 1, k), r2); } } gradBrow(j) += arma::dot(alpha.slice(k).col(t), transition.col(i)) * tmp; } } } gradBrow = gradB * gradBrow; gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradBrow.rows(ind); countgrad += ind.n_elem; } } } } if (arma::accu(INZ) > 0) { for (int i = 0; i < numberOfStates.n_elem; i++) { int ind_i = cumsumstate(i) - numberOfStates(i); arma::uvec ind = arma::find( INZ.subvec(ind_i, cumsumstate(i) - 1)); if (ind.n_elem > 0) { arma::vec gradIrow(numberOfStates(i), arma::fill::zeros); for (int j = 0; j < numberOfStates(i); j++) { double tmp = weights(i, k); for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(ind_i + j, obs(r, 0, k), r); } gradIrow(j) += tmp * beta(ind_i + j, 0, k) / scales(0, k); } arma::mat gradI(numberOfStates(i), numberOfStates(i), arma::fill::zeros); gradI.eye(); gradI.each_row() -= init.subvec(ind_i, cumsumstate(i) - 1).t(); gradI.each_col() %= init.subvec(ind_i, cumsumstate(i) - 1); gradIrow = gradI * gradIrow; gradmat.col(k).subvec(countgrad, countgrad + ind.n_elem - 1) = gradIrow.rows(ind); countgrad += ind.n_elem; } } } for (int jj = 1; jj < numberOfStates.n_elem; jj++) { int ind_jj = (cumsumstate(jj) - numberOfStates(jj)); for (int j = 0; j < emission.n_rows; j++) { double tmp = 1.0; for (unsigned int r = 0; r < obs.n_rows; r++) { tmp *= emission(j, obs(r, 0, k), r); } if ((j >= ind_jj) & (j < cumsumstate(jj))) { gradmat.col(k).subvec(countgrad + q * (jj - 1), countgrad + q * jj - 1) += tmp * beta(j, 0, k) / scales(0, k) * initk(j, k) * X.row(k).t() * (1.0 - weights(jj, k)); } else { gradmat.col(k).subvec(countgrad + q * (jj - 1), countgrad + q * jj - 1) -= tmp * beta(j, 0, k) / scales(0, k) * initk(j, k) * X.row(k).t() * weights(jj, k); } } } } return List::create(Named("objective") = -arma::accu(log(scales)), Named("gradient") = wrap(-sum(gradmat, 1))); }
SEXP vec2mat(vector<double> &x, const int &nrow, const int &ncol) { NumericVector output = wrap(x); output.attr("dim") = Dimension(nrow, ncol); return(output); }
// [[Rcpp::export]] List MH_Sampler( int number_of_MH_itterations, int number_of_actors, int number_of_topics, int number_of_latent_dimensions, arma::vec proposal_variance, arma::vec topic_cluster_assignments, NumericVector tpec, NumericVector taec, NumericVector clp, NumericVector plp, arma::vec current_intercepts, arma::mat betas, int number_of_betas, NumericVector indicator_array, int burnin, int number_of_clusters, int store_every_x_rounds, int adaptive_metropolis_update_every, int use_adaptive_metropolis, double MH_prior_standard_deviation, int seed ){ //count up to a storage round int storage_counter = 0; //one less than the number of unique objects I want to store at the beginning of the list int list_length = 7; List to_return(list_length); int MH_Counter = 0; arma::mat store_intercepts = arma::zeros(number_of_MH_itterations/store_every_x_rounds,number_of_clusters); arma::cube store_betas = arma::zeros(number_of_clusters,number_of_betas,(number_of_MH_itterations/store_every_x_rounds)); arma::cube store_latent_positions = arma::zeros(number_of_latent_dimensions*(number_of_MH_itterations/store_every_x_rounds),number_of_clusters,number_of_actors); arma::mat store_cluster_whether_accepted = arma::zeros(number_of_MH_itterations/store_every_x_rounds,number_of_clusters); arma::mat store_cluster_proposed_likelihoods = arma::zeros(number_of_MH_itterations/store_every_x_rounds,number_of_clusters); arma::mat store_cluster_current_likelihoods = arma::zeros(number_of_MH_itterations/store_every_x_rounds,number_of_clusters); arma::vec cluster_accept= arma::zeros(number_of_clusters); arma::vec Proposed_MH_Likelihoods= arma::zeros(number_of_clusters); arma::vec Current_MH_Likelihoods= arma::zeros(number_of_clusters); //set up varaibles for adaptive metropolis int number_to_store = ceil(double(burnin)/double(adaptive_metropolis_update_every)) + 1; int adaptive_metropolis_update_counter = 0; arma::mat MH_acceptances = arma::zeros(adaptive_metropolis_update_every,number_of_clusters); arma::mat cur_accept_rates = arma::zeros(number_to_store,number_of_clusters); int reached_burnin = 0; double metropolis_target_accpet_rate = 0.20; // Set RNG and define uniform distribution //boost::mt19937_64 generator(seed); boost::mt19937 generator(seed); boost::uniform_01<double> uniform_distribution; //read in topic present edge counts array [num actors x num actors x topics] IntegerVector arrayDims1 = tpec.attr("dim"); arma::cube topic_present_edge_counts(tpec.begin(), arrayDims1[0], arrayDims1[1], arrayDims1[2], false); //read in topic absent edge counts array [num actors x num actors x topics] IntegerVector arrayDims2 = taec.attr("dim"); arma::cube topic_absent_edge_counts(taec.begin(), arrayDims2[0], arrayDims2[1], arrayDims2[2], false); //read in latent positions array [num dimensions x clusters x actors] IntegerVector arrayDims3 = clp.attr("dim"); arma::cube current_latent_positions(clp.begin(), arrayDims3[0], arrayDims3[1], arrayDims3[2], false); //read in beta indicator array (0,1) [number of topics x number of actors x number of betas] IntegerVector arrayDims5 = indicator_array.attr("dim"); arma::cube beta_indicator_array(indicator_array.begin(), arrayDims5[0], arrayDims5[1], arrayDims5[2], false); arma::vec current_author_position(number_of_latent_dimensions); arma::vec proposed_author_position(number_of_latent_dimensions); arma::vec recipient_position(number_of_latent_dimensions); IntegerVector arrayDims4 = plp.attr("dim"); arma::cube proposed_latent_positions(plp.begin(), arrayDims4[0], arrayDims4[1], arrayDims4[2], false); arma::mat proposed_betas(number_of_clusters,number_of_betas); arma::vec current_cluster_betas(number_of_betas); arma::vec proposed_cluster_betas(number_of_betas); for(int i = 0; i < number_of_MH_itterations; ++i){ //Adaptive metropolis step if(use_adaptive_metropolis == 1){ if((adaptive_metropolis_update_counter == (adaptive_metropolis_update_every -1)) & (reached_burnin == 0)){ //reset counter adaptive_metropolis_update_counter =0; for(int k = 0; k < number_of_clusters; ++k){ double num_accepted = 0; for(int n = 0; n < adaptive_metropolis_update_every; ++n){ num_accepted += MH_acceptances(n,k); MH_acceptances(n,k) = 0; } double accept_proportion = double(num_accepted)/double(adaptive_metropolis_update_every); //update record of accept rates cur_accept_rates[adaptive_metropolis_update_counter] = accept_proportion; double temp = proposal_variance[k]; //ifthe accept proportion is zero then we should not do anything if(accept_proportion > metropolis_target_accpet_rate + 0.05){ proposal_variance[k] = temp + 0.05; } if((accept_proportion < metropolis_target_accpet_rate - 0.05) &(proposal_variance[k] > 0.09 )){ proposal_variance[k] = temp - 0.05; } } Rcpp::Rcout << "Cluster Proposal Variances: " << std::endl << proposal_variance << std::endl; }// end of if statement to see if we are actually in an update iteration adaptive_metropolis_update_counter += 1; }//end of if statement for using adaptive metropolis if(i == burnin ){ //set variable that tells us we have reached burnin reached_burnin =1; } double beta_val = 0; arma::vec current_author_position = arma::zeros(number_of_latent_dimensions); arma::vec proposed_author_position = arma::zeros(number_of_latent_dimensions); arma::vec recipient_position = arma::zeros(number_of_latent_dimensions); arma::vec proposed_intercepts = arma::zeros(number_of_clusters); arma::cube proposed_latent_positions = arma::zeros(number_of_latent_dimensions,number_of_clusters,number_of_actors); arma::mat proposed_betas = arma::zeros(number_of_clusters,number_of_betas); arma::vec cluster_distances = arma::zeros(number_of_clusters); for(int k = 0; k < number_of_clusters; ++k){ //for intercepts mjd::normal_distribution<double> distribution1(current_intercepts[k],proposal_variance[k]); proposed_intercepts[k] = distribution1(generator); //for latent positions for(int a = 0; a < number_of_actors; ++a){ for(int l = 0; l < number_of_latent_dimensions; ++l){ mjd::normal_distribution<double> distribution2(current_latent_positions(l,k,a),proposal_variance[k]); proposed_latent_positions(l,k,a) = distribution2(generator); } } //for mixing parameters proposed_betas(k,0) = 0; for(int b = 1; b < number_of_betas; ++b){ mjd::normal_distribution<double> distribution3(betas(k,b),proposal_variance[k]); proposed_betas(k,b) = distribution3(generator); } }//end of loop over generating new potenttial LS positions //main loop for(int k = 0; k < number_of_clusters; ++k){ double lsm_prior_current_positions = 0; double lsm_prior_proposed_positions = 0; double standard_deviation = MH_prior_standard_deviation; double dist_center = 0; double lsm_sum_log_probability_of_current_positions = 0; double lsm_sum_log_probability_of_proposed_positions = 0; double current_cluster_intercept = current_intercepts[k]; double proposed_cluster_intercept = proposed_intercepts[k]; lsm_prior_current_positions += log(( 1 / ( standard_deviation * sqrt(2*M_PI) ) ) * exp( -0.5 * pow( (current_cluster_intercept-dist_center)/standard_deviation, 2.0 ) )); lsm_prior_proposed_positions += log(( 1 / ( standard_deviation * sqrt(2*M_PI) ) ) * exp( -0.5 * pow( (proposed_cluster_intercept-dist_center)/standard_deviation, 2.0 ) )); arma::rowvec current_cluster_betas = betas.row(k); arma::rowvec proposed_cluster_betas = proposed_betas.row(k); for(int c = 1; c < number_of_betas; ++c){ lsm_prior_current_positions += log(( 1 / ( standard_deviation * sqrt(2*M_PI) ) ) * exp( -0.5 * pow( (current_cluster_betas[c]-dist_center)/standard_deviation, 2.0 ) )); lsm_prior_proposed_positions += log(( 1 / ( standard_deviation * sqrt(2*M_PI) ) ) * exp( -0.5 * pow( (proposed_cluster_betas[c]-dist_center)/standard_deviation, 2.0 ) )); } for(int a = 0; a < number_of_actors; ++a){ for(int c = 0; c < number_of_latent_dimensions; ++c){ current_author_position[c] = current_latent_positions(c,k,a); proposed_author_position[c] = proposed_latent_positions(c,k,a); lsm_prior_current_positions += log(( 1 / ( standard_deviation * sqrt(2*M_PI) ) ) * exp( -0.5 * pow( (current_author_position[c]-dist_center)/standard_deviation, 2.0 ) )); lsm_prior_proposed_positions += log(( 1 / ( standard_deviation * sqrt(2*M_PI) ) ) * exp( -0.5 * pow( (proposed_author_position[c]-dist_center)/standard_deviation, 2.0 ) )); } for(int b = 0; b < number_of_actors; ++b){ if(b!= a){ double num_actual_edge = 0; double num_non_edge = 0; //get number of actual edge present and absent for this cluster sender reciever combo for(int t = 0; t < number_of_topics; ++t){ double topic_cluster = topic_cluster_assignments[t] -1; if(topic_cluster == k){ num_actual_edge += topic_present_edge_counts(a,b,t); num_non_edge += topic_absent_edge_counts(a,b,t); } } for(int c = 0; c < number_of_latent_dimensions; ++c){ recipient_position[c] = current_latent_positions(c,k,b); } //initialize distance double distance = 0; //calculate distance for(int j = 0; j < number_of_latent_dimensions; ++j){ distance += pow((current_author_position[j] - recipient_position[j]),2); } beta_val = 0; for(int c = 1; c < number_of_betas; ++c){ beta_val += current_cluster_betas[c]*beta_indicator_array(a,b,c); } //calculate linear predictor double eta = 0; eta = current_cluster_intercept - pow(distance,.5) + beta_val; //calculate likelihoods for both double log_prob_edge = 0; double log_prob_no_edge = 0; if (eta != 0){ if(eta < 0){ log_prob_edge = eta -log(1 + exp(eta)); log_prob_no_edge = 0 -log(1 + exp(eta)); } else{ log_prob_edge = 0 -log(1 + exp(-eta)); log_prob_no_edge = 0 -eta -log(1 + exp(-eta)); } } //multiply and add to sum lsm_sum_log_probability_of_current_positions += num_actual_edge*log_prob_edge; lsm_sum_log_probability_of_current_positions += num_non_edge*log_prob_no_edge; // ======== Now calculate for new positions ==========// //get current recipient position for(int c = 0; c < number_of_latent_dimensions; ++c){ recipient_position[c] = proposed_latent_positions(c,k,b); } //initialize distance distance = 0; //calculate distance for(int j = 0; j < number_of_latent_dimensions; ++j){ distance += pow((proposed_author_position[j] - recipient_position[j]),2); } beta_val = 0; for(int c = 1; c < number_of_betas; ++c){ beta_val += proposed_cluster_betas[c]*beta_indicator_array(a,b,c); } //calculate linear predictor eta = proposed_cluster_intercept - pow(distance,.5) + beta_val; log_prob_edge = 0; log_prob_no_edge = 0; if (eta != 0){ if(eta < 0){ log_prob_edge = eta -log(1 + exp(eta)); log_prob_no_edge = 0 -log(1 + exp(eta)); } else{ log_prob_edge = 0 -log(1 + exp(-eta)); log_prob_no_edge = 0 -eta -log(1 + exp(-eta)); } } //multiply and add to sum lsm_sum_log_probability_of_proposed_positions += num_actual_edge*log_prob_edge; lsm_sum_log_probability_of_proposed_positions += num_non_edge*log_prob_no_edge; } } } lsm_sum_log_probability_of_proposed_positions += lsm_prior_proposed_positions; lsm_sum_log_probability_of_current_positions += lsm_prior_current_positions; //now calculate log ratio between two Proposed_MH_Likelihoods[k] = lsm_sum_log_probability_of_proposed_positions; Current_MH_Likelihoods[k] = lsm_sum_log_probability_of_current_positions; double accepted = 0; double log_ratio = lsm_sum_log_probability_of_proposed_positions - lsm_sum_log_probability_of_current_positions; double rand_num = uniform_distribution(generator); double lud = log(rand_num); if(log_ratio < lud){ accepted = 0; cluster_accept[k] = 0; }else{ accepted = 1; cluster_accept[k] = 1; double tempint = proposed_intercepts[k]; current_intercepts[k] = tempint; for(int a = 0; a < number_of_actors; ++a){ for(int l = 0; l < number_of_latent_dimensions; ++l){ current_latent_positions(l,k,a) = proposed_latent_positions(l,k,a); } } for(int b = 0; b < number_of_betas; ++b){ betas(k,b) = proposed_betas(k,b); } }//end of update if((use_adaptive_metropolis == 1) & (reached_burnin == 0)){ MH_acceptances((adaptive_metropolis_update_counter -1),k) = accepted; } }//end of clusters loop storage_counter +=1; if(store_every_x_rounds == storage_counter){ Rcpp::Rcout << "Current Iteration: " << i << " of " << number_of_MH_itterations << std::endl; storage_counter =0; arma::rowvec ints(number_of_clusters); for(int b = 0; b < number_of_clusters; ++b){ ints[b] = current_intercepts[b]; } arma::mat bets(number_of_clusters,number_of_betas); for(int a = 0; a < number_of_clusters; ++a){ for(int b = 0; b < number_of_betas; ++b){ bets(a,b) = betas(a,b); } } arma::cube lat_pos = current_latent_positions; arma::rowvec cluster_accepted(number_of_clusters); arma::rowvec Cur_Proposed_MH_Likelihoods(number_of_clusters); arma::rowvec Cur_Current_MH_Likelihoods(number_of_clusters); for(int a = 0; a < number_of_clusters; ++a){ cluster_accepted[a] = cluster_accept[a]; cluster_accept[a] = (double) 0; Cur_Proposed_MH_Likelihoods[a] = Proposed_MH_Likelihoods[a]; Proposed_MH_Likelihoods[a] = (double) 0; Cur_Current_MH_Likelihoods[a] = Current_MH_Likelihoods[a]; Current_MH_Likelihoods[a] = (double) 0; } for(int b = 0; b < number_of_clusters; ++b){ //store intercepts from last round of metropolis hastings store_intercepts(MH_Counter,b) = ints[b]; for(int c = 0; c < number_of_betas; ++c){ //store betas from last round of metropolis hastings store_betas(b,c,MH_Counter) = bets(b,c); } //store whether or not the new MH proposal was accepted in the last round of MH store_cluster_whether_accepted(MH_Counter,b) = cluster_accepted[b]; //store proposed position likelihods in the last round of MH store_cluster_proposed_likelihoods(MH_Counter,b) = Cur_Proposed_MH_Likelihoods[b]; //store current position likelihoods in the last round of MH store_cluster_current_likelihoods(MH_Counter,b) = Cur_Current_MH_Likelihoods[b]; } //store latent positions from last round of metropolis hastings (will take up multiple slices per iteration depending on the number of latent dimensions) int startslice = number_of_latent_dimensions*MH_Counter; for(int a = 0; a < number_of_latent_dimensions; ++a){ for(int b = 0; b < number_of_clusters; ++b){ for(int c = 0; c < number_of_actors; ++c){ int store_position = startslice + a; store_latent_positions(store_position,b,c) = lat_pos(a,b,c); } } } MH_Counter += 1; }//end of save }// end of MH loop to_return[0] = number_of_MH_itterations/store_every_x_rounds; to_return[1] = store_cluster_proposed_likelihoods; to_return[2] = store_cluster_current_likelihoods; to_return[3] = store_cluster_whether_accepted; to_return[4] = store_intercepts; to_return[5] = store_latent_positions; to_return[6] = store_betas; return to_return; }
// [[Rcpp::export(".interp_genoprob_onechr")]] NumericVector interp_genoprob_onechr(const NumericVector& genoprob, const NumericVector& map, const IntegerVector& pos_index) { // get dimensions if(Rf_isNull(genoprob.attr("dim"))) throw std::invalid_argument("genoprob should be a 3d array but has no dim attribute"); const IntegerVector& d = genoprob.attr("dim"); if(d.size() != 3) throw std::invalid_argument("genoprob should be a 3d array"); const int n_ind = d[0]; const int n_gen = d[1]; const int matsize = n_ind * n_gen; const int n_pos = map.size(); if(pos_index.size() != n_pos) { throw std::invalid_argument("Need length(map) == length(pos_index)"); } NumericVector result(n_ind*n_gen*n_pos); result.attr("dim") = Dimension(n_ind, n_gen, n_pos); // find position to the left that has genoprobs IntegerVector left_index(n_pos); int last = -1; for(int pos=0; pos<n_pos; pos++) { if(pos_index[pos] >= 0) last = pos; left_index[pos] = last; } // find position to the right that has genoprobs IntegerVector right_index(n_pos); last = -1; for(int pos=n_pos-1; pos>=0; pos--) { if(pos_index[pos] >= 0) last = pos; right_index[pos] = last; } // copy or interpolate for(int pos=0; pos<n_pos; pos++) { if(pos_index[pos] >= 0) { // in the old genoprobs std::copy(genoprob.begin()+(pos_index[pos]*matsize), genoprob.begin()+((pos_index[pos]+1)*matsize), result.begin()+(pos*matsize)); } else { double p,q; if(left_index[pos] < 0) { // off end to left p = 0.0; q = 1.0; } else if(right_index[pos] < 0) { // off end to right p = 1.0; q = 0.0; } else { double left_pos = map[left_index[pos]]; double right_pos = map[right_index[pos]]; p = (right_pos - map[pos])/(right_pos - left_pos); q = (map[pos] - left_pos)/(right_pos - left_pos); } for(int ind=0; ind<n_ind; ind++) { for(int gen=0; gen<n_gen; gen++) { result[ind + gen*n_ind + pos*matsize] = 0.0; if(p > 0) result[ind + gen*n_ind + pos*matsize] += (p*genoprob[ind + gen*n_ind + pos_index[left_index[pos]]*matsize]); if(q > 0) result[ind + gen*n_ind + pos*matsize] += (q*genoprob[ind + gen*n_ind + pos_index[right_index[pos]]*matsize]); } } } } return result; }