// Return the document-by-term matrix of the current results as an R numeric
// matrix whose rows are labelled with extDocIDs and columns with terms.
//
// termWeighting selects how the raw counts in resultsData.tfMatrix are
// transformed:
//   "tf"            - the matrix as stored
//   "tf_normalized" - every column divided by that column's total (see note)
//   "tfidf"         - column j multiplied by
//                     log((documentCount + 1) / (dfVector[j] + 0.5))
//
// Any other value, including "idf" (which was never implemented), raises an
// R error. Previously those cases ran off the end of the function without
// returning a value, which is undefined behaviour.
SEXP getDocTermMatrix(string termWeighting){
  Rcpp::List dimnms = Rcpp::List::create(extDocIDs, terms);

  if(termWeighting == "tf"){
    NumericMatrix d = Rcpp::wrap(resultsData.tfMatrix);
    d.attr("dimnames") = dimnms;
    return d;
  }

  if(termWeighting == "tf_normalized"){
    arma::mat tfnorm = resultsData.tfMatrix;
    // NOTE(review): arma::sum(M, 0) produces one total per *column*, and the
    // dimnames above label columns with terms, so this scales each term by
    // its total over all documents. The name docLen suggests per-document
    // (row) totals were intended, i.e. arma::sum(M, 1) with each_col().
    // Behaviour is left unchanged here; confirm the intent before changing.
    arma::rowvec docLen = arma::sum(tfnorm, 0);
    tfnorm.each_row() /= docLen;
    NumericMatrix d = Rcpp::wrap(tfnorm);
    d.attr("dimnames") = dimnms;
    return d;
  }

  if(termWeighting == "tfidf"){
    arma::mat tfidfMat = resultsData.tfMatrix;
    arma::vec idf = arma::log((environment.documentCount() + 1) / (resultsData.dfVector + 0.5));
    // one idf weight per column (term), applied to every row (document)
    tfidfMat.each_row() %= idf.t();
    NumericMatrix d = Rcpp::wrap(tfidfMat);
    d.attr("dimnames") = dimnms;
    return d;
  }

  if(termWeighting == "idf"){
    Rcpp::stop("termWeighting \"idf\" is not implemented");
  }

  Rcpp::stop("unknown termWeighting: " + termWeighting);
  return R_NilValue; // not reached; keeps compilers that do not see stop() as noreturn quiet
}
//[[Rcpp::export]] SEXP do_getcq_dense( NumericMatrix X, const IntegerVector& mcs0idx){ List vnl = clone(List(X.attr("dimnames"))); CharacterVector vn=vnl[0]; int nrX = X.rows(), i, ii, j, k, l, past; IntegerVector pas( nrX ), vec_s( nrX ), vec2_s( nrX ); IntegerVector ggg( nrX ); // pas.setZero(); for (i=0; i<nrX; ++i){ j = mcs0idx[i]; // Rprintf("i=%d, j=%d, past=%d\n", i, j, past); vec_s = X(_, j ); vec2_s = vec_s * pas ; past = sum( vec2_s ); pas[i] = 1; ggg[ mcs0idx[i] ] = past; } // // cout << vec_s.transpose() << endl; cout << pas.transpose() << endl; IntegerVector ladder( nrX ); for (i=0; i<nrX-1; ++i){ if( ggg[i]+1>ggg[i+1]) ladder[i] = 1; } ladder[nrX-1]=1; //Rprintf("ladder: "); Rf_PrintValue( ladder ); int ncq = sum( ladder ); List cqlist(ncq); for (i=0; i<nrX; ++i) pas[i]=0; // pas.setZero(); l=0; for (i=0; i<nrX; ++i){ if (ladder[i]>0){ j = mcs0idx[i]; vec_s = X(_, j ); vec2_s = vec_s * pas; past = sum( vec2_s ) ; //Rprintf("i=%d, j=%d, past=%d\n", i, j, past); IntegerVector cq(past+1); //cout << "vec2_s " << vec2_s.transpose() << endl; Rf_PrintValue( cq ); k=0; for (ii=0; ii<nrX; ++ii){ if (vec2_s[ii] != 0) cq[k++] = ii; } cq[past] = j; CharacterVector cq2(past+1); for (k=0; k<past+1;++k) cq2[k]=vn[cq[k]]; cqlist[l++] = cq2; //Rf_PrintValue( cq ); } pas[i] = 1; } return cqlist; //List::create( cqlist ); //return List::create(1); }
// use calc_resid_linreg for a 3-dim array // [[Rcpp::export]] NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P) { int nrowx = X.rows(); int sizep = P.size(); NumericMatrix pr(nrowx, sizep/nrowx); std::copy(P.begin(), P.end(), pr.begin()); // FIXME I shouldn't need to copy NumericMatrix result = calc_resid_linreg(X, pr); result.attr("dim") = P.attr("dim"); return result; }
// use calc_resid_linreg for a 3-dim array // [[Rcpp::export]] NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P, const double tol=1e-12) { const unsigned int nrowx = X.rows(); const Dimension d = P.attr("dim"); if(d[0] != nrowx) throw std::range_error("nrow(X) != nrow(P)"); NumericMatrix pr(nrowx, d[1]*d[2]); std::copy(P.begin(), P.end(), pr.begin()); // FIXME I shouldn't need to copy NumericMatrix result = calc_resid_eigenqr(X, pr, tol); result.attr("dim") = d; return result; }
// use calc_resid_linreg for a 3-dim array // [[Rcpp::export]] NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P, const double tol=1e-12) { const int nrowx = X.rows(); if(Rf_isNull(P.attr("dim"))) throw std::invalid_argument("P should be a 3d array but has no dim attribute"); const Dimension d = P.attr("dim"); if(d.size() != 3) throw std::invalid_argument("P should be a 3d array"); if(d[0] != nrowx) throw std::range_error("nrow(X) != nrow(P)"); NumericMatrix pr(nrowx, d[1]*d[2]); std::copy(P.begin(), P.end(), pr.begin()); // FIXME I shouldn't need to copy NumericMatrix result = calc_resid_eigenqr(X, pr, tol); result.attr("dim") = d; return result; }
// seqDist // [[Rcpp::export]] double seqDistRcpp(std::string seq1, std::string seq2, NumericMatrix dist_mat) { // Check that seq1 and seq2 have same length int len_seq1 = seq1.length(); int len_seq2 = seq2.length(); if (len_seq1 != len_seq2) { throw std::range_error("Sequences of different length."); } int len_seqs = len_seq1; List dist_mat_dims = dist_mat.attr("dimnames"); //print (dist_mat_dims); CharacterVector dist_mat_rownames = dist_mat_dims[0]; CharacterVector dist_mat_colnames = dist_mat_dims[1]; int num_rows = dist_mat_rownames.size(); int num_cols = dist_mat_colnames.size(); List row_key_idx; List col_key_idx; std::map<std::string, int> rows_map; std::map<std::string, int> cols_map; for (int i = 0; i < num_rows; i++) { //const char *this_col = dist_mat_colnames[i].c_str(); std::string this_row = as<std::string>(dist_mat_rownames[i]); rows_map[this_row] = i; } for (int i = 0; i < num_cols; i++) { //const char *this_col = dist_mat_colnames[i].c_str(); std::string this_col = as<std::string>(dist_mat_colnames[i]); cols_map[this_col] = i; } int d_seen = 0; int indels = 0; // sum(d[d>0]) double d_sum = 0; for (int i = 0; i < len_seqs; i++) { // find row index int row_idx; char row_char = (char)seq1[i]; std::string row_string; row_string+=row_char; auto search_row = rows_map.find(row_string); if(search_row != rows_map.end()) { row_idx = search_row->second; } else { throw std::range_error("Character not found in dist_mat."); } // find col index int col_idx; char col_char = (char)seq2[i]; std::string col_string; col_string+=col_char; auto search_col = cols_map.find(col_string); if(search_col != cols_map.end()) { col_idx = search_col->second; } else { throw std::range_error("Character not found in dist_mat."); } // distance for current i double d_i = dist_mat(row_idx, col_idx); if (d_i > 0){ // Sum distance d_sum = d_sum + d_i; } else if ( (d_i == -1 ) & (d_seen != -1) ) { // Count indel indels++; } d_seen = d_i; } double distance = d_sum + indels; return 
(distance); }
// [[Rcpp::export]] Rcpp::List userRMA(const NumericMatrix x2, Rcpp::List probesets, CharacterVector colnames, NumericVector quantile, NumericVector alpha) { NumericMatrix x = clone(x2); CharacterVector probesets2 = probesets.names(); int n = x.nrow(); int m = x.ncol(); int p = probesets2.size(); NumericMatrix xnorm(n,m); arma::rowvec cdelta(m, arma::fill::none); IntegerVector match2 = seq_len(n); IntegerVector h5 = seq_len(n); IntegerVector h3 = seq_len(n); IntegerVector revh3 = clone<IntegerVector>(h3); std::reverse(revh3.begin(), revh3.end()); NumericMatrix xsum (p, m); Rcpp::List dimns = List::create(probesets2, colnames); xsum.attr("dimnames") = dimns; NumericMatrix xsort(n,m); NumericVector h1 (p); for (int j = 0; j < m; ++j) { h1 = x(_, j); h1 = h1.sort(); NumericVector revh1 = clone<NumericVector>(h1); std::reverse(revh1.begin(), revh1.end()); NumericVector revx = clone<NumericVector>(x(_, j)); std::reverse(revx.begin(), revx.end()); match2 = match(revx, revh1); IntegerVector revmatch2 = clone<IntegerVector>(match2); std::reverse(revmatch2.begin(), revmatch2.end()); for (int i = 0; i < n; ++i) { xsort(i, j) = quantile(revh3(revmatch2(i)-1)-1); } } for (int j = 0; j < m; ++j) { xnorm(_,j) = log(xsort(_, j))/log(2) - alpha; } double count = 0; double n2 = 0; for (int l = 0; l < p; ++l) { CharacterVector probes = probesets(l); n2 = probes.size(); IntegerVector x5(n2); x5 = seq(count, count + probes.size()); count += n2; arma::mat x3(n2, m); for (int j = 0; j < n2; ++j) { cdelta =xnorm(x5(j), _); x3(j, arma::span::all) = cdelta; } cdelta = arma::median(x3, 0); for (int j = 0; j < m; ++j) { xsum(l, j) = cdelta(j); } } return List::create(Named("exprs") = xsum); }