Beispiel #1
0
    /**
     * Return the term-frequency matrix under the requested weighting, as an
     * R numeric matrix whose dimnames are (extDocIDs, terms), i.e. one row
     * per document and one column per term.
     *
     * @param termWeighting one of
     *        "tf"            - the stored term frequencies, unchanged;
     *        "tf_normalized" - each document's frequencies divided by that
     *                          document's total frequency (its row sum);
     *        "tfidf"         - frequencies scaled per term by
     *                          log((documentCount + 1) / (df + 0.5)).
     * @return the weighted matrix.
     * @throws an R error (Rcpp::stop) for "idf", which is recognised but not
     *         implemented, and for any other value. Previously both cases
     *         fell off the end of the function without returning a value.
     */
    SEXP getDocTermMatrix(string termWeighting){
        Rcpp::List dimnms = Rcpp::List::create(extDocIDs, terms);
        if(termWeighting == "tf"){
            NumericMatrix d = Rcpp::wrap(resultsData.tfMatrix);
            d.attr("dimnames") = dimnms;
            return d;
        }else if(termWeighting == "tf_normalized"){
            arma::mat tfnorm = resultsData.tfMatrix;
            // Rows are documents, so a document's length is its row sum
            // (dim = 1). Summing over dim = 0 would give per-term totals.
            // NOTE(review): a document with no terms has length 0 and its
            // row becomes NaN; confirm whether such documents can occur.
            arma::vec docLen = arma::sum(tfnorm, 1);
            tfnorm.each_col() /= docLen;
            NumericMatrix d = Rcpp::wrap(tfnorm);
            d.attr("dimnames") = dimnms;
            return d;
        }else if(termWeighting == "tfidf"){
            arma::mat tfidfMat = resultsData.tfMatrix;
            arma::vec idf = arma::log((environment.documentCount() + 1) /
                    (resultsData.dfVector + 0.5));
            // One idf value per term: scale every row (document) column-wise.
            tfidfMat.each_row() %= idf.t();
            NumericMatrix d = Rcpp::wrap(tfidfMat);
            d.attr("dimnames") = dimnms;
            return d;
        }else if(termWeighting == "idf"){
            Rcpp::stop("termWeighting \"idf\" is not implemented");
        }

        Rcpp::stop("unknown termWeighting: " + termWeighting);
        return R_NilValue; // not reached; Rcpp::stop always throws
    }
Beispiel #2
0
//[[Rcpp::export]]
SEXP do_getcq_dense( NumericMatrix X, const IntegerVector& mcs0idx){
  // Builds a list of character vectors of row names of X. mcs0idx supplies,
  // for every step 0..nrow(X)-1, the 0-based column of X handled at that step.
  // NOTE(review): presumably X is a 0/1 adjacency matrix and mcs0idx a
  // 0-based ordering of its nodes; the code does not check either.

  List dimNames = clone(List(X.attr("dimnames")));
  CharacterVector nodeNames = dimNames[0];

  const int nNodes = X.rows();

  // Work vectors are allocated once and overwritten by assignment each step.
  IntegerVector visited( nNodes ), column( nNodes ), maskedColumn( nNodes );
  IntegerVector priorCount( nNodes );

  // Pass 1: for each step, sum the entries of the step's column that sit at
  // positions already flagged in `visited`, and store that under the column's
  // index. The flag that gets set is indexed by the step, not by the column.
  for (int step = 0; step < nNodes; ++step){
    const int node = mcs0idx[step];
    column       = X(_, node);
    maskedColumn = column * visited;
    priorCount[node] = sum( maskedColumn );
    visited[step] = 1;
  }

  // A position is marked when the count does not grow by at least one at the
  // following position; the final position is always marked.
  IntegerVector ladder( nNodes );
  for (int pos = 0; pos + 1 < nNodes; ++pos){
    if (priorCount[pos+1] < priorCount[pos] + 1) ladder[pos] = 1;
  }
  ladder[nNodes-1] = 1;

  const int nGroups = sum( ladder );
  List groups( nGroups );

  // Pass 2 starts again from an all-zero flag vector.
  std::fill(visited.begin(), visited.end(), 0);

  int slot = 0;
  for (int step = 0; step < nNodes; ++step){
    if (ladder[step] > 0){
      const int node = mcs0idx[step];
      column       = X(_, node);
      maskedColumn = column * visited;
      const int nPrior = sum( maskedColumn );

      // Indices with a non-zero masked entry come first, the step's own
      // column index is stored last.
      IntegerVector members( nPrior + 1 );
      int filled = 0;
      for (int r = 0; r < nNodes; ++r){
        if (maskedColumn[r] != 0) members[filled++] = r;
      }
      members[nPrior] = node;

      // Translate indices to row names.
      CharacterVector labels( nPrior + 1 );
      for (int m = 0; m <= nPrior; ++m) labels[m] = nodeNames[ members[m] ];
      groups[slot++] = labels;
    }
    visited[step] = 1;
  }

  return groups;
}
Beispiel #3
0
// use calc_resid_linreg for a 3-dim array
//
// P is copied into a matrix with nrow(X) rows, passed to calc_resid_linreg
// together with X, and the result is given P's "dim" attribute again.
//
// Throws std::invalid_argument if X has no rows, and std::range_error if
// length(P) is not a multiple of nrow(X) or if P has a "dim" attribute whose
// first entry differs from nrow(X). Without these checks the column count
// below divides by zero, or the copy writes past the end of the work matrix.
// [[Rcpp::export]]
NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P)
{
    const int nrowx = X.rows();
    const int sizep = P.size();

    if(nrowx <= 0)
        throw std::invalid_argument("X should have at least one row");
    if(sizep % nrowx != 0)
        throw std::range_error("length(P) is not a multiple of nrow(X)");
    if(P.hasAttribute("dim")) {
        const Dimension d = P.attr("dim");
        if(d[0] != nrowx)
            throw std::range_error("nrow(X) != nrow(P)");
    }

    // Exact division: the remainder was checked above.
    NumericMatrix pr(nrowx, sizep/nrowx);
    std::copy(P.begin(), P.end(), pr.begin()); // FIXME I shouldn't need to copy

    NumericMatrix result = calc_resid_linreg(X, pr);
    result.attr("dim") = P.attr("dim");

    return result;
}
Beispiel #4
0
// use calc_resid_linreg for a 3-dim array
//
// P is copied into a matrix with d[0] rows and d[1]*d[2] columns (d being P's
// "dim" attribute), passed to calc_resid_eigenqr together with X and tol, and
// the result is given the dimensions d again.
//
// Throws std::invalid_argument if P has no "dim" attribute or the attribute
// does not have exactly three entries, and std::range_error if d[0] differs
// from nrow(X). The three-entry check matters for memory safety: with more
// dimensions P holds more than d[0]*d[1]*d[2] values and the copy below
// would run past the end of the work matrix.
// [[Rcpp::export]]
NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P,
                                   const double tol=1e-12)
{
    // Signed, so the comparison with d[0] below does not mix signedness.
    const int nrowx = X.rows();

    if(!P.hasAttribute("dim"))
        throw std::invalid_argument("P should be a 3d array but has no dim attribute");
    const Dimension d = P.attr("dim");
    if(d.size() != 3)
        throw std::invalid_argument("P should be a 3d array");
    if(d[0] != nrowx)
        throw std::range_error("nrow(X) != nrow(P)");

    NumericMatrix pr(nrowx, d[1]*d[2]);
    std::copy(P.begin(), P.end(), pr.begin()); // FIXME I shouldn't need to copy

    NumericMatrix result = calc_resid_eigenqr(X, pr, tol);
    result.attr("dim") = d;

    return result;
}
Beispiel #5
0
// Apply calc_resid_eigenqr to a 3-dim array.
//
// P must carry a "dim" attribute with exactly three entries, the first of
// which equals nrow(X). Its values are copied into a matrix with nrow(X) rows
// and dim[1]*dim[2] columns, that matrix is handed to calc_resid_eigenqr
// along with X and tol, and the result is returned with P's dimensions.
//
// Throws std::invalid_argument when P is not a 3d array and std::range_error
// when the row counts of X and P disagree.
// [[Rcpp::export]]
NumericVector calc_resid_linreg_3d(const NumericMatrix& X, const NumericVector& P,
                                   const double tol=1e-12)
{
    const int n_obs = X.rows();

    const bool lacks_dim = Rf_isNull(P.attr("dim"));
    if(lacks_dim) {
        throw std::invalid_argument("P should be a 3d array but has no dim attribute");
    }

    const Dimension p_dim = P.attr("dim");
    if(!(p_dim.size() == 3)) {
        throw std::invalid_argument("P should be a 3d array");
    }
    if(!(p_dim[0] == n_obs)) {
        throw std::range_error("nrow(X) != nrow(P)");
    }

    // Flatten the trailing two dimensions into columns.
    NumericMatrix flat(n_obs, p_dim[1]*p_dim[2]);
    std::copy(P.begin(), P.end(), flat.begin()); // FIXME I shouldn't need to copy

    NumericMatrix resid = calc_resid_eigenqr(X, flat, tol);
    resid.attr("dim") = p_dim; // back to the 3-d shape of P

    return resid;
}
Beispiel #6
0
// seqDist
// [[Rcpp::export]]
double seqDistRcpp(std::string seq1, std::string seq2, 
                   NumericMatrix dist_mat) {

    // Check that seq1 and seq2 have same length
    int len_seq1 = seq1.length();
    int len_seq2 = seq2.length();
    
    if (len_seq1 != len_seq2) {
        throw std::range_error("Sequences of different length.");  
    }
    
    int len_seqs = len_seq1;
    
    List dist_mat_dims = dist_mat.attr("dimnames");
    //print (dist_mat_dims);
    CharacterVector dist_mat_rownames = dist_mat_dims[0];
    CharacterVector dist_mat_colnames = dist_mat_dims[1];
    int num_rows = dist_mat_rownames.size();
    int num_cols = dist_mat_colnames.size();
    
    List row_key_idx;
    List col_key_idx;
    
    std::map<std::string, int> rows_map;
    std::map<std::string, int> cols_map;
    
    for (int i = 0; i < num_rows; i++)
    {
        //const char *this_col = dist_mat_colnames[i].c_str();
        std::string this_row = as<std::string>(dist_mat_rownames[i]);
        rows_map[this_row] = i;
    }  
    
    for (int i = 0; i < num_cols; i++)
    {
        //const char *this_col = dist_mat_colnames[i].c_str();
        std::string this_col = as<std::string>(dist_mat_colnames[i]);
        cols_map[this_col] = i;
    } 
    
    int d_seen = 0;
    int indels = 0;
    // sum(d[d>0])
    double d_sum = 0;
    
    for (int i = 0; i < len_seqs; i++)
    {
        // find row index
        int row_idx;
        char row_char = (char)seq1[i];
        std::string row_string;
        row_string+=row_char;
        auto search_row = rows_map.find(row_string);
        if(search_row != rows_map.end()) {
            row_idx = search_row->second;
        }
        else {
            throw std::range_error("Character not found in dist_mat.");  
        }
        
        // find col index
        int col_idx;
        char col_char = (char)seq2[i];
        std::string col_string;
        col_string+=col_char;
        auto search_col = cols_map.find(col_string);
        if(search_col != cols_map.end()) {
            col_idx = search_col->second;
        }
        else {
            throw std::range_error("Character not found in dist_mat.");  
        }    
        
        // distance for current i
        double d_i = dist_mat(row_idx, col_idx);
        
        if (d_i > 0){
            // Sum distance
            d_sum = d_sum + d_i;
        } 
        else if ( (d_i == -1 ) &  (d_seen != -1) )
        {
            // Count indel
            indels++;
        }  
        d_seen = d_i;
    }
    
    double distance = d_sum + indels;
    return (distance);
}
Beispiel #7
0
// Summarise a matrix row-block by row-block into one row per probeset.
//
// Steps, as implemented below:
//   1. Work on a deep copy of x2.
//   2. For every column, replace each value by an entry of `quantile`. The
//      entry index is n - pos (0-based), where pos is the 1-based position at
//      which match() finds the value in the column sorted in descending
//      order; the largest value therefore reads quantile(n - 1).
//   3. Take log base 2 of the result and subtract `alpha`.
//   4. For every element of `probesets`, take the column-wise median over a
//      block of consecutive rows. Blocks follow each other in probeset order
//      and each spans as many rows as the probeset has entries.
//
// Returns a list with a single element "exprs": a matrix with one row per
// probeset and one column per column of x2, with dimnames
// (names(probesets), colnames).
//
// NOTE(review): nothing here checks that `quantile` has at least nrow(x2)
// entries or that the probeset sizes add up to at most nrow(x2); both are
// used as indices - confirm the callers guarantee this.
// [[Rcpp::export]]
Rcpp::List userRMA(const NumericMatrix x2, 
                   Rcpp::List probesets,
                   CharacterVector colnames, 
                   NumericVector quantile, 
                   NumericVector alpha) {
  
  // Deep copy of the input; only the copy is read from below.
  NumericMatrix x = clone(x2);
  CharacterVector probesets2 = probesets.names();
  
  int n = x.nrow();            // rows of the input
  int m = x.ncol();            // columns of the input
  int p = probesets2.size();   // number of named probesets
  
  NumericMatrix xnorm(n,m);
  
  // Scratch row vector: holds one row of xnorm, later the per-column medians.
  arma::rowvec cdelta(m, arma::fill::none);
   
  // match2 is overwritten inside the column loop before it is read.
  IntegerVector match2 = seq_len(n);
  // NOTE(review): h5 is never read after this line.
  IntegerVector h5 = seq_len(n);
  IntegerVector h3 = seq_len(n);
  // revh3 = n, n-1, ..., 1
  IntegerVector revh3 = clone<IntegerVector>(h3);
  std::reverse(revh3.begin(), revh3.end());
  
  // Output matrix, one row per probeset, labelled up front.
  NumericMatrix xsum (p, m);
  Rcpp::List dimns = List::create(probesets2, colnames);
  xsum.attr("dimnames") = dimns;
  
  NumericMatrix xsort(n,m);
  NumericVector h1 (p);  
  
  for (int j = 0; j < m; ++j) {
    // Sorted copy of column j (sort() with no argument; presumably ascending).
    h1 = x(_, j);
    h1 = h1.sort();
    
    // The sorted column reversed, i.e. in the opposite order.
    NumericVector revh1 = clone<NumericVector>(h1);
    std::reverse(revh1.begin(), revh1.end());
    
    // Column j in reversed row order.
    NumericVector revx = clone<NumericVector>(x(_, j));
    std::reverse(revx.begin(), revx.end());
    
    // 1-based position of every (reversed-order) value in the reversed sort.
    match2 = match(revx, revh1);
    
    // Undo the row reversal so that entry i belongs to row i again.
    IntegerVector revmatch2 = clone<IntegerVector>(match2);
    std::reverse(revmatch2.begin(), revmatch2.end());
    
    for (int i = 0; i < n; ++i) {
      // revh3(pos - 1) - 1 == n - pos, with pos = revmatch2(i)
      xsort(i, j) = quantile(revh3(revmatch2(i)-1)-1);
    }
  } 
    
  for (int j = 0; j < m; ++j) {
    // log base 2 via change of base, then element-wise subtraction of alpha.
    // NOTE(review): assumes alpha has one entry per row - TODO confirm.
    xnorm(_,j) =  log(xsort(_, j))/log(2) - alpha;
  }
  
  // count: first row of the current probeset's block; n2: size of that block.
  // Both are doubles although they are only used as counts and indices.
  double count = 0;
  double n2 = 0;
  for (int l = 0; l < p; ++l) {
    CharacterVector probes = probesets(l);
    n2 = probes.size();
    
    IntegerVector x5(n2);
    // Row indices count, count+1, ...
    // NOTE(review): seq() looks inclusive of its upper bound, which would
    // give n2 + 1 indices; only the first n2 are read below.
    x5 = seq(count, count + probes.size());
    count += n2;
    
    // Rows of xnorm belonging to this probeset.
    arma::mat x3(n2, m);
    
    for (int j = 0; j < n2; ++j) {
      cdelta =xnorm(x5(j), _);
        x3(j, arma::span::all) = cdelta;
    }    
    
    // Median of each column (dim = 0) over the probeset's rows.
    cdelta = arma::median(x3, 0);
    
    for (int j = 0; j < m; ++j) {
      xsum(l, j) = cdelta(j);
    } 
    
  }
  
  return List::create(Named("exprs") = xsum);       
}