//' @export // [[Rcpp::export]] arma::vec th(Rcpp::StringVector strata_rsid, Rcpp::StringVector rsid, Rcpp::NumericVector r2){ // Inputs: // // strata_rsid is the overall index of rsid returned from stratify() // rsid is the rsid column from LdList // r2 is the r2 column from LdList // Create the output vector of th numbers arma::vec th(strata_rsid.size(), fill::zeros); // Find the indices of where the rsid are in strata_rsid for(int i = 0; i < rsid.size(); i++){ // Pull out each of the RSIDs successively // std::string id = rsid(i); // Find the index in strata_rsid where the RSID string is // and fill it in with the r2 at the given id. // th( arma::find( strata_rsid == rsid(i) ) ) = r2(i); } // Create duplicate vector to fill in discretized values arma::vec out = th; out.elem( find( th >= 0.2 ) ).fill(0.2); out.elem( find( th >= 0.4 ) ).fill(0.4); out.elem( find( th >= 0.6 ) ).fill(0.6); out.elem( find( th >= 0.8 ) ).fill(0.8); out.elem( find( th >= 0.9 ) ).fill(0.9); out.elem( find( th >= 1 ) ).fill(1); return out; }
Rcpp::StringMatrix DataFrame_to_StringMatrix( Rcpp::DataFrame df ){ Rcpp::StringVector sv = df(0); Rcpp::StringMatrix sm(sv.size(), df.size()); sm.attr("col.names") = df.attr("col.names"); sm.attr("row.names") = df.attr("row.names"); for(int i=0; i < df.size(); i++){ sv = df(i); for(int j=0; j < sv.size(); j++){ sm(j, i) = sv(j); } } return sm; }
void TabEditor::displayEditor(RCore *rExe,std::string name, DataEditor *dataEditor, VariableEditor *variableEditor){ qDebug("Display Editor"); rExe->diplayData(name); //----Update dataEditor->setVarTypes(rExe->getVarTypes()); QStringList varNames; Rcpp::StringVector sv = rExe->getColNames(); for(int i = 0; i < sv.size();i++){ varNames.push_back(QString(sv[i])); } dataEditor->setVarNames(varNames); dataEditor->loadData(rExe->getDataFrame(),rExe->getColNames()); //--- variableEditor->loadVariable(rExe,rExe->getColNames()); addTab(dataEditor,"Data"); addTab(variableEditor, "Variable"); setTabPosition(West); //Update 30 Juni connect(this, SIGNAL(currentChanged(int)), variableEditor, SLOT(checkWidgetVisibility())); }
// [[Rcpp::export]] Rcpp::NumericMatrix infoContentMethod_cpp( Rcpp::StringVector& id1_, Rcpp::StringVector& id2_, Rcpp::List& anc_, Rcpp::NumericVector& ic_, const std::string& method_, const std::string& ont_ ) { go_dist_func_t* go_dist; // Resnik does not consider how distant the terms are from their common ancestor. // Lin and Jiang take that distance into account. if (method_ == "Resnik") { go_dist = &go_dist_Resnik; } else if (method_ == "Lin") { go_dist = &go_dist_Lin; } else if (method_ == "Jiang") { go_dist = &go_dist_Jiang; } else if (method_ == "Rel") { go_dist = &go_dist_Rel; } else { throw std::runtime_error( "Unknown GO distance method" ); } typedef std::string term_id_t; typedef std::set<term_id_t> term_set_t; // calculate the maximum IC and build the map of normalized IC typedef std::map<term_id_t, double> ic_map_t; ic_map_t normIcMap; // more specific term, larger IC value. // Normalized, all divide the most informative IC. // all IC values range from 0(root node) to 1(most specific node) double mic = NA_REAL; { Rcpp::StringVector icNames( ic_.names() ); for (std::size_t i=0; i < ic_.size(); i++ ) { const double cic = ic_[i]; if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue; if ( Rcpp::NumericVector::is_na( mic ) || mic < cic ) mic = cic; } LOG_DEBUG( "mic=" << mic ); for (std::size_t i=0; i < ic_.size(); i++ ) { const double cic = ic_[i]; if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue; normIcMap.insert( std::make_pair( (std::string) icNames[i], cic / mic ) ); } } // set root node IC to 0 if(ont_ == "DO") { normIcMap["DOID:4"] = 0; } else { normIcMap["all"] = 0; } // convert anc_ into map of sets typedef std::map<term_id_t, term_set_t> anc_map_t; anc_map_t ancMap; { Rcpp::StringVector goTerms( anc_.names() ); for (std::size_t i=0; i < anc_.size(); i++ ) { const std::vector<std::string> ancVec = Rcpp::as<std::vector<std::string> >( anc_[i] ); term_set_t ancestors( ancVec.begin(), ancVec.end() ); // term itself is also considered an ancestor ancestors.insert( (std::string)goTerms[i] ); ancMap.insert( std::make_pair( (std::string) goTerms[i], ancestors ) ); } } Rcpp::NumericMatrix res( id1_.size(), id2_.size() ); res.attr("dimnames") = Rcpp::Rcpp_list2( id1_, id2_ ); for ( std::size_t i = 0; i < id1_.size(); i++ ) { const std::string id1_term = (std::string)id1_[i]; const ic_map_t::const_iterator iIcIt = normIcMap.find( id1_term ); if ( iIcIt != normIcMap.end() && iIcIt->second != 0 ) { const double iIc = iIcIt->second; LOG_DEBUG( "ic[" << id1_term << "]=" << iIc ); const anc_map_t::const_iterator iAncsIt = ancMap.find( id1_term ); for ( std::size_t j = 0; j < id2_.size(); j++ ) { const std::string id2_term = (std::string)id2_[j]; const ic_map_t::const_iterator jIcIt = normIcMap.find( id2_term ); if ( jIcIt != normIcMap.end() && jIcIt->second != 0 ) { const anc_map_t::const_iterator jAncsIt = ancMap.find( id2_term ); // find common ancestors term_set_t commonAncs; if ( iAncsIt != ancMap.end() && jAncsIt != ancMap.end() ) { std::set_intersection( iAncsIt->second.begin(), iAncsIt->second.end(), jAncsIt->second.begin(), jAncsIt->second.end(), std::inserter( commonAncs, commonAncs.end() ) ); } LOG_DEBUG( "n(commonAncs(" << id1_term << "," << id2_term << "))=" << commonAncs.size() ); // Information Content of the most informative common ancestor (MICA) double mica = 0; for ( term_set_t::const_iterator termIt = commonAncs.begin(); termIt != commonAncs.end(); ++termIt ) { ic_map_t::const_iterator ancIcIt = normIcMap.find( *termIt ); if ( ancIcIt != normIcMap.end() && mica < ancIcIt->second ) mica = ancIcIt->second; } LOG_DEBUG( "mica(" << id1_term << "," << id2_term << ")=" << mica ); res(i,j) = go_dist( mica, iIc, jIcIt->second, mic ); } else { res(i,j) = NA_REAL; } } } else { for ( std::size_t j = 0; j < id2_.size(); j++ ) { res(i,j) = NA_REAL; } } } return ( res ); }
// [[Rcpp::export]] void write_vcf_body_gz( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { // http://stackoverflow.com/a/5649224 // fix DataFrame Rcpp::StringVector chrom = fix["CHROM"]; Rcpp::StringVector pos = fix["POS"]; Rcpp::StringVector id = fix["ID"]; Rcpp::StringVector ref = fix["REF"]; Rcpp::StringVector alt = fix["ALT"]; Rcpp::StringVector qual = fix["QUAL"]; Rcpp::StringVector filter = fix["FILTER"]; Rcpp::StringVector info = fix["INFO"]; // gt DataFrame Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt); Rcpp::StringVector column_names(gt.size()); column_names = gt.attr("names"); int i = 0; int j = 0; gzFile *fi = (gzFile *)gzopen(filename.c_str(),"ab"); // gzFile *fi = (gzFile *)gzopen(filename.c_str(),"abw"); for(i=0; i<chrom.size(); i++){ Rcpp::checkUserInterrupt(); if(mask == 1 && filter(i) != "PASS" ){ // Don't print variant. } else { std::string tmpstring; tmpstring = chrom(i); tmpstring = tmpstring + "\t" + pos(i) + "\t"; if(id(i) == NA_STRING){ tmpstring = tmpstring + "."; } else { tmpstring = tmpstring + id(i); } tmpstring = tmpstring + "\t" + ref(i) + "\t" + alt(i) + "\t"; if(qual(i) == NA_STRING){ tmpstring = tmpstring + "." + "\t"; } else { tmpstring = tmpstring + qual(i) + "\t"; } if(filter(i) == NA_STRING){ tmpstring = tmpstring + "." + "\t"; } else { tmpstring = tmpstring + filter(i) + "\t"; } tmpstring = tmpstring + info(i); // gt portion for(j=0; j<column_names.size(); j++){ if(gt_cm(i, j) == NA_STRING){ tmpstring = tmpstring + "\t" + "./."; } else { tmpstring = tmpstring + "\t" + gt_cm(i, j); } } // gzwrite(fi,"my decompressed data",strlen("my decompressed data")); // gzwrite(fi,"\n",strlen("\n")); // std::string tmpstring = "test string\n"; gzwrite(fi, (char *)tmpstring.c_str(), tmpstring.size()); gzwrite(fi,"\n",strlen("\n")); } } gzclose(fi); return; }
// [[Rcpp::export]] void write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { //int write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { // fix DataFrame Rcpp::StringVector chrom = fix["CHROM"]; Rcpp::StringVector pos = fix["POS"]; Rcpp::StringVector id = fix["ID"]; Rcpp::StringVector ref = fix["REF"]; Rcpp::StringVector alt = fix["ALT"]; Rcpp::StringVector qual = fix["QUAL"]; Rcpp::StringVector filter = fix["FILTER"]; Rcpp::StringVector info = fix["INFO"]; // gt DataFrame Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt); Rcpp::StringVector column_names(gt.size()); column_names = gt.attr("names"); // column_names = gt_cm.attr("col.names"); // delete gt; int i = 0; int j = 0; // Uncompressed. std::ofstream myfile; myfile.open (filename.c_str(), std::ios::out | std::ios::app | std::ios::binary); // gzFile *fi = (gzFile *)gzopen("file.gz","wb"); for(i=0; i<chrom.size(); i++){ Rcpp::checkUserInterrupt(); if(mask == 1 && filter(i) == "PASS" ){ // Don't print variant. } else { myfile << chrom(i); myfile << "\t"; myfile << pos(i); myfile << "\t"; if(id(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << id(i); myfile << "\t"; } myfile << ref(i); myfile << "\t"; myfile << alt(i); myfile << "\t"; if(qual(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << qual(i); myfile << "\t"; } if(filter(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << filter(i); myfile << "\t"; } if(info(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << info(i); } // gt region. myfile << "\t"; myfile << gt_cm(i, 0); for(j=1; j<column_names.size(); j++){ myfile << "\t"; myfile << gt_cm(i, j); } myfile << "\n"; } } myfile.close(); return; }
//' rcpp_lines_as_network //' //' Return OSM data in Simple Features format //' //' @param sf_lines An sf collection of LINESTRING objects //' @param pr Rcpp::DataFrame containing the weighting profile //' //' @return Rcpp::List objects of OSM data //' //' @noRd // [[Rcpp::export]] Rcpp::List rcpp_lines_as_network (const Rcpp::List &sf_lines, Rcpp::DataFrame pr) { std::map <std::string, float> profile; Rcpp::StringVector hw = pr [1]; Rcpp::NumericVector val = pr [2]; for (int i = 0; i != hw.size (); i ++) profile.insert (std::make_pair (std::string (hw [i]), val [i])); Rcpp::CharacterVector nms = sf_lines.attr ("names"); if (nms [nms.size () - 1] != "geometry") throw std::runtime_error ("sf_lines have no geometry component"); if (nms [0] != "osm_id") throw std::runtime_error ("sf_lines have no osm_id component"); int one_way_index = -1; int one_way_bicycle_index = -1; int highway_index = -1; for (int i = 0; i < nms.size (); i++) { if (nms [i] == "oneway") one_way_index = i; if (nms [i] == "oneway.bicycle") one_way_bicycle_index = i; if (nms [i] == "highway") highway_index = i; } Rcpp::CharacterVector ow = NULL; Rcpp::CharacterVector owb = NULL; Rcpp::CharacterVector highway = NULL; if (one_way_index >= 0) ow = sf_lines [one_way_index]; if (one_way_bicycle_index >= 0) owb = sf_lines [one_way_bicycle_index]; if (highway_index >= 0) highway = sf_lines [highway_index]; if (ow.size () > 0) { if (ow.size () == owb.size ()) { for (unsigned i = 0; i != ow.size (); ++ i) if (ow [i] == "NA" && owb [i] != "NA") ow [i] = owb [i]; } else if (owb.size () > ow.size ()) ow = owb; } Rcpp::List geoms = sf_lines [nms.size () - 1]; std::vector<bool> isOneWay (geoms.length ()); std::fill (isOneWay.begin (), isOneWay.end (), false); // Get dimension of matrix size_t nrows = 0; int ngeoms = 0; for (auto g = geoms.begin (); g != geoms.end (); ++g) { // Rcpp uses an internal proxy iterator here, NOT a direct copy Rcpp::NumericMatrix gi = (*g); int rows = gi.nrow () - 1; nrows += rows; if (ngeoms < ow.size ()) { if (!(ow [ngeoms] == "yes" || ow [ngeoms] == "-1")) { nrows += rows; isOneWay [ngeoms] = true; } } ngeoms ++; } Rcpp::NumericMatrix nmat = Rcpp::NumericMatrix (Rcpp::Dimension (nrows, 6)); Rcpp::CharacterMatrix idmat = Rcpp::CharacterMatrix (Rcpp::Dimension (nrows, 3)); nrows = 0; ngeoms = 0; int fake_id = 0; for (auto g = geoms.begin (); g != geoms.end (); ++ g) { Rcpp::NumericMatrix gi = (*g); std::string hway = std::string (highway [ngeoms]); float hw_factor = profile [hway]; if (hw_factor == 0.0) hw_factor = 1e-5; hw_factor = 1.0 / hw_factor; Rcpp::List ginames = gi.attr ("dimnames"); Rcpp::CharacterVector rnms; if (ginames.length () > 0) rnms = ginames [0]; else { rnms = Rcpp::CharacterVector (gi.nrow ()); for (int i = 0; i < gi.nrow (); i ++) rnms [i] = fake_id ++; } if (rnms.size () != gi.nrow ()) throw std::runtime_error ("geom size differs from rownames"); for (int i = 1; i < gi.nrow (); i ++) { float d = haversine (gi (i-1, 0), gi (i-1, 1), gi (i, 0), gi (i, 1)); nmat (nrows, 0) = gi (i-1, 0); nmat (nrows, 1) = gi (i-1, 1); nmat (nrows, 2) = gi (i, 0); nmat (nrows, 3) = gi (i, 1); nmat (nrows, 4) = d; nmat (nrows, 5) = d * hw_factor; idmat (nrows, 0) = rnms (i-1); idmat (nrows, 1) = rnms (i); idmat (nrows, 2) = hway; nrows ++; if (isOneWay [ngeoms]) { nmat (nrows, 0) = gi (i, 0); nmat (nrows, 1) = gi (i, 1); nmat (nrows, 2) = gi (i-1, 0); nmat (nrows, 3) = gi (i-1, 1); nmat (nrows, 4) = d; nmat (nrows, 5) = d * hw_factor; idmat (nrows, 0) = rnms (i); idmat (nrows, 1) = rnms (i-1); idmat (nrows, 2) = hway; nrows ++; } } ngeoms ++; } Rcpp::List res (2); res [0] = nmat; res [1] = idmat; return res; }