//' @export // [[Rcpp::export]] arma::vec th(Rcpp::StringVector strata_rsid, Rcpp::StringVector rsid, Rcpp::NumericVector r2){ // Inputs: // // strata_rsid is the overall index of rsid returned from stratify() // rsid is the rsid column from LdList // r2 is the r2 column from LdList // Create the output vector of th numbers arma::vec th(strata_rsid.size(), fill::zeros); // Find the indices of where the rsid are in strata_rsid for(int i = 0; i < rsid.size(); i++){ // Pull out each of the RSIDs successively // std::string id = rsid(i); // Find the index in strata_rsid where the RSID string is // and fill it in with the r2 at the given id. // th( arma::find( strata_rsid == rsid(i) ) ) = r2(i); } // Create duplicate vector to fill in discretized values arma::vec out = th; out.elem( find( th >= 0.2 ) ).fill(0.2); out.elem( find( th >= 0.4 ) ).fill(0.4); out.elem( find( th >= 0.6 ) ).fill(0.6); out.elem( find( th >= 0.8 ) ).fill(0.8); out.elem( find( th >= 0.9 ) ).fill(0.9); out.elem( find( th >= 1 ) ).fill(1); return out; }
void graph_from_df (Rcpp::DataFrame gr, vertex_map_t &vm, edge_map_t &edge_map, vert2edge_map_t &vert2edge_map) { Rcpp::StringVector from = gr ["from_id"]; Rcpp::StringVector to = gr ["to_id"]; Rcpp::NumericVector from_lon = gr ["from_lon"]; Rcpp::NumericVector from_lat = gr ["from_lat"]; Rcpp::NumericVector to_lon = gr ["to_lon"]; Rcpp::NumericVector to_lat = gr ["to_lat"]; Rcpp::NumericVector edge_id = gr ["edge_id"]; Rcpp::NumericVector dist = gr ["d"]; Rcpp::NumericVector weight = gr ["d_weighted"]; Rcpp::StringVector hw = gr ["highway"]; for (int i = 0; i < to.length (); i ++) { osm_id_t from_id = std::string (from [i]); osm_id_t to_id = std::string (to [i]); if (vm.find (from_id) == vm.end ()) { osm_vertex_t fromV = osm_vertex_t (); fromV.set_lat (from_lat [i]); fromV.set_lon (from_lon [i]); vm.emplace (from_id, fromV); } osm_vertex_t from_vtx = vm.at (from_id); from_vtx.add_neighbour_out (to_id); vm [from_id] = from_vtx; if (vm.find (to_id) == vm.end ()) { osm_vertex_t toV = osm_vertex_t (); toV.set_lat (to_lat [i]); toV.set_lon (to_lon [i]); vm.emplace (to_id, toV); } osm_vertex_t to_vtx = vm.at (to_id); to_vtx.add_neighbour_in (from_id); vm [to_id] = to_vtx; std::set <int> replacementEdges; osm_edge_t edge = osm_edge_t (from_id, to_id, dist [i], weight [i], std::string (hw [i]), edge_id [i], replacementEdges); edge_map.emplace (edge_id [i], edge); add_to_edge_map (vert2edge_map, from_id, edge_id [i]); add_to_edge_map (vert2edge_map, to_id, edge_id [i]); } }
// Scratch entry point callable from R.
//
// Coerces `a` and `b` to numeric vectors and accumulates their discrete
// convolution, but that result is NOT returned: the function returns `a`
// coerced to a character vector with the string "1" appended.
//
// NOTE(review): the convolution is dead work as far as the return value is
// concerned. It is kept so that the coercions and the allocation (and any
// error they raise) happen exactly as before.
RcppExport SEXP test_cpp(SEXP a, SEXP b)
{
    Rcpp::NumericVector lhs(a);
    Rcpp::NumericVector rhs(b);

    Rcpp::StringVector labels = a;
    labels.push_back("1");

    const int lhs_len = lhs.size();
    const int rhs_len = rhs.size();
    const int conv_len = lhs_len + rhs_len - 1;

    Rcpp::NumericVector conv(conv_len);
    for (int p = 0; p < lhs_len; ++p)
    {
        const double left = lhs[p];
        for (int q = 0; q < rhs_len; ++q)
        {
            conv[p + q] += left * rhs[q];
        }
    }

    return labels;
}
// Convert a data frame into a character matrix with one matrix column per
// data-frame column; every column is coerced to a character vector.
//
// The number of rows is taken from the first column. A data frame with no
// columns yields an empty 0 x 0 matrix (previously df(0) was evaluated
// unconditionally and failed with an index error). A column whose length
// differs from the first raises an R error instead of being written past the
// matrix bounds.
//
// The "col.names" and "row.names" attributes of df are copied onto the result
// as plain attributes.
// NOTE(review): data frames normally carry "names" rather than "col.names",
// and matrices use "dimnames"; confirm whether any caller reads these.
Rcpp::StringMatrix DataFrame_to_StringMatrix( Rcpp::DataFrame df ){
  const int n_cols = df.size();
  if( n_cols == 0 ){
    return Rcpp::StringMatrix(0, 0);
  }

  Rcpp::StringVector first_column = df(0);
  const int n_rows = first_column.size();

  Rcpp::StringMatrix sm(n_rows, n_cols);
  sm.attr("col.names") = df.attr("col.names");
  sm.attr("row.names") = df.attr("row.names");

  for(int i=0; i < n_cols; i++){
    Rcpp::StringVector column = df(i);
    if( column.size() != n_rows ){
      Rcpp::stop("All columns of the data frame must have the same length.");
    }
    for(int j=0; j < n_rows; j++){
      sm(j, i) = column(j);
    }
  }

  return sm;
}
// Load the data set called `name` through the R core, refresh both editor
// widgets from it, and show them as the "Data" and "Variable" tabs.
//
// rExe            R core used to display the data and to query variable
//                 types, column names and the data frame.
// name            name of the data set, passed to rExe->diplayData().
// dataEditor      receives variable types, variable names and the data.
// variableEditor  receives the variable list; its checkWidgetVisibility()
//                 slot is connected to this widget's currentChanged(int).
void TabEditor::displayEditor(RCore *rExe,std::string name, DataEditor *dataEditor, VariableEditor *variableEditor){
    qDebug("Display Editor");
    rExe->diplayData(name);

    // ---- Refresh the data editor
    dataEditor->setVarTypes(rExe->getVarTypes());

    QStringList varNames;
    Rcpp::StringVector columnNames = rExe->getColNames();
    const int columnCount = columnNames.size();
    for (int col = 0; col != columnCount; ++col) {
        varNames.append(QString(columnNames[col]));
    }
    dataEditor->setVarNames(varNames);
    dataEditor->loadData(rExe->getDataFrame(), rExe->getColNames());

    // ---- Refresh the variable editor
    variableEditor->loadVariable(rExe, rExe->getColNames());

    // ---- Show both editors as tabs on the west side
    addTab(dataEditor, "Data");
    addTab(variableEditor, "Variable");
    setTabPosition(West);

    // Update 30 June: re-check widget visibility whenever the tab changes.
    connect(this, SIGNAL(currentChanged(int)), variableEditor, SLOT(checkWidgetVisibility()));
}
// [[Rcpp::export]] Rcpp::NumericMatrix infoContentMethod_cpp( Rcpp::StringVector& id1_, Rcpp::StringVector& id2_, Rcpp::List& anc_, Rcpp::NumericVector& ic_, const std::string& method_, const std::string& ont_ ) { go_dist_func_t* go_dist; // Resnik does not consider how distant the terms are from their common ancestor. // Lin and Jiang take that distance into account. if (method_ == "Resnik") { go_dist = &go_dist_Resnik; } else if (method_ == "Lin") { go_dist = &go_dist_Lin; } else if (method_ == "Jiang") { go_dist = &go_dist_Jiang; } else if (method_ == "Rel") { go_dist = &go_dist_Rel; } else { throw std::runtime_error( "Unknown GO distance method" ); } typedef std::string term_id_t; typedef std::set<term_id_t> term_set_t; // calculate the maximum IC and build the map of normalized IC typedef std::map<term_id_t, double> ic_map_t; ic_map_t normIcMap; // more specific term, larger IC value. // Normalized, all divide the most informative IC. // all IC values range from 0(root node) to 1(most specific node) double mic = NA_REAL; { Rcpp::StringVector icNames( ic_.names() ); for (std::size_t i=0; i < ic_.size(); i++ ) { const double cic = ic_[i]; if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue; if ( Rcpp::NumericVector::is_na( mic ) || mic < cic ) mic = cic; } LOG_DEBUG( "mic=" << mic ); for (std::size_t i=0; i < ic_.size(); i++ ) { const double cic = ic_[i]; if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue; normIcMap.insert( std::make_pair( (std::string) icNames[i], cic / mic ) ); } } // set root node IC to 0 if(ont_ == "DO") { normIcMap["DOID:4"] = 0; } else { normIcMap["all"] = 0; } // convert anc_ into map of sets typedef std::map<term_id_t, term_set_t> anc_map_t; anc_map_t ancMap; { Rcpp::StringVector goTerms( anc_.names() ); for (std::size_t i=0; i < anc_.size(); i++ ) { const std::vector<std::string> ancVec = Rcpp::as<std::vector<std::string> >( anc_[i] ); term_set_t ancestors( ancVec.begin(), ancVec.end() ); // term 
itself is also considered an ancestor ancestors.insert( (std::string)goTerms[i] ); ancMap.insert( std::make_pair( (std::string) goTerms[i], ancestors ) ); } } Rcpp::NumericMatrix res( id1_.size(), id2_.size() ); res.attr("dimnames") = Rcpp::Rcpp_list2( id1_, id2_ ); for ( std::size_t i = 0; i < id1_.size(); i++ ) { const std::string id1_term = (std::string)id1_[i]; const ic_map_t::const_iterator iIcIt = normIcMap.find( id1_term ); if ( iIcIt != normIcMap.end() && iIcIt->second != 0 ) { const double iIc = iIcIt->second; LOG_DEBUG( "ic[" << id1_term << "]=" << iIc ); const anc_map_t::const_iterator iAncsIt = ancMap.find( id1_term ); for ( std::size_t j = 0; j < id2_.size(); j++ ) { const std::string id2_term = (std::string)id2_[j]; const ic_map_t::const_iterator jIcIt = normIcMap.find( id2_term ); if ( jIcIt != normIcMap.end() && jIcIt->second != 0 ) { const anc_map_t::const_iterator jAncsIt = ancMap.find( id2_term ); // find common ancestors term_set_t commonAncs; if ( iAncsIt != ancMap.end() && jAncsIt != ancMap.end() ) { std::set_intersection( iAncsIt->second.begin(), iAncsIt->second.end(), jAncsIt->second.begin(), jAncsIt->second.end(), std::inserter( commonAncs, commonAncs.end() ) ); } LOG_DEBUG( "n(commonAncs(" << id1_term << "," << id2_term << "))=" << commonAncs.size() ); // Information Content of the most informative common ancestor (MICA) double mica = 0; for ( term_set_t::const_iterator termIt = commonAncs.begin(); termIt != commonAncs.end(); ++termIt ) { ic_map_t::const_iterator ancIcIt = normIcMap.find( *termIt ); if ( ancIcIt != normIcMap.end() && mica < ancIcIt->second ) mica = ancIcIt->second; } LOG_DEBUG( "mica(" << id1_term << "," << id2_term << ")=" << mica ); res(i,j) = go_dist( mica, iIc, jIcIt->second, mic ); } else { res(i,j) = NA_REAL; } } } else { for ( std::size_t j = 0; j < id2_.size(); j++ ) { res(i,j) = NA_REAL; } } } return ( res ); }
// [[Rcpp::export]] void write_vcf_body_gz( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { // http://stackoverflow.com/a/5649224 // fix DataFrame Rcpp::StringVector chrom = fix["CHROM"]; Rcpp::StringVector pos = fix["POS"]; Rcpp::StringVector id = fix["ID"]; Rcpp::StringVector ref = fix["REF"]; Rcpp::StringVector alt = fix["ALT"]; Rcpp::StringVector qual = fix["QUAL"]; Rcpp::StringVector filter = fix["FILTER"]; Rcpp::StringVector info = fix["INFO"]; // gt DataFrame Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt); Rcpp::StringVector column_names(gt.size()); column_names = gt.attr("names"); int i = 0; int j = 0; gzFile *fi = (gzFile *)gzopen(filename.c_str(),"ab"); // gzFile *fi = (gzFile *)gzopen(filename.c_str(),"abw"); for(i=0; i<chrom.size(); i++){ Rcpp::checkUserInterrupt(); if(mask == 1 && filter(i) != "PASS" ){ // Don't print variant. } else { std::string tmpstring; tmpstring = chrom(i); tmpstring = tmpstring + "\t" + pos(i) + "\t"; if(id(i) == NA_STRING){ tmpstring = tmpstring + "."; } else { tmpstring = tmpstring + id(i); } tmpstring = tmpstring + "\t" + ref(i) + "\t" + alt(i) + "\t"; if(qual(i) == NA_STRING){ tmpstring = tmpstring + "." + "\t"; } else { tmpstring = tmpstring + qual(i) + "\t"; } if(filter(i) == NA_STRING){ tmpstring = tmpstring + "." + "\t"; } else { tmpstring = tmpstring + filter(i) + "\t"; } tmpstring = tmpstring + info(i); // gt portion for(j=0; j<column_names.size(); j++){ if(gt_cm(i, j) == NA_STRING){ tmpstring = tmpstring + "\t" + "./."; } else { tmpstring = tmpstring + "\t" + gt_cm(i, j); } } // gzwrite(fi,"my decompressed data",strlen("my decompressed data")); // gzwrite(fi,"\n",strlen("\n")); // std::string tmpstring = "test string\n"; gzwrite(fi, (char *)tmpstring.c_str(), tmpstring.size()); gzwrite(fi,"\n",strlen("\n")); } } gzclose(fi); return; }
// [[Rcpp::export]] void write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { //int write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { // fix DataFrame Rcpp::StringVector chrom = fix["CHROM"]; Rcpp::StringVector pos = fix["POS"]; Rcpp::StringVector id = fix["ID"]; Rcpp::StringVector ref = fix["REF"]; Rcpp::StringVector alt = fix["ALT"]; Rcpp::StringVector qual = fix["QUAL"]; Rcpp::StringVector filter = fix["FILTER"]; Rcpp::StringVector info = fix["INFO"]; // gt DataFrame Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt); Rcpp::StringVector column_names(gt.size()); column_names = gt.attr("names"); // column_names = gt_cm.attr("col.names"); // delete gt; int i = 0; int j = 0; // Uncompressed. std::ofstream myfile; myfile.open (filename.c_str(), std::ios::out | std::ios::app | std::ios::binary); // gzFile *fi = (gzFile *)gzopen("file.gz","wb"); for(i=0; i<chrom.size(); i++){ Rcpp::checkUserInterrupt(); if(mask == 1 && filter(i) == "PASS" ){ // Don't print variant. } else { myfile << chrom(i); myfile << "\t"; myfile << pos(i); myfile << "\t"; if(id(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << id(i); myfile << "\t"; } myfile << ref(i); myfile << "\t"; myfile << alt(i); myfile << "\t"; if(qual(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << qual(i); myfile << "\t"; } if(filter(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << filter(i); myfile << "\t"; } if(info(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << info(i); } // gt region. myfile << "\t"; myfile << gt_cm(i, 0); for(j=1; j<column_names.size(); j++){ myfile << "\t"; myfile << gt_cm(i, j); } myfile << "\n"; } } myfile.close(); return; }
//' rcpp_lines_as_network
//'
//' Convert an sf collection of OSM LINESTRING objects into matrices of
//' network edges, one row per pair of consecutive points of each line.
//'
//' @param sf_lines An sf collection of LINESTRING objects. The first column
//' must be named "osm_id", the last one "geometry", and a "highway" column
//' must be present. Optional "oneway" and "oneway.bicycle" columns control
//' whether reverse edges are generated.
//' @param pr Rcpp::DataFrame containing the weighting profile; the second
//' column is read as highway type and the third as its weighting value.
//'
//' @return Rcpp::List of two matrices: a numeric matrix with six columns
//' (the first two coordinate columns of the start point, the same two of the
//' end point, the haversine distance, and the distance divided by the
//' profile value) and a character matrix with three columns (start point id,
//' end point id, highway type).
//'
//' @noRd
// [[Rcpp::export]]
Rcpp::List rcpp_lines_as_network (const Rcpp::List &sf_lines,
        Rcpp::DataFrame pr)
{
    // Weighting profile: highway type -> value
    std::map <std::string, float> profile;
    Rcpp::StringVector hw = pr [1];
    Rcpp::NumericVector val = pr [2];
    for (int i = 0; i != hw.size (); i ++)
        profile.insert (std::make_pair (std::string (hw [i]), val [i]));

    Rcpp::CharacterVector nms = sf_lines.attr ("names");
    // The size test guards the nms [size - 1] access for an unnamed list.
    if (nms.size () == 0 || nms [nms.size () - 1] != "geometry")
        throw std::runtime_error ("sf_lines have no geometry component");
    if (nms [0] != "osm_id")
        throw std::runtime_error ("sf_lines have no osm_id component");

    int one_way_index = -1;
    int one_way_bicycle_index = -1;
    int highway_index = -1;
    for (int i = 0; i < nms.size (); i++)
    {
        if (nms [i] == "oneway")
            one_way_index = i;
        if (nms [i] == "oneway.bicycle")
            one_way_bicycle_index = i;
        if (nms [i] == "highway")
            highway_index = i;
    }
    // The highway type of every line is read unconditionally below; without
    // this check a missing column led to reads past the end of an empty
    // vector.
    if (highway_index < 0)
        throw std::runtime_error ("sf_lines have no highway component");

    // Empty unless the corresponding column exists.
    Rcpp::CharacterVector ow;
    Rcpp::CharacterVector owb;
    if (one_way_index >= 0)
        ow = sf_lines [one_way_index];
    if (one_way_bicycle_index >= 0)
        owb = sf_lines [one_way_bicycle_index];
    Rcpp::CharacterVector highway = sf_lines [highway_index];

    // Merge the two one-way columns: "oneway.bicycle" fills in wherever
    // "oneway" is the string "NA", or replaces it entirely when it is longer.
    if (ow.size () > 0)
    {
        if (ow.size () == owb.size ())
        {
            for (int i = 0; i != ow.size (); ++ i)
                if (ow [i] == "NA" && owb [i] != "NA")
                    ow [i] = owb [i];
        } else if (owb.size () > ow.size ())
            ow = owb;
    }

    Rcpp::List geoms = sf_lines [nms.size () - 1];

    // addReverse [k] is true when line k also gets an edge in the opposite
    // direction for every segment, i.e. when a one-way value exists for it
    // and that value is neither "yes" nor "-1".
    std::vector<bool> addReverse (geoms.length (), false);

    // First pass: count the edges to size the output matrices.
    size_t nrows = 0;
    int ngeoms = 0;
    for (auto g = geoms.begin (); g != geoms.end (); ++g)
    {
        // Rcpp uses an internal proxy iterator here, NOT a direct copy
        Rcpp::NumericMatrix gi = (*g);
        // n points give n - 1 segments. A matrix without rows gives none;
        // it previously contributed -1, which wrapped the unsigned counter.
        const int rows = gi.nrow () > 0 ? gi.nrow () - 1 : 0;
        nrows += rows;
        if (ngeoms < ow.size ())
        {
            if (!(ow [ngeoms] == "yes" || ow [ngeoms] == "-1"))
            {
                nrows += rows;
                addReverse [ngeoms] = true;
            }
        }
        ngeoms ++;
    }

    Rcpp::NumericMatrix nmat = Rcpp::NumericMatrix (Rcpp::Dimension (nrows, 6));
    Rcpp::CharacterMatrix idmat = Rcpp::CharacterMatrix (Rcpp::Dimension (nrows, 3));

    // Second pass: fill the matrices.
    nrows = 0;
    ngeoms = 0;
    int fake_id = 0;
    for (auto g = geoms.begin (); g != geoms.end (); ++ g)
    {
        Rcpp::NumericMatrix gi = (*g);
        std::string hway = std::string (highway [ngeoms]);
        // Highway types missing from the profile get the value 0, which is
        // replaced by 1e-5 so that the inverse below stays finite.
        float hw_factor = profile [hway];
        if (hw_factor == 0.0) hw_factor = 1e-5;
        hw_factor = 1.0 / hw_factor;

        // Point ids come from the row names of the geometry matrix; when
        // there are none, running integers are used instead.
        Rcpp::List ginames = gi.attr ("dimnames");
        Rcpp::CharacterVector rnms;
        if (ginames.length () > 0)
            rnms = ginames [0];
        else
        {
            rnms = Rcpp::CharacterVector (gi.nrow ());
            for (int i = 0; i < gi.nrow (); i ++)
                rnms [i] = fake_id ++;
        }
        if (rnms.size () != gi.nrow ())
            throw std::runtime_error ("geom size differs from rownames");

        for (int i = 1; i < gi.nrow (); i ++)
        {
            float d = haversine (gi (i-1, 0), gi (i-1, 1), gi (i, 0),
                    gi (i, 1));
            nmat (nrows, 0) = gi (i-1, 0);
            nmat (nrows, 1) = gi (i-1, 1);
            nmat (nrows, 2) = gi (i, 0);
            nmat (nrows, 3) = gi (i, 1);
            nmat (nrows, 4) = d;
            nmat (nrows, 5) = d * hw_factor;
            idmat (nrows, 0) = rnms (i-1);
            idmat (nrows, 1) = rnms (i);
            idmat (nrows, 2) = hway;
            nrows ++;
            if (addReverse [ngeoms])
            {
                // Same segment with start and end swapped.
                nmat (nrows, 0) = gi (i, 0);
                nmat (nrows, 1) = gi (i, 1);
                nmat (nrows, 2) = gi (i-1, 0);
                nmat (nrows, 3) = gi (i-1, 1);
                nmat (nrows, 4) = d;
                nmat (nrows, 5) = d * hw_factor;
                idmat (nrows, 0) = rnms (i);
                idmat (nrows, 1) = rnms (i-1);
                idmat (nrows, 2) = hway;
                nrows ++;
            }
        }
        ngeoms ++;
    }

    Rcpp::List res (2);
    res [0] = nmat;
    res [1] = idmat;

    return res;
}
/** <summary> Appends data onto an existing timeseries.</summary>
    <remarks> Several checks are performed before anything is appended, and all
    of them must pass:
     - the data frame and the timeseries have the same number of fields
     - every field of the data frame exists in the timeseries and has the
       matching R type (double for Timestamp/Double, integer for
       Int8/Int32/Date, character for String fields)
     - every field of the timeseries occurs exactly once in the data frame
     - all fields of the data frame have the same length
    The length check used to compare the first column with itself and so never
    failed; it now compares every column against the first. String fields are
    recognised with the same substring test that the append step uses.</remarks>
    <param name="_groupID">Integer argument for the HDF5 file identifier (group ID).</param>
    <param name="_seriesName">String argument for the timeseries name.</param>
    <param name="_appendData">Data frame argument of the data to append.</param>
    <param name="_discardOverlap">Converted to bool and passed on to
    appendRecordSet(); if true, appended records that overlap with existing
    records are discarded.</param>
    <returns> Returns 1 if successful. Failures are forwarded to R as errors.</returns>
*/
SEXP TSDBappend(SEXP _groupID, SEXP _seriesName, SEXP _appendData, SEXP _discardOverlap) {
try {
    //checking arguments
    if (TYPEOF(_groupID) != INTSXP)
        throw std::runtime_error("Group ID should be an integer argument.");
    if (TYPEOF(_seriesName) != STRSXP)
        throw std::runtime_error("Timeseries name should be a string argument.");
    if (TYPEOF(_appendData) != VECSXP)
        throw std::runtime_error("Data should be a data frame.");

    /******************************************************
    CHECKING PROPERTIES OF THE TABLE AND THE APPENDING DATA
    *******************************************************/

    //properties of the table
    std::string seriesName = Rcpp::as<std::string>(_seriesName);
    int groupID = Rcpp::as<int>(_groupID);
    tsdb::Timeseries ts(groupID, seriesName);
    size_t tableFieldCount = ts.structure()->getNFields();

    //properties of the appending data frame
    Rcpp::List appendData(_appendData);
    Rcpp::StringVector appendDataNames = appendData.attr("names");
    size_t appendingFieldCount = appendDataNames.length();

    //checking for number of fields
    if (appendingFieldCount != tableFieldCount)
        throw std::runtime_error("Appending data frame and the TSDB table "
                "have a different number of fields.");

    //the first column fixes the number of records; every other column is
    //compared against it below
    const int numRecordsToAppend =
        (appendingFieldCount > 0) ? LENGTH(VECTOR_ELT(_appendData, 0)) : 0;

    for (size_t i=0; i<appendingFieldCount; i++) {
        std::string dfName = std::string(appendDataNames[i]);
        SEXP dfColumn = VECTOR_ELT(_appendData, i);

        //if the index isn't found, an exception will occur
        size_t index = ts.structure()->getFieldIndexByName(dfName);

        //checking the types match
        std::string TSDBtype = ts.structure()->getField(index)->getTSDBType();
        bool typeMatches = true;
        if (TSDBtype == "Timestamp" || TSDBtype == "Double")
            typeMatches = (TYPEOF(dfColumn) == REALSXP);
        else if (TSDBtype == "Int8" || TSDBtype == "Int32" || TSDBtype == "Date")
            typeMatches = (TYPEOF(dfColumn) == INTSXP);
        else if (TSDBtype.find("String") != std::string::npos)
            typeMatches = (TYPEOF(dfColumn) == STRSXP);
        if (!typeMatches)
            throw std::runtime_error(TSDBappendErrorMessage(dfName,TSDBtype));

        //checking existence of table fields in the data frame
        std::string fieldName = ts.structure()->getField(i)->getName();
        int check = (int) std::count(appendDataNames.begin(),appendDataNames.end(),fieldName.c_str());
        if (check != 1) {
            if (check == 0)
                throw std::runtime_error("Field " + fieldName + " was not found "
                        "in the appending data.");
            else
                throw std::runtime_error("Field " + fieldName + " was found "
                        "multiple times.");
        }

        //checking for uniform length in data frame.
        //this could be an issue if a named list is passed as an argument.
        if (numRecordsToAppend != LENGTH(dfColumn))
            throw std::runtime_error("All fields in the appending data must have"
                    " the same length/");
    }

    /**************************************
    CHECKS PASSED.  APPENDING DATA.
    **************************************/

    //vector of formatted input, ready for appending
    /* http://stackoverflow.com/questions/7251253/c-no-matching-function-for-call-but-the-candidate-has-the-exact-same-signatur
     explains why 'tsdb::RecordSet records((size_t) numRecordsToAppend, ts.structure());'
     would not work. */
    boost::shared_ptr<tsdb::Structure> tsStructure = ts.structure();
    tsdb::RecordSet records((size_t) numRecordsToAppend, tsStructure);

    for (size_t dfIndex=0; dfIndex<appendingFieldCount; dfIndex++) {
        std::string dfName = std::string(appendDataNames[dfIndex]);

        //index in the TSDB table
        size_t tableIndex = ts.structure()->getFieldIndexByName(dfName);
        std::string TSDBtype = ts.structure()->getField(tableIndex)->getTSDBType();
        SEXP dfColumn = VECTOR_ELT(_appendData,dfIndex);

        if (TSDBtype == "Timestamp") {
            const double* values = REAL(dfColumn);
            for (int row=0; row<numRecordsToAppend; row++)
                records[row][tableIndex] = static_cast<tsdb::timestamp_t>(values[row]);
        } else if (TSDBtype == "Double") {
            const double* values = REAL(dfColumn);
            for (int row=0; row<numRecordsToAppend; row++)
                records[row][tableIndex] = static_cast<tsdb::ieee64_t>(values[row]);
        } else if (TSDBtype == "Int8") {
            const int* values = INTEGER(dfColumn);
            for (int row=0; row<numRecordsToAppend; row++)
                records[row][tableIndex] = static_cast<tsdb::int8_t>(values[row]);
        } else if (TSDBtype == "Int32") {
            const int* values = INTEGER(dfColumn);
            for (int row=0; row<numRecordsToAppend; row++)
                records[row][tableIndex] = static_cast<tsdb::int32_t>(values[row]);
        } else if (TSDBtype == "Date") {
            const int* values = INTEGER(dfColumn);
            for (int row=0; row<numRecordsToAppend; row++)
                records[row][tableIndex] = static_cast<tsdb::date_t>(values[row]);
        } else if (TSDBtype.find("String") != std::string::npos) {
            Rcpp::StringVector values(dfColumn);
            for (int row=0; row<numRecordsToAppend; row++)
                records[row][tableIndex] = std::string(values[row]);
        }
    }

    //appending the data
    bool discardOverlap = Rcpp::as<bool>(_discardOverlap);
    ts.appendRecordSet(records,discardOverlap);

    return Rcpp::wrap(1);

} catch( std::exception &ex ) {
    forward_exception_to_r(ex);
} catch(...) {
    ::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}
/** <summary> Creates a new timeseries within an HDF5 file.</summary> <param name="groupID">Integer argument for the HDF5 file identifier (group ID).</param> <param name="seriesName">String argument for the timeseries name.</param> <param name="seriesDescription">String argument for timeseries' description.</param> <param name="fields">Data frame argument containing two named elements: -fieldNames - names of the fields -fieldTypes - types of the fields These elements must have the same dimension.</param> <returns> Returns a non-negative integer HDF5 group identifier if successful; otherwise returns a negative value. </returns> */ SEXP TSDBcreate_timeseries(SEXP _groupID, SEXP _seriesName, SEXP _seriesDescription, SEXP _fields) { try { using namespace std; //checking arguments if (TYPEOF(_groupID) != INTSXP) throw std::runtime_error("Group ID should be an integer argument."); if (TYPEOF(_seriesName) != STRSXP) throw std::runtime_error("Timeseries name should be a string argument."); if (TYPEOF(_seriesDescription) != STRSXP) throw std::runtime_error("Timeseries description should be a string argument."); if (TYPEOF(_fields) != VECSXP) throw std::runtime_error("Fields argument should be a named list."); //checking properties of the list Rcpp::List fieldsList(_fields); Rcpp::StringVector listNames = fieldsList.attr("names"); //checking for the existence of 'fieldNames' and 'fieldTypes' list elements int check = (int) std::count(listNames.begin(),listNames.end(),"fieldNames"); if (check != 1) throw std::runtime_error("Fields argument should have one element named 'fieldNames'."); check = (int) std::count(listNames.begin(),listNames.end(),"fieldTypes"); if (check != 1) throw std::runtime_error("Fields argument should have one element named 'fieldTypes'."); Rcpp::StringVector fieldNames = fieldsList["fieldNames"]; Rcpp::StringVector fieldTypes = fieldsList["fieldTypes"]; if (fieldNames.length() != fieldNames.length()) throw std::runtime_error("'fieldNames' and 
'fieldTypes' should have the same length."); //grabbing other arguments string seriesName = Rcpp::as<string>(_seriesName); string seriesDescription = Rcpp::as<string>(_seriesDescription); int groupID = Rcpp::as<int>(_groupID); if (groupID < 0) throw std::runtime_error("Invalid group ID."); //building structure of the new timeseries int numFields = fieldNames.length(); vector<tsdb::Field*> fields; //first field is always the timestamp fields.push_back(new tsdb::TimestampField("TSDB_timestamp")); for (int i=0; i<numFields; i++) { string name = (string) fieldNames[i]; string type = (string) fieldTypes[i]; //transforming to lower case, so the comparison is case INsensitive boost::to_lower(type); if (type == "int8") fields.push_back(new tsdb::Int8Field(name)); else if (type == "int32") fields.push_back(new tsdb::Int32Field(name)); else if (type == "double") fields.push_back(new tsdb::DoubleField(name)); else if (type == "date") fields.push_back(new tsdb::DateField(name)); else if (type.find("string") != std::string::npos) { char* stringLengthPr = strtok((char*) type.c_str(),"("); if (stringLengthPr == NULL) throw std::runtime_error("Field type for a string must have the form 'string(n)', for some integer n."); stringLengthPr = strtok(NULL, ")"); if (stringLengthPr == NULL) throw std::runtime_error("Field type for a string must have the form 'string(n)', for some integer n."); int stringLength = atoi(stringLengthPr); fields.push_back(new tsdb::StringField(name,stringLength)); } } boost::shared_ptr<tsdb::Structure> st = boost::make_shared<tsdb::Structure>(fields,false); tsdb::Timeseries ts = tsdb::Timeseries(groupID,seriesName,seriesDescription,st); Rcpp::IntegerVector status(1); status[0] = 1; return status; } catch( std::exception &ex ) { forward_exception_to_r(ex); } catch(...) { ::Rf_error( "c++ exception (unknown reason)" ); } return R_NilValue; }
/** <summary> Pulls records from the timeseries.</summary>
    <param name="_groupID">Integer argument for the HDF5 file identifier (group ID).</param>
    <param name="_seriesName">String argument for the timeseries name.</param>
    <param name="_startTimestamp"> Double argument for the first wanted record.
        Since R doesn't have a 64 bit integer, this argument is received by the
        library as a double, representing milliseconds since January 1, 1970.
        Within R the argument can be a string following the format
        '2009-04-01 16:30:00.003', which is converted to a double, or simply a
        double. As long as the argument represents a time less than 2^53
        milliseconds from the epoch (roughly 285,000 years), there is no loss
        of precision.</param>
    <param name="_endTimestamp"> Double argument for the timestamp of the last
        wanted record.</param>
    <param name="_fieldsWanted"> Character vector argument naming the wanted
        fields, or NULL to get every field of the timeseries.</param>
    <returns> Returns a data frame of wanted fields for the wanted records.
        Failures are forwarded to R as errors.</returns>
*/
SEXP TSDBget_records(SEXP _groupID, SEXP _seriesName, SEXP _startTimestamp,
        SEXP _endTimestamp, SEXP _fieldsWanted) {
try {
    //checking arguments
    if (TYPEOF(_groupID) != INTSXP)
        throw std::runtime_error("Group ID should an integer argument.");
    if (TYPEOF(_seriesName) != STRSXP)
        throw std::runtime_error("Timeseries name should a string.");
    if (TYPEOF(_startTimestamp) != REALSXP || TYPEOF(_endTimestamp) != REALSXP)
        throw std::runtime_error("Timestamp arguments must have type double.");

    //fieldsWanted might be an empty argument.
    //this check used to report the timestamp message by mistake.
    const bool allFieldsWanted = (TYPEOF(_fieldsWanted) == NILSXP);
    if (!allFieldsWanted && TYPEOF(_fieldsWanted) != STRSXP)
        throw std::runtime_error("Wanted fields should be a character vector or NULL.");

    //getting arguments
    hid_t groupID = static_cast<hid_t>(Rcpp::as<int>(_groupID));
    std::string seriesName = Rcpp::as<std::string>(_seriesName);
    tsdb::timestamp_t startTimestamp =
        static_cast<tsdb::timestamp_t>(Rcpp::as<double>(_startTimestamp));
    tsdb::timestamp_t endTimestamp =
        static_cast<tsdb::timestamp_t>(Rcpp::as<double>(_endTimestamp));

    if (groupID < 0)
        throw std::runtime_error("Invalid group ID.");

    //creating timeseries object
    tsdb::Timeseries ts(groupID, seriesName);

    //names of the wanted columns and their indices in the timeseries
    Rcpp::StringVector fieldsWanted;
    size_t numFieldsWanted;
    std::vector<size_t> indicesWanted;

    if (!allFieldsWanted) {
        //figuring out the indices of wanted columns
        fieldsWanted = Rcpp::StringVector(_fieldsWanted);
        numFieldsWanted = fieldsWanted.length();
        for (size_t i=0; i<numFieldsWanted; i++) {
            size_t index = ts.structure()->getFieldIndexByName(std::string(fieldsWanted[i]));
            indicesWanted.push_back(index);
        }
    } else {
        //grabbing all columns
        numFieldsWanted = ts.structure()->getNFields();
        //NOTE(review): ownership of this array is not visible here; confirm
        //whether the caller is expected to release it.
        char** fieldNames = ts.structure()->getNameOfFieldsAsArray();
        fieldsWanted = Rcpp::StringVector(numFieldsWanted);
        for (size_t i=0; i<numFieldsWanted; i++) {
            indicesWanted.push_back(i);
            fieldsWanted[i] = fieldNames[i];
        }
    }

    //loading the records into memory
    tsdb::RecordSet recordSet = ts.recordSet(startTimestamp, endTimestamp);
    size_t numRecords = recordSet.size();
    Rcpp::List records; //record container

    //looping through wanted columns; each one becomes a named list element
    //whose R type follows the TSDB type of the field
    for (size_t i = 0; i<numFieldsWanted; i++) {
        const size_t index = indicesWanted[i];
        const std::string columnName = std::string(fieldsWanted[i]);

        //type of the column
        std::string fieldType = ts.structure()->getField(index)->getTSDBType();

        if (fieldType == "Timestamp") {
            Rcpp::NumericVector columnData(numRecords);
            for (size_t row=0; row<numRecords; row++)
                columnData[row] = recordSet[row][index].toTimestamp();
            records.push_back(columnData,columnName);
        } else if (fieldType == "Date") {
            Rcpp::IntegerVector columnData(numRecords);
            for (size_t row=0; row<numRecords; row++)
                columnData[row] = recordSet[row][index].toDate();
            records.push_back(columnData,columnName);
        } else if (fieldType == "Int8") {
            Rcpp::IntegerVector columnData(numRecords);
            for (size_t row=0; row<numRecords; row++)
                columnData[row] = recordSet[row][index].toInt8();
            records.push_back(columnData,columnName);
        } else if (fieldType == "Int32") {
            Rcpp::IntegerVector columnData(numRecords);
            for (size_t row=0; row<numRecords; row++)
                columnData[row] = recordSet[row][index].toInt32();
            records.push_back(columnData,columnName);
        } else if (fieldType == "Double") {
            Rcpp::NumericVector columnData(numRecords);
            for (size_t row=0; row<numRecords; row++)
                columnData[row] = recordSet[row][index].toDouble();
            records.push_back(columnData,columnName);
        } else if (fieldType.find("String") != std::string::npos) {
            Rcpp::StringVector columnData(numRecords);
            for (size_t row=0; row<numRecords; row++)
                columnData[row] = recordSet[row][index].toString();
            records.push_back(columnData,columnName);
        }
    }

    return Rcpp::DataFrame::create(records);

} catch( std::exception &ex ) {
    forward_exception_to_r(ex);
} catch(...) {
    ::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}