Ejemplo n.º 1
0
//' Map r2 values onto the strata index and discretise them.
//'
//' Every position of \code{strata_rsid} whose id equals \code{rsid[i]}
//' receives \code{r2[i]}; positions without a match stay at 0. Each value
//' that reaches one of the thresholds 0.2, 0.4, 0.6, 0.8, 0.9, 1 is then
//' replaced by the largest threshold it reaches. Values below 0.2 are
//' returned unchanged.
//'
//' @param strata_rsid the overall index of rsid returned from stratify()
//' @param rsid the rsid column from LdList
//' @param r2 the r2 column from LdList (same length as rsid)
//' @return numeric vector with one entry per element of strata_rsid
//' @export
// [[Rcpp::export]]
arma::vec th(Rcpp::StringVector strata_rsid, Rcpp::StringVector rsid, Rcpp::NumericVector r2){

	// rsid and r2 are parallel columns; refuse to read past the end of r2.
	if (r2.size() != rsid.size())
		Rcpp::stop("rsid and r2 must have the same length.");

	// Plain std::string copies make the comparisons below unambiguous.
	const std::vector<std::string> strata_ids = Rcpp::as<std::vector<std::string> >(strata_rsid);
	const std::vector<std::string> ids = Rcpp::as<std::vector<std::string> >(rsid);

	// One slot per strata id, zero until a matching rsid is found.
	arma::vec raw(strata_ids.size(), arma::fill::zeros);

	// The original left this lookup commented out, so the result was always
	// all zeros. This is a straightforward O(n * m) scan; when an id occurs
	// several times in rsid the last occurrence wins.
	for (std::size_t i = 0; i < ids.size(); i++){
		const double value = r2[i];
		for (std::size_t j = 0; j < strata_ids.size(); j++){
			if (strata_ids[j] == ids[i])
				raw(j) = value;
		}
	}

	// Discretise on a copy so every comparison sees the undiscretised value.
	arma::vec out = raw;

	// Ascending order matters: a later, larger threshold overwrites an
	// earlier one for the same element.
	static const double cutoffs[] = {0.2, 0.4, 0.6, 0.8, 0.9, 1.0};
	const std::size_t n_cutoffs = sizeof(cutoffs) / sizeof(cutoffs[0]);
	for (std::size_t k = 0; k < n_cutoffs; k++)
		out.elem( arma::find( raw >= cutoffs[k] ) ).fill(cutoffs[k]);

	return out;

}
Ejemplo n.º 2
0
// Populate the vertex map, the edge map and the vertex-to-edge map from an
// edge-list data frame.
//
// gr must provide the columns "from_id", "to_id", "from_lon", "from_lat",
// "to_lon", "to_lat", "edge_id", "d", "d_weighted" and "highway"; each row
// describes one directed edge.
//
// For every row, both end vertices are created on first sight (with the
// coordinates given in that row), the neighbour lists of both vertices are
// extended, and the edge is registered in edge_map and in vert2edge_map for
// both of its vertices.
void graph_from_df (Rcpp::DataFrame gr, vertex_map_t &vm,
        edge_map_t &edge_map, vert2edge_map_t &vert2edge_map)
{
    Rcpp::StringVector from = gr ["from_id"];
    Rcpp::StringVector to = gr ["to_id"];
    Rcpp::NumericVector from_lon = gr ["from_lon"];
    Rcpp::NumericVector from_lat = gr ["from_lat"];
    Rcpp::NumericVector to_lon = gr ["to_lon"];
    Rcpp::NumericVector to_lat = gr ["to_lat"];
    Rcpp::NumericVector edge_id = gr ["edge_id"];
    Rcpp::NumericVector dist = gr ["d"];
    Rcpp::NumericVector weight = gr ["d_weighted"];
    Rcpp::StringVector hw = gr ["highway"];

    for (int i = 0; i < to.length (); i ++)
    {
        osm_id_t from_id = std::string (from [i]);
        osm_id_t to_id = std::string (to [i]);

        if (vm.find (from_id) == vm.end ())
        {
            osm_vertex_t fromV = osm_vertex_t ();
            fromV.set_lat (from_lat [i]);
            fromV.set_lon (from_lon [i]);
            vm.emplace (from_id, fromV);
        }
        // Update the stored vertex in place. The original copied the vertex
        // out of the map, modified the copy and assigned it back, which
        // duplicated the whole vertex twice per edge.
        // NOTE(review): relies on vm.at() returning a reference, as the
        // standard map containers do - confirm for vertex_map_t.
        vm.at (from_id).add_neighbour_out (to_id);

        if (vm.find (to_id) == vm.end ())
        {
            osm_vertex_t toV = osm_vertex_t ();
            toV.set_lat (to_lat [i]);
            toV.set_lon (to_lon [i]);
            vm.emplace (to_id, toV);
        }
        vm.at (to_id).add_neighbour_in (from_id);

        // New edges start without any replacement edges.
        std::set <int> replacementEdges;
        osm_edge_t edge = osm_edge_t (from_id, to_id, dist [i], weight [i],
                std::string (hw [i]), edge_id [i], replacementEdges);
        edge_map.emplace (edge_id [i], edge);
        add_to_edge_map (vert2edge_map, from_id, edge_id [i]);
        add_to_edge_map (vert2edge_map, to_id, edge_id [i]);
    }
}
// Scratch .Call entry point.
//
// Coerces `a` to a character vector, appends the element "1" and returns
// the result. Both arguments are also coerced to numeric vectors and their
// discrete convolution is computed, but that result is not returned: the
// original code had an unreachable `return xab;` after `return aaa;`.
//
// BEGIN_RCPP / END_RCPP turn C++ exceptions (for example from a failed
// coercion) into R errors instead of letting them escape through .Call.
RcppExport SEXP test_cpp(SEXP a, SEXP b) {
BEGIN_RCPP
  Rcpp::NumericVector xa(a);
  Rcpp::NumericVector xb(b);
  // Rcpp::StringVector aaa = "deine mudder";
  Rcpp::StringVector aaa = a;
  aaa.push_back("1") ;
  const int n_xa = xa.size(), n_xb = xb.size();
  // The convolution has n_xa + n_xb - 1 terms only when both inputs are
  // non-empty; otherwise the length would be wrong or negative.
  const int nab = (n_xa > 0 && n_xb > 0) ? n_xa + n_xb - 1 : 0;
  Rcpp::NumericVector xab(nab);
  for (int i = 0; i < n_xa; i++)
    for (int j = 0; j < n_xb; j++)
      xab[i + j] += xa[i] * xb[j];
  // NOTE(review): xab is computed but discarded; decide which of the two
  // results this function is meant to return.
  return aaa ;
END_RCPP
}
Ejemplo n.º 4
0
// Convert a data frame into a character matrix of the same shape.
//
// Every column is coerced to a character vector and copied into the
// matching matrix column. The number of rows is taken from the first
// column. A data frame without columns yields a 0 x 0 matrix (the original
// raised an index error on df(0)); a column whose length differs from the
// first one is rejected instead of writing outside the matrix.
Rcpp::StringMatrix DataFrame_to_StringMatrix( Rcpp::DataFrame df ){
  const int n_cols = df.size();
  if( n_cols == 0 ){
    return Rcpp::StringMatrix(0, 0);
  }

  Rcpp::StringVector sv = df(0);
  const int n_rows = sv.size();
  Rcpp::StringMatrix sm(n_rows, n_cols);

  // Carried over from the original implementation.
  // NOTE(review): data frames store their column names in "names", so
  // "col.names" is presumably absent here - confirm the intent.
  sm.attr("col.names") = df.attr("col.names");
  sm.attr("row.names") = df.attr("row.names");

  for(int i=0; i < n_cols; i++){
    sv = df(i);
    if( sv.size() != n_rows ){
      Rcpp::stop("All columns of the data frame must have the same length.");
    }
    for(int j=0; j < n_rows; j++){
      sm(j, i) = sv(j);
    }
  }

  return sm;
}
Ejemplo n.º 5
0
// Show the data set `name` in the two editor tabs.
//
// Asks the R core to display the data set, pushes its variable types,
// column names and data frame into the data editor, loads the variable
// editor, adds both editors as tabs ("Data" and "Variable") on the west
// side and connects tab changes to the variable editor's
// checkWidgetVisibility() slot.
void TabEditor::displayEditor(RCore *rExe,std::string name, DataEditor *dataEditor, VariableEditor *variableEditor){
    qDebug("Display Editor");
    rExe->diplayData(name);

    // Data editor: variable types first, then names, then the data itself.
    dataEditor->setVarTypes(rExe->getVarTypes());

    // Copy the R column names into a Qt string list.
    Rcpp::StringVector columnNames = rExe->getColNames();
    QStringList variableLabels;
    for(int col = 0; col < columnNames.size(); ++col){
        variableLabels.append(QString(columnNames[col]));
    }
    dataEditor->setVarNames(variableLabels);
    dataEditor->loadData(rExe->getDataFrame(),rExe->getColNames());

    // Variable editor.
    variableEditor->loadVariable(rExe,rExe->getColNames());

    // Tabs.
    addTab(dataEditor,"Data");
    addTab(variableEditor, "Variable");
    setTabPosition(West);

    // Update of 30 June: let the variable editor react to tab changes.
    connect(this, SIGNAL(currentChanged(int)), variableEditor, SLOT(checkWidgetVisibility()));
}
Ejemplo n.º 6
0
// Pairwise information-content based similarity between two sets of terms.
//
// id1_    terms labelling the rows of the result
// id2_    terms labelling the columns of the result
// anc_    named list: term -> character vector of its ancestors
// ic_     named numeric vector: term -> information content
// method_ one of "Resnik", "Lin", "Jiang", "Rel"; anything else throws
//         std::runtime_error
// ont_    "DO" makes "DOID:4" the root term, any other value uses "all"
//
// Returns an id1_ x id2_ matrix. A cell is NA when either term has no
// normalised IC or a normalised IC of exactly 0.
// [[Rcpp::export]]
Rcpp::NumericMatrix infoContentMethod_cpp(
    Rcpp::StringVector&  id1_,
    Rcpp::StringVector&  id2_,
    Rcpp::List&          anc_,
    Rcpp::NumericVector& ic_,
    const std::string&   method_,
    const std::string&   ont_
) {
    // Resolve the distance function by name. Resnik ignores how far the
    // terms are from their common ancestor; Lin and Jiang take that
    // distance into account.
    go_dist_func_t* go_dist = NULL;
    if      (method_ == "Resnik") { go_dist = &go_dist_Resnik; }
    else if (method_ == "Lin")    { go_dist = &go_dist_Lin; }
    else if (method_ == "Jiang")  { go_dist = &go_dist_Jiang; }
    else if (method_ == "Rel")    { go_dist = &go_dist_Rel; }
    if (go_dist == NULL) {
        throw std::runtime_error( "Unknown GO distance method" );
    }

    typedef std::string term_id_t;
    typedef std::set<term_id_t> term_set_t;
    typedef std::map<term_id_t, double> ic_map_t;
    typedef std::map<term_id_t, term_set_t> anc_map_t;

    // Normalised IC per term: each usable IC divided by the largest usable
    // IC. NA and +Inf entries are ignored in both passes.
    ic_map_t normIcMap;
    double mic = NA_REAL;
    {
        Rcpp::StringVector icNames( ic_.names() );
        const std::size_t nIc = ic_.size();

        // pass 1: largest usable IC
        for (std::size_t k = 0; k < nIc; ++k) {
            const double value = ic_[k];
            const bool usable = !Rcpp::NumericVector::is_na( value ) && value != R_PosInf;
            if ( usable && ( Rcpp::NumericVector::is_na( mic ) || mic < value ) ) {
                mic = value;
            }
        }
        LOG_DEBUG( "mic=" << mic );

        // pass 2: normalise; insert() keeps the first entry of a repeated name
        for (std::size_t k = 0; k < nIc; ++k) {
            const double value = ic_[k];
            if ( !Rcpp::NumericVector::is_na( value ) && value != R_PosInf ) {
                normIcMap.insert( std::make_pair( static_cast<std::string>( icNames[k] ), value / mic ) );
            }
        }
    }

    // The root term carries no information.
    const char* const rootTerm = ( ont_ == "DO" ) ? "DOID:4" : "all";
    normIcMap[rootTerm] = 0;

    // term -> set of ancestors, where every term counts as its own ancestor
    anc_map_t ancMap;
    {
        Rcpp::StringVector goTerms( anc_.names() );
        const std::size_t nTerms = anc_.size();
        for (std::size_t k = 0; k < nTerms; ++k) {
            const std::string term = static_cast<std::string>( goTerms[k] );
            const std::vector<std::string> listed = Rcpp::as<std::vector<std::string> >( anc_[k] );
            term_set_t ancestors( listed.begin(), listed.end() );
            ancestors.insert( term );
            ancMap.insert( std::make_pair( term, ancestors ) );
        }
    }

    Rcpp::NumericMatrix res( id1_.size(), id2_.size() );
    res.attr("dimnames") = Rcpp::Rcpp_list2( id1_, id2_ );

    const std::size_t nRows = id1_.size();
    const std::size_t nCols = id2_.size();
    for ( std::size_t i = 0; i < nRows; ++i ) {
        const std::string id1_term = static_cast<std::string>( id1_[i] );
        const ic_map_t::const_iterator rowIcIt = normIcMap.find( id1_term );

        // No usable IC for the row term: the whole row is NA.
        if ( rowIcIt == normIcMap.end() || rowIcIt->second == 0 ) {
            for ( std::size_t j = 0; j < nCols; ++j ) {
                res(i,j) = NA_REAL;
            }
            continue;
        }

        const double iIc = rowIcIt->second;
        LOG_DEBUG( "ic[" << id1_term << "]=" << iIc );
        const anc_map_t::const_iterator rowAncsIt = ancMap.find( id1_term );

        for ( std::size_t j = 0; j < nCols; ++j ) {
            const std::string id2_term = static_cast<std::string>( id2_[j] );
            const ic_map_t::const_iterator colIcIt = normIcMap.find( id2_term );

            // No usable IC for the column term: this cell is NA.
            if ( colIcIt == normIcMap.end() || colIcIt->second == 0 ) {
                res(i,j) = NA_REAL;
                continue;
            }

            // Common ancestors; empty when either term has no ancestor entry.
            const anc_map_t::const_iterator colAncsIt = ancMap.find( id2_term );
            term_set_t commonAncs;
            if ( rowAncsIt != ancMap.end() && colAncsIt != ancMap.end() ) {
                const term_set_t& rowAncs = rowAncsIt->second;
                const term_set_t& colAncs = colAncsIt->second;
                std::set_intersection( rowAncs.begin(), rowAncs.end(),
                                       colAncs.begin(), colAncs.end(),
                                       std::inserter( commonAncs, commonAncs.end() ) );
            }
            LOG_DEBUG( "n(commonAncs(" << id1_term << "," << id2_term << "))=" << commonAncs.size() );

            // Information content of the most informative common ancestor (MICA)
            double mica = 0;
            for ( term_set_t::const_iterator ancIt = commonAncs.begin(); ancIt != commonAncs.end(); ++ancIt ) {
                const ic_map_t::const_iterator found = normIcMap.find( *ancIt );
                if ( found == normIcMap.end() ) continue;
                if ( mica < found->second ) mica = found->second;
            }
            LOG_DEBUG( "mica(" << id1_term << "," << id2_term << ")=" << mica );
            res(i,j) = go_dist( mica, iIc, colIcIt->second, mic );
        }
    }
    return ( res );
}
Ejemplo n.º 7
0
// Append the body of a VCF file to a gzip-compressed file.
//
// fix      data frame with the columns CHROM, POS, ID, REF, ALT, QUAL,
//          FILTER and INFO
// gt       data frame holding the genotype columns, one row per variant
// filename file to append to (opened with mode "ab")
// mask     when 1, variants whose FILTER is not "PASS" are skipped
//
// NA values in ID, QUAL, FILTER and INFO are written as "."; NA genotypes
// are written as "./.". Raises an R error when the file cannot be opened,
// written or closed.
// [[Rcpp::export]]
void write_vcf_body_gz( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) {
  // http://stackoverflow.com/a/5649224
  
  // fix DataFrame
  Rcpp::StringVector chrom  = fix["CHROM"];
  Rcpp::StringVector pos    = fix["POS"];
  Rcpp::StringVector id     = fix["ID"];
  Rcpp::StringVector ref    = fix["REF"];
  Rcpp::StringVector alt    = fix["ALT"];
  Rcpp::StringVector qual   = fix["QUAL"];
  Rcpp::StringVector filter = fix["FILTER"];
  Rcpp::StringVector info   = fix["INFO"];
  
  // gt DataFrame
  Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt);
  Rcpp::StringVector column_names(gt.size());
  column_names = gt.attr("names");

  // Owns the zlib handle so that it is closed on every exit path,
  // including the exception thrown by Rcpp::checkUserInterrupt().
  struct GzHandle {
    gzFile file;
    explicit GzHandle(gzFile f) : file(f) {}
    ~GzHandle() { if (file != NULL) gzclose(file); }
  };

  // gzFile is already a handle type; the original cast it to gzFile*.
  GzHandle out(gzopen(filename.c_str(), "ab"));
  if(out.file == NULL){
    Rcpp::stop("Could not open '" + filename + "' for appending.");
  }

  for(int i=0; i<chrom.size(); i++){
    Rcpp::checkUserInterrupt();
    if(mask == 1 && filter(i) != "PASS" ){
      // Don't print variant.
      continue;
    }

    std::string tmpstring;
    tmpstring = chrom(i);
    tmpstring = tmpstring + "\t" + pos(i) + "\t";
    if(id(i) == NA_STRING){
      tmpstring = tmpstring + ".";
    } else {
      tmpstring = tmpstring + id(i);
    }
    tmpstring = tmpstring + "\t" + ref(i) + "\t" + alt(i) + "\t";
    if(qual(i) == NA_STRING){
      tmpstring = tmpstring + "." + "\t";
    } else {
      tmpstring = tmpstring + qual(i) + "\t";
    }
    if(filter(i) == NA_STRING){
      tmpstring = tmpstring + "." + "\t";
    } else {
      tmpstring = tmpstring + filter(i) + "\t";
    }
    // A missing INFO is written as "." like the other fixed columns;
    // the original wrote the literal text "NA".
    if(info(i) == NA_STRING){
      tmpstring = tmpstring + ".";
    } else {
      tmpstring = tmpstring + info(i);
    }

    // gt portion
    for(int j=0; j<column_names.size(); j++){
      if(gt_cm(i, j) == NA_STRING){
        tmpstring = tmpstring + "\t" + "./.";
      } else {
        tmpstring = tmpstring + "\t" + gt_cm(i, j);
      }
    }
    tmpstring = tmpstring + "\n";

    // gzwrite() returns 0 on error; the line is never empty.
    if(gzwrite(out.file, tmpstring.c_str(), static_cast<unsigned>(tmpstring.size())) == 0){
      Rcpp::stop("Error while writing to '" + filename + "'.");
    }
  }

  // Close explicitly so that a failure while flushing is reported; the
  // handle is cleared first so the destructor does not close it again.
  gzFile finished = out.file;
  out.file = NULL;
  if(gzclose(finished) != Z_OK){
    Rcpp::stop("Error while closing '" + filename + "'.");
  }

  return;
}
Ejemplo n.º 8
0
// Append the body of a VCF file to an uncompressed file.
//
// fix      data frame with the columns CHROM, POS, ID, REF, ALT, QUAL,
//          FILTER and INFO
// gt       data frame holding the genotype columns, one row per variant
// filename file to append to
// mask     when 1, variants whose FILTER is not "PASS" are skipped
//
// NA values in ID, QUAL, FILTER and INFO are written as ".". Raises an R
// error when the file cannot be opened or written.
// [[Rcpp::export]]
void write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) {

  // fix DataFrame
  Rcpp::StringVector chrom  = fix["CHROM"];
  Rcpp::StringVector pos    = fix["POS"];
  Rcpp::StringVector id     = fix["ID"];
  Rcpp::StringVector ref    = fix["REF"];
  Rcpp::StringVector alt    = fix["ALT"];
  Rcpp::StringVector qual   = fix["QUAL"];
  Rcpp::StringVector filter = fix["FILTER"];
  Rcpp::StringVector info   = fix["INFO"];

  // gt DataFrame
  Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt);
  Rcpp::StringVector column_names(gt.size());
  column_names = gt.attr("names");

  // Uncompressed.
  std::ofstream myfile;
  myfile.open (filename.c_str(), std::ios::out | std::ios::app | std::ios::binary);
  if(!myfile.is_open()){
    Rcpp::stop("Could not open '" + filename + "' for appending.");
  }

  for(int i=0; i<chrom.size(); i++){
    Rcpp::checkUserInterrupt();
    // Masked output keeps only the PASS variants. The original tested
    // == "PASS" here, which dropped exactly the variants to keep and
    // disagreed with write_vcf_body_gz.
    if(mask == 1 && filter(i) != "PASS" ){
      // Don't print variant.
      continue;
    }

    myfile << chrom(i);
    myfile << "\t";
    myfile << pos(i);
    myfile << "\t";
    if(id(i) == NA_STRING){
      myfile << ".";
    } else {
      myfile << id(i);
    }
    myfile << "\t";
    myfile << ref(i);
    myfile << "\t";
    myfile << alt(i);
    myfile << "\t";
    if(qual(i) == NA_STRING){
      myfile << ".";
    } else {
      myfile << qual(i);
    }
    myfile << "\t";
    if(filter(i) == NA_STRING){
      myfile << ".";
    } else {
      myfile << filter(i);
    }
    myfile << "\t";
    // No tab after INFO here: the genotype loop writes the separator.
    // The original added one in the NA branch, producing an empty column.
    if(info(i) == NA_STRING){
      myfile << ".";
    } else {
      myfile << info(i);
    }

    // gt region. Starting at column 0 with a leading tab also covers a
    // gt data frame without columns, where gt_cm(i, 0) does not exist.
    for(int j=0; j<column_names.size(); j++){
      myfile << "\t";
      myfile << gt_cm(i, j);
    }

    myfile << "\n";
  }

  myfile.close();
  if(myfile.fail()){
    Rcpp::stop("Error while writing to '" + filename + "'.");
  }

  return;
}
Ejemplo n.º 9
0
//' rcpp_lines_as_network
//'
//' Convert an sf collection of LINESTRING geometries into an edge list.
//' Every pair of consecutive points of a line becomes one edge. Lines that
//' have a oneway entry other than "yes" or "-1" additionally get the
//' reverse edge; where oneway is "NA", the oneway.bicycle entry is used
//' when both columns have the same length.
//'
//' @param sf_lines An sf collection of LINESTRING objects. The first
//' component must be named "osm_id", the last "geometry", and a "highway"
//' component must be present.
//' @param pr Rcpp::DataFrame containing the weighting profile; its second
//' column holds highway types and its third column the matching weights.
//'
//' @return Rcpp::List of two matrices with one row per edge: a numeric
//' matrix (first two coordinates of the start point, first two coordinates
//' of the end point, distance, distance divided by the profile weight) and
//' a character matrix (start id, end id, highway type).
//'
//' @noRd
// [[Rcpp::export]]
Rcpp::List rcpp_lines_as_network (const Rcpp::List &sf_lines,
        Rcpp::DataFrame pr)
{
    // highway type -> weight
    std::map <std::string, float> profile;
    Rcpp::StringVector hw = pr [1];
    Rcpp::NumericVector val = pr [2];
    for (int i = 0; i != hw.size (); i ++)
        profile.insert (std::make_pair (std::string (hw [i]), val [i]));

    Rcpp::CharacterVector nms = sf_lines.attr ("names");
    // Without names there is no last element to inspect.
    if (nms.size () == 0 || nms [nms.size () - 1] != "geometry")
        throw std::runtime_error ("sf_lines have no geometry component");
    if (nms [0] != "osm_id")
        throw std::runtime_error ("sf_lines have no osm_id component");
    int one_way_index = -1;
    int one_way_bicycle_index = -1;
    int highway_index = -1;
    for (int i = 0; i < nms.size (); i++)
    {
        if (nms [i] == "oneway")
            one_way_index = i;
        if (nms [i] == "oneway.bicycle")
            one_way_bicycle_index = i;
        if (nms [i] == "highway")
            highway_index = i;
    }
    // The highway type is read for every geometry below; the original
    // indexed an empty vector when the column was missing.
    if (highway_index < 0)
        throw std::runtime_error ("sf_lines have no highway component");

    // Both stay empty when the corresponding column is absent.
    Rcpp::CharacterVector ow;
    Rcpp::CharacterVector owb;
    Rcpp::CharacterVector highway = sf_lines [highway_index];
    if (one_way_index >= 0)
        ow = sf_lines [one_way_index];
    if (one_way_bicycle_index >= 0)
        owb = sf_lines [one_way_bicycle_index];
    if (ow.size () > 0)
    {
        if (ow.size () == owb.size ())
        {
            for (unsigned i = 0; i != ow.size (); ++ i)
                if (ow [i] == "NA" && owb [i] != "NA")
                    ow [i] = owb [i];
        } else if (owb.size () > ow.size ())
            ow = owb;
    }

    Rcpp::List geoms = sf_lines [nms.size () - 1];
    if (highway.size () != geoms.length ())
        throw std::runtime_error ("highway and geometry differ in length");

    // True for lines that also get reverse edges. (The original called
    // this flag isOneWay although it marked the opposite.)
    std::vector<bool> is_two_way (geoms.length (), false);

    // First pass: count the edges to size the output matrices.
    size_t nrows = 0;
    int ngeoms = 0;
    for (auto g = geoms.begin (); g != geoms.end (); ++g)
    {
        // Rcpp uses an internal proxy iterator here, NOT a direct copy
        Rcpp::NumericMatrix gi = (*g);
        // An empty geometry contributes no edges; nrow () - 1 would
        // otherwise wrap around in the unsigned total.
        const int rows = gi.nrow () > 0 ? gi.nrow () - 1 : 0;
        nrows += rows;
        if (ngeoms < ow.size ())
        {
            if (!(ow [ngeoms] == "yes" || ow [ngeoms] == "-1"))
            {
                nrows += rows;
                is_two_way [ngeoms] = true;
            }
        }
        ngeoms ++;
    }

    Rcpp::NumericMatrix nmat = Rcpp::NumericMatrix (Rcpp::Dimension (nrows, 6));
    Rcpp::CharacterMatrix idmat = Rcpp::CharacterMatrix (Rcpp::Dimension (nrows,
                3));

    // Second pass: fill the matrices.
    nrows = 0;
    ngeoms = 0;
    int fake_id = 0; // running id for points of geometries without row names
    for (auto g = geoms.begin (); g != geoms.end (); ++ g)
    {
        Rcpp::NumericMatrix gi = (*g);
        std::string hway = std::string (highway [ngeoms]);
        // Highway types missing from the profile count as weight 0, which
        // is replaced by a tiny weight so that the division stays finite.
        float hw_factor = 0.0;
        auto weight_it = profile.find (hway);
        if (weight_it != profile.end ())
            hw_factor = weight_it->second;
        if (hw_factor == 0.0) hw_factor = 1e-5;
        hw_factor = 1.0 / hw_factor;

        Rcpp::List ginames = gi.attr ("dimnames");
        Rcpp::CharacterVector rnms;
        if (ginames.length () > 0)
            rnms = ginames [0];
        else
        {
            rnms = Rcpp::CharacterVector (gi.nrow ());
            for (int i = 0; i < gi.nrow (); i ++)
                rnms [i] = fake_id ++;
        }
        if (rnms.size () != gi.nrow ())
            throw std::runtime_error ("geom size differs from rownames");

        for (int i = 1; i < gi.nrow (); i ++)
        {
            float d = haversine (gi (i-1, 0), gi (i-1, 1), gi (i, 0),
                    gi (i, 1));
            // forward edge: point i-1 -> point i
            nmat (nrows, 0) = gi (i-1, 0);
            nmat (nrows, 1) = gi (i-1, 1);
            nmat (nrows, 2) = gi (i, 0);
            nmat (nrows, 3) = gi (i, 1);
            nmat (nrows, 4) = d;
            nmat (nrows, 5) = d * hw_factor;
            idmat (nrows, 0) = rnms (i-1);
            idmat (nrows, 1) = rnms (i);
            idmat (nrows, 2) = hway;
            nrows ++;
            if (is_two_way [ngeoms])
            {
                // reverse edge: point i -> point i-1
                nmat (nrows, 0) = gi (i, 0);
                nmat (nrows, 1) = gi (i, 1);
                nmat (nrows, 2) = gi (i-1, 0);
                nmat (nrows, 3) = gi (i-1, 1);
                nmat (nrows, 4) = d;
                nmat (nrows, 5) = d * hw_factor;
                idmat (nrows, 0) = rnms (i);
                idmat (nrows, 1) = rnms (i-1);
                idmat (nrows, 2) = hway;
                nrows ++;
            }
        }
        ngeoms ++;
    }

    Rcpp::List res (2);
    res [0] = nmat;
    res [1] = idmat;

    return res;
}
Ejemplo n.º 10
0
/**
<summary> Appends data onto an existing timeseries.</summary>
<remarks> There are several checks which will be performed. All must pass.
- the appending data and the timeseries must have the same number of fields
- every field of the appending data must exist in the timeseries, with a
  matching data type
- every field of the timeseries must appear exactly once in the appending data
- all fields of the data frame must have the same length</remarks>

<param name="_groupID">Integer argument for the HDF5 file identifier (group ID).</param>
<param name="_seriesName">String argument for the timeseries name.</param>
<param name="_appendData">Data frame argument of the data to append.</param>
<param name="_discardOverlap">If true, appended records that overlap with
existing records are discarded. </param>
<returns> Returns 1 if successful. Failures are forwarded to R as errors.</returns>
*/
SEXP TSDBappend(SEXP _groupID, SEXP _seriesName, SEXP _appendData,
				SEXP _discardOverlap)
{
try {
	using namespace std;
	//checking arguments
	if (TYPEOF(_groupID) != INTSXP)
		throw std::runtime_error("Group ID should be an integer argument.");

	if (TYPEOF(_seriesName) != STRSXP)
		throw std::runtime_error("Timeseries name should be a string argument.");

	if (TYPEOF(_appendData) != VECSXP)
		throw std::runtime_error("Data should be a data frame.");

	/******************************************************
	CHECKING PROPERTIES OF THE TABLE AND THE APPENDING DATA
	*******************************************************/
	//properties of the table
	string seriesName = Rcpp::as<string>(_seriesName);
	int groupID = Rcpp::as<int>(_groupID);
	tsdb::Timeseries ts(groupID, seriesName);
	size_t tableFieldCount = ts.structure()->getNFields();

	//properties of the appending data frame
	Rcpp::List appendData(_appendData);
	Rcpp::StringVector appendDataNames = appendData.attr("names");
	size_t appendingFieldCount = appendDataNames.length();

	//checking for number of fields
	if (appendingFieldCount != tableFieldCount)
		throw std::runtime_error("Appending data frame and the TSDB table "
				"have a different number of fields.");

	//the record count below is read from the first column, so there
	//has to be one
	if (appendingFieldCount == 0)
		throw std::runtime_error("Appending data frame has no fields.");

	//checking to see if the field in the appending data exists in the
	//TSDB table; also checking for types
	const int numRecordsToAppend = LENGTH(VECTOR_ELT(_appendData, 0));
	for (size_t i=0; i<appendingFieldCount; i++)
	{
		const string dfName = (string) appendDataNames[i];
		//columns are addressed by position, exactly as in the filling
		//loop further down
		SEXP dfColumn = VECTOR_ELT(_appendData, i);

		//if the index isn't found, an exception will occur
		size_t index = ts.structure()->getFieldIndexByName(dfName);

		//checking the types match
		string TSDBtype = ts.structure()->getField(index)->getTSDBType();
		if (TSDBtype == "Timestamp" || TSDBtype == "Double")
		{
			if (TYPEOF(dfColumn) != REALSXP)
				throw std::runtime_error(TSDBappendErrorMessage(dfName,TSDBtype));
		}
		else if (TSDBtype == "Int8" || TSDBtype == "Int32" || TSDBtype == "Date")
		{
			if (TYPEOF(dfColumn) != INTSXP)
				throw std::runtime_error(TSDBappendErrorMessage(dfName,TSDBtype));
		}
		//same substring test as the filling loop below; the original
		//compared with == "String" here, so the two disagreed
		else if (TSDBtype.find("String") != std::string::npos)
		{
			if (TYPEOF(dfColumn) != STRSXP)
				throw std::runtime_error(TSDBappendErrorMessage(dfName,TSDBtype));
		}

		//checking existence of table fields in the data frame
		string fieldName = ts.structure()->getField(i)->getName();
		int check = (int) std::count(appendDataNames.begin(),appendDataNames.end(),fieldName.c_str());
		if (check != 1)
		{
			if (check == 0)
				throw std::runtime_error("Field " + fieldName + " was not found "
						"in the appending data.");
			else
				throw std::runtime_error("Field " + fieldName + " was found "
						"multiple times.");
		}

		//checking for uniform length in data frame.
		//this could be an issue if a named list is passed as an argument.
		//(the original compared column 0 with itself, so this check could
		//never fail)
		if (numRecordsToAppend != LENGTH(dfColumn))
			throw std::runtime_error("All fields in the appending data must have"
					" the same length/");
	}

	/**************************************
	CHECKS PASSED. APPENDING DATA.
	**************************************/
	//vector of formatted input, ready for appending
	/*
	http://stackoverflow.com/questions/7251253/c-no-matching-function-for-call-but-the-candidate-has-the-exact-same-signatur
	explains why 'tsdb::RecordSet records((size_t) numRecordsToAppend, ts.structure());'
	would not work.
	*/
	boost::shared_ptr<tsdb::Structure> tsStructure = ts.structure();
	tsdb::RecordSet records((size_t) numRecordsToAppend, tsStructure);

	for (size_t dfIndex=0; dfIndex<appendingFieldCount; dfIndex++)
	{
		string dfName = (string) appendDataNames[dfIndex];

		//index in the TSDB table
		size_t tableIndex = ts.structure()->getFieldIndexByName(dfName);
		string TSDBtype = ts.structure()->getField(tableIndex)->getTSDBType();

		SEXP dfColumn = VECTOR_ELT(_appendData,dfIndex);

		if (TSDBtype == "Timestamp")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::timestamp_t) (REAL(dfColumn)[row]);
		}
		else if (TSDBtype == "Double")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::ieee64_t) (REAL(dfColumn)[row]);
		}
		else if (TSDBtype == "Int8")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::int8_t) (INTEGER(dfColumn)[row]);
		}
		else if (TSDBtype == "Int32")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::int32_t) (INTEGER(dfColumn)[row]);
		}
		else if (TSDBtype == "Date")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::date_t) (INTEGER(dfColumn)[row]);
		}
		else if (TSDBtype.find("String") != std::string::npos)
		{
			Rcpp::StringVector stringColumn(dfColumn);

			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (string) stringColumn[row];
		}
	}

	//appending the data
	bool discardOverlap = Rcpp::as<bool>(_discardOverlap);
	ts.appendRecordSet(records,discardOverlap);

	return Rcpp::wrap(1);
}
catch( std::exception &ex ) {
	forward_exception_to_r(ex);
} catch(...) {
	::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}
Ejemplo n.º 11
0
/**
<summary> Creates a new timeseries within an HDF5 file.</summary>
<remarks> A timestamp field named "TSDB_timestamp" is always created as the
first field. Field types are matched case-insensitively; the accepted types
are int8, int32, double, date and string(n).</remarks>
<param name="_groupID">Integer argument for the HDF5 file identifier (group ID).</param>
<param name="_seriesName">String argument for the timeseries name.</param>
<param name="_seriesDescription">String argument for timeseries' description.</param>
<param name="_fields">Named list (or data frame) containing two named elements:
-fieldNames - names of the fields
-fieldTypes - types of the fields
These elements must have the same length.</param>
<returns> Returns the integer 1 if successful. Failures are forwarded to R
as errors.</returns>
*/
SEXP TSDBcreate_timeseries(SEXP _groupID, SEXP _seriesName,
		SEXP _seriesDescription, SEXP _fields)
{
try {
	using namespace std;
	//checking arguments
	if (TYPEOF(_groupID) != INTSXP)
		throw std::runtime_error("Group ID should be an integer argument.");

	if (TYPEOF(_seriesName) != STRSXP)
		throw std::runtime_error("Timeseries name should be a string argument.");

	if (TYPEOF(_seriesDescription) != STRSXP)
		throw std::runtime_error("Timeseries description should be a string argument.");

	if (TYPEOF(_fields) != VECSXP)
		throw std::runtime_error("Fields argument should be a named list.");

	//checking properties of the list
	Rcpp::List fieldsList(_fields);
	Rcpp::StringVector listNames = fieldsList.attr("names");

	//checking for the existence of 'fieldNames' and 'fieldTypes' list elements
	int check = (int) std::count(listNames.begin(),listNames.end(),"fieldNames");
	if (check != 1)
		throw std::runtime_error("Fields argument should have one element named 'fieldNames'.");

	check = (int) std::count(listNames.begin(),listNames.end(),"fieldTypes");
	if (check != 1)
		throw std::runtime_error("Fields argument should have one element named 'fieldTypes'.");

	Rcpp::StringVector fieldNames = fieldsList["fieldNames"];
	Rcpp::StringVector fieldTypes = fieldsList["fieldTypes"];

	//the original compared fieldNames with itself, so a length mismatch
	//was never detected
	if (fieldNames.length() != fieldTypes.length())
		throw std::runtime_error("'fieldNames' and 'fieldTypes' should have the same length.");

	//grabbing other arguments
	string seriesName = Rcpp::as<string>(_seriesName);
	string seriesDescription = Rcpp::as<string>(_seriesDescription);
	int groupID = Rcpp::as<int>(_groupID);

	if (groupID < 0)
		throw std::runtime_error("Invalid group ID.");

	//validating every field type BEFORE allocating any field object, so
	//that a rejected type cannot leak the fields created before it
	const int numFields = fieldNames.length();
	vector<string> fieldKinds;
	vector<int> stringLengths;
	for (int i=0; i<numFields; i++)
	{
		string type = (string) fieldTypes[i];

		//transforming to lower case, so the comparison is case INsensitive
		boost::to_lower(type);

		int stringLength = 0;
		if (type == "int8" || type == "int32" || type == "double" || type == "date")
		{
			//nothing further to parse
		}
		else if (type.find("string") != std::string::npos)
		{
			//parsing 'string(n)' without strtok, which wrote into the
			//buffer returned by c_str(). As before, a missing closing
			//parenthesis is tolerated.
			const size_t open = type.find('(');
			if (open == std::string::npos)
				throw std::runtime_error("Field type for a string must have the form 'string(n)', for some integer n.");

			const size_t close = type.find(')', open + 1);
			const string digits = (close == std::string::npos)
					? type.substr(open + 1)
					: type.substr(open + 1, close - open - 1);

			//an empty or non-numeric length parses as 0 and is rejected
			stringLength = atoi(digits.c_str());
			if (stringLength <= 0)
				throw std::runtime_error("Field type for a string must have the form 'string(n)', for some integer n.");

			type = "string";
		}
		else
		{
			//the original silently dropped fields of unknown type
			throw std::runtime_error("Unknown field type '" + type +
					"' for field '" + (string) fieldNames[i] + "'.");
		}

		fieldKinds.push_back(type);
		stringLengths.push_back(stringLength);
	}

	//building structure of the new timeseries
	vector<tsdb::Field*> fields;

	//first field is always the timestamp
	fields.push_back(new tsdb::TimestampField("TSDB_timestamp"));

	for (int i=0; i<numFields; i++)
	{
		string name = (string) fieldNames[i];
		const string& kind = fieldKinds[i];

		if (kind == "int8")
			fields.push_back(new tsdb::Int8Field(name));
		else if (kind == "int32")
			fields.push_back(new tsdb::Int32Field(name));
		else if (kind == "double")
			fields.push_back(new tsdb::DoubleField(name));
		else if (kind == "date")
			fields.push_back(new tsdb::DateField(name));
		else
			fields.push_back(new tsdb::StringField(name,stringLengths[i]));
	}

	//NOTE(review): presumably the structure takes ownership of the field
	//pointers - confirm against tsdb::Structure
	boost::shared_ptr<tsdb::Structure> st =
			boost::make_shared<tsdb::Structure>(fields,false);
	tsdb::Timeseries ts =
			tsdb::Timeseries(groupID,seriesName,seriesDescription,st);

	Rcpp::IntegerVector status(1);
	status[0] = 1;
	return status;
}
catch( std::exception &ex ) {
	forward_exception_to_r(ex);
} catch(...) {
	::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}
Ejemplo n.º 12
0
/**
<summary> Pulls records from the timeseries.</summary>
<param name="_groupID">Integer argument for the HDF5 file identifier (group ID).</param>
<param name="_seriesName">String argument for the timeseries name.</param>
<param name="_startTimestamp"> Double argument for the first wanted record. Since R doesn't
have a 64 bit integer, this argument will be received by the library as a double,
representing milliseconds since January 1, 1970. However, within R the argument can
be a string following the format '2009-04-01 16:30:00.003', which will be converted to
a double, or simply a double. As long as the argument represents a time less
than 2^53 milliseconds from the epoch, which will happen in a few hundreds of thousands
of years, there should be no loss of precision.</param>
<param name="_endTimestamp"> Double argument for the last timestamp of the wanted records.</param>
<param name="_fieldsWanted"> Character vector argument for the wanted fields, or NULL
to retrieve every field.</param>
<returns> Returns a data frame of wanted fields for the wanted records.</returns>
*/
SEXP TSDBget_records(SEXP _groupID, SEXP _seriesName,
		SEXP _startTimestamp, SEXP _endTimestamp, SEXP _fieldsWanted)
{
try {
	using namespace std;

	//checking arguments
	if (TYPEOF(_groupID) != INTSXP)
		throw std::runtime_error("Group ID should an integer argument.");

	if (TYPEOF(_seriesName) != STRSXP)
		throw std::runtime_error("Timeseries name should a string.");

	if (TYPEOF(_startTimestamp) != REALSXP || TYPEOF(_endTimestamp) != REALSXP)
		throw std::runtime_error("Timestamp arguments must have type double.");

	//fieldsWanted might be an empty argument
	//(the original reported the timestamp message for this check)
	if (TYPEOF(_fieldsWanted) != STRSXP && TYPEOF(_fieldsWanted) != NILSXP)
		throw std::runtime_error("Wanted fields must be a character vector or NULL.");

	//getting arguments
	hid_t groupID = (hid_t) Rcpp::as<int>(_groupID);
	string seriesName = Rcpp::as<std::string>(_seriesName);
	tsdb::timestamp_t startTimestamp = (tsdb::timestamp_t) Rcpp::as<double>(_startTimestamp);
	tsdb::timestamp_t endTimestamp = (tsdb::timestamp_t) Rcpp::as<double>(_endTimestamp);
	Rcpp::StringVector fieldsWanted;
	size_t numFieldsWanted;
	vector<size_t> indicesWanted;

	if (groupID < 0)
		throw std::runtime_error("Invalid group ID.");

	//creating timeseries object
	tsdb::Timeseries ts(groupID, seriesName);

	if (TYPEOF(_fieldsWanted) != NILSXP)
	{
		//figuring out the indices of wanted columns
		fieldsWanted = Rcpp::StringVector(_fieldsWanted);
		numFieldsWanted = fieldsWanted.length();
		for (size_t i=0; i<numFieldsWanted; i++)
		{
			size_t index = ts.structure()->
					getFieldIndexByName((string) fieldsWanted[i]);
			indicesWanted.push_back(index);
		}
	}
	else
	{
		//grabbing all columns
		numFieldsWanted = ts.structure()->getNFields();
		//NOTE(review): ownership of the returned array is not visible
		//here - confirm whether the caller has to release it
		char** fieldNames = ts.structure()->getNameOfFieldsAsArray();
		fieldsWanted = Rcpp::StringVector(numFieldsWanted);
		for (size_t i=0; i<numFieldsWanted; i++)
		{
			indicesWanted.push_back(i);
			fieldsWanted[i] = fieldNames[i];
		}
	}

	//loading the records into memory
	tsdb::RecordSet recordSet = ts.recordSet(startTimestamp, endTimestamp);
	size_t numRecords = recordSet.size();

	Rcpp::List records; //record container

	//looping through wanted columns
	for (size_t i = 0; i<numFieldsWanted; i++)
	{
		size_t index = indicesWanted[i];

		//name of the column in the result
		const string fieldName = (string) fieldsWanted[i];

		//type of the column
		string fieldType = ts.structure()->getField(index)->getTSDBType();

		//the types are mutually exclusive, hence one else-if chain
		if (fieldType == "Timestamp")
		{
			Rcpp::NumericVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toTimestamp();

			records.push_back(columnData,fieldName);
		}
		else if (fieldType == "Date")
		{
			Rcpp::IntegerVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toDate();

			records.push_back(columnData,fieldName);
		}
		else if (fieldType == "Int8")
		{
			Rcpp::IntegerVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toInt8();

			records.push_back(columnData,fieldName);
		}
		else if (fieldType == "Int32")
		{
			Rcpp::IntegerVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toInt32();

			records.push_back(columnData,fieldName);
		}
		else if (fieldType == "Double")
		{
			Rcpp::NumericVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toDouble();

			records.push_back(columnData,fieldName);
		}
		else if (fieldType.find("String") != std::string::npos)
		{
			Rcpp::StringVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toString();

			records.push_back(columnData,fieldName);
		}
	}

	return Rcpp::DataFrame::create(records);
}
catch( std::exception &ex ) {
	forward_exception_to_r(ex);
} catch(...) {
	::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}