Beispiel #1
0
/**
 * Fills the vertex, edge and vertex-to-edge maps from the rows of a graph
 * data frame.
 *
 * Columns read from gr: "from_id", "to_id" and "highway" as strings;
 * "from_lon", "from_lat", "to_lon", "to_lat", "edge_id", "d" and
 * "d_weighted" as numerics. Every row is treated as one edge.
 *
 * @param gr Data frame with one edge per row.
 * @param vm Vertex map. A vertex that is not yet present is inserted with the
 *        lat/lon of the first row that mentions it; every row then registers
 *        to_id as an out-neighbour of from_id and from_id as an in-neighbour
 *        of to_id.
 * @param edge_map Receives an osm_edge_t per row via emplace, keyed by the
 *        row's edge_id.
 * @param vert2edge_map Updated through add_to_edge_map for both end points of
 *        every row.
 */
void graph_from_df (Rcpp::DataFrame gr, vertex_map_t &vm,
        edge_map_t &edge_map, vert2edge_map_t &vert2edge_map)
{
    Rcpp::StringVector from = gr ["from_id"];
    Rcpp::StringVector to = gr ["to_id"];
    Rcpp::NumericVector from_lon = gr ["from_lon"];
    Rcpp::NumericVector from_lat = gr ["from_lat"];
    Rcpp::NumericVector to_lon = gr ["to_lon"];
    Rcpp::NumericVector to_lat = gr ["to_lat"];
    Rcpp::NumericVector edge_id = gr ["edge_id"];
    Rcpp::NumericVector dist = gr ["d"];
    Rcpp::NumericVector weight = gr ["d_weighted"];
    Rcpp::StringVector hw = gr ["highway"];

    for (int i = 0; i < to.length (); i ++)
    {
        osm_id_t from_id = std::string (from [i]);
        osm_id_t to_id = std::string (to [i]);

        if (vm.find (from_id) == vm.end ())
        {
            osm_vertex_t fromV = osm_vertex_t ();
            fromV.set_lat (from_lat [i]);
            fromV.set_lon (from_lon [i]);
            vm.emplace (from_id, fromV);
        }
        // Mutate the stored vertex directly: copying it out and assigning it
        // back would duplicate its whole neighbour list for every edge.
        vm.at (from_id).add_neighbour_out (to_id);

        if (vm.find (to_id) == vm.end ())
        {
            osm_vertex_t toV = osm_vertex_t ();
            toV.set_lat (to_lat [i]);
            toV.set_lon (to_lon [i]);
            vm.emplace (to_id, toV);
        }
        vm.at (to_id).add_neighbour_in (from_id);

        // A freshly read edge has no replacement edges yet.
        std::set <int> replacementEdges;
        osm_edge_t edge = osm_edge_t (from_id, to_id, dist [i], weight [i],
                std::string (hw [i]), edge_id [i], replacementEdges);
        edge_map.emplace (edge_id [i], edge);
        add_to_edge_map (vert2edge_map, from_id, edge_id [i]);
        add_to_edge_map (vert2edge_map, to_id, edge_id [i]);
    }
}
Beispiel #2
0
/**
<summary> Extracts the length n from a lower-cased 'string(n)' field type.</summary>
<param name="type">Field type text; it is only read, never modified.</param>
<returns> The positive integer that follows the first '(' in the text.</returns>
<remarks> Throws std::runtime_error when there is no '(' or when no positive
integer that fits an int follows it. Characters after the number are ignored.</remarks>
*/
static int TSDBparseStringFieldLength(const std::string &type)
{
	static const char* const formatError =
			"Field type for a string must have the form 'string(n)', for some integer n.";

	const std::string::size_type open = type.find('(');
	if (open == std::string::npos)
		throw std::runtime_error(formatError);

	//c_str() is NUL-terminated, so open+1 points at worst to the terminator
	const char* digits = type.c_str() + open + 1;
	char* end = NULL;
	const long parsed = strtol(digits, &end, 10);

	//end == digits means strtol consumed nothing, i.e. there was no number
	if (end == digits || parsed <= 0)
		throw std::runtime_error(formatError);

	//reject values that do not survive the round trip through int
	const int length = static_cast<int>(parsed);
	if (static_cast<long>(length) != parsed)
		throw std::runtime_error(formatError);

	return length;
}

/**
<summary> Creates a new timeseries within an HDF5 file.</summary>
<param name="groupID">Integer argument for the HDF5 file identifier (group ID).
Must not be negative.</param>
<param name="seriesName">String argument for the timeseries name.</param>
<param name="seriesDescription">String argument for timeseries' description.</param>
<param name="fields">Named list (a data frame qualifies) containing exactly one
element of each name:
-fieldNames - names of the fields
-fieldTypes - types of the fields; compared case-insensitively against
 'int8', 'int32', 'double', 'date' and 'string(n)' for a positive integer n
These elements must have the same length. A timestamp field named
'TSDB_timestamp' is always added in front of the listed fields.</param>
<returns> Returns an integer vector holding 1 if successful. Any C++ exception
is handed to forward_exception_to_r (or Rf_error for unknown exceptions).</returns>
*/
SEXP TSDBcreate_timeseries(SEXP _groupID, SEXP _seriesName,
		SEXP _seriesDescription, SEXP _fields)
{
try {
	using namespace std;
	//checking arguments
	if (TYPEOF(_groupID) != INTSXP)
		throw std::runtime_error("Group ID should be an integer argument.");

	if (TYPEOF(_seriesName) != STRSXP)
		throw std::runtime_error("Timeseries name should be a string argument.");

	if (TYPEOF(_seriesDescription) != STRSXP)
		throw std::runtime_error("Timeseries description should be a string argument.");

	if (TYPEOF(_fields) != VECSXP)
		throw std::runtime_error("Fields argument should be a named list.");

	//checking properties of the list
	Rcpp::List fieldsList(_fields);
	Rcpp::StringVector listNames = fieldsList.attr("names");

	//checking for the existence of 'fieldNames' and 'fieldTypes' list elements
	int check = (int) std::count(listNames.begin(),listNames.end(),"fieldNames");
	if (check != 1)
		throw std::runtime_error("Fields argument should have one element named 'fieldNames'.");

	check = (int) std::count(listNames.begin(),listNames.end(),"fieldTypes");
	if (check != 1)
		throw std::runtime_error("Fields argument should have one element named 'fieldTypes'.");

	Rcpp::StringVector fieldNames = fieldsList["fieldNames"];
	Rcpp::StringVector fieldTypes = fieldsList["fieldTypes"];

	//names and types are paired by position, so they must line up
	if (fieldNames.length() != fieldTypes.length())
		throw std::runtime_error("'fieldNames' and 'fieldTypes' should have the same length.");

	//grabbing other arguments
	string seriesName = Rcpp::as<string>(_seriesName);
	string seriesDescription = Rcpp::as<string>(_seriesDescription);
	int groupID = Rcpp::as<int>(_groupID);

	if (groupID < 0)
		throw std::runtime_error("Invalid group ID.");

	//validating every field description BEFORE allocating any field object,
	//so that a malformed entry cannot leak the fields created before it
	int numFields = fieldNames.length();
	vector<string> names(numFields);
	vector<string> types(numFields);
	vector<int> stringLengths(numFields, 0);

	for (int i=0; i<numFields; i++)
	{
		names[i] = (string) fieldNames[i];
		types[i] = (string) fieldTypes[i];

		//transforming to lower case, so the comparison is case INsensitive
		boost::to_lower(types[i]);
		const string &type = types[i];

		if (type == "int8" || type == "int32" || type == "double" || type == "date")
			continue;

		if (type.find("string") != std::string::npos)
			stringLengths[i] = TSDBparseStringFieldLength(type);
		else
			//an unknown type used to be dropped silently, producing a
			//timeseries with fewer fields than requested
			throw std::runtime_error("Unsupported field type '" + type +
					"' for field '" + names[i] + "'.");
	}

	//building structure of the new timeseries
	vector<tsdb::Field*> fields;

	//first field is always the timestamp
	fields.push_back(new tsdb::TimestampField("TSDB_timestamp"));

	for (int i=0; i<numFields; i++)
	{
		const string &name = names[i];
		const string &type = types[i];

		if (type == "int8")
			fields.push_back(new tsdb::Int8Field(name));
		else if (type == "int32")
			fields.push_back(new tsdb::Int32Field(name));
		else if (type == "double")
			fields.push_back(new tsdb::DoubleField(name));
		else if (type == "date")
			fields.push_back(new tsdb::DateField(name));
		else
			//only 'string(n)' types remain after the validation pass
			fields.push_back(new tsdb::StringField(name,stringLengths[i]));
	}

	boost::shared_ptr<tsdb::Structure> st =
			boost::make_shared<tsdb::Structure>(fields,false);

	//constructing the object is what creates the timeseries; the object
	//itself is not used afterwards
	tsdb::Timeseries ts =
			tsdb::Timeseries(groupID,seriesName,seriesDescription,st);

	Rcpp::IntegerVector status(1);
	status[0] = 1;
	return status;
}
catch( std::exception &ex ) {
	forward_exception_to_r(ex);
} catch(...) {
	::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}
Beispiel #3
0
/**
<summary> Appends data onto an existing timeseries.</summary>
<remarks> There are several checks which will be performed. All must pass.
- the appending data and the timeseries have the same number of fields
- appropriate data types between the timeseries field and the appending data
  (double for Timestamp/Double, integer for Int8/Int32/Date, character for
  String fields)
- all fields of the timeseries must exist exactly once in the appending data
- all fields of the data frame must have the same length</remarks>

<param name="groupID">Integer argument for the HDF5 file identifier (group ID).
Must not be negative.</param>
<param name="seriesName">String argument for the timeseries name.</param>
<param name="appendData">Data frame (or named list) argument of the data to append.</param>
<param name="discardOverlap">Logical argument, passed on to appendRecordSet.
If true, appended records that overlap with existing records are discarded. </param>
<returns> Returns 1 if successful. Any C++ exception is handed to
forward_exception_to_r (or Rf_error for unknown exceptions).</returns>
*/
SEXP TSDBappend(SEXP _groupID, SEXP _seriesName, SEXP _appendData,
				SEXP _discardOverlap)
{
try {
	using namespace std;
	//checking arguments
	if (TYPEOF(_groupID) != INTSXP)
		throw std::runtime_error("Group ID should be an integer argument.");

	if (TYPEOF(_seriesName) != STRSXP)
		throw std::runtime_error("Timeseries name should be a string argument.");

	if (TYPEOF(_appendData) != VECSXP)
		throw std::runtime_error("Data should be a data frame.");

	/******************************************************
	CHECKING PROPERTIES OF THE TABLE AND THE APPENDING DATA
	*******************************************************/
	//properties of the table
	string seriesName = Rcpp::as<string>(_seriesName);
	int groupID = Rcpp::as<int>(_groupID);

	//negative identifiers are rejected before the timeseries is opened
	if (groupID < 0)
		throw std::runtime_error("Invalid group ID.");

	tsdb::Timeseries ts(groupID, seriesName);
	boost::shared_ptr<tsdb::Structure> tsStructure = ts.structure();
	size_t tableFieldCount = tsStructure->getNFields();

	//properties of the appending data frame
	Rcpp::List appendData(_appendData);
	Rcpp::StringVector appendDataNames = appendData.attr("names");
	size_t appendingFieldCount = appendDataNames.length();

	//checking for number of fields
	if (appendingFieldCount != tableFieldCount)
		throw std::runtime_error("Appending data frame and the TSDB table "
				"have a different number of fields.");

	//the first column defines the expected record count; an empty name
	//vector must not be indexed
	int numRecordsToAppend = 0;
	if (appendingFieldCount > 0)
		numRecordsToAppend = LENGTH(appendData[(string) appendDataNames[0]]);

	//checking to see if the field in the appending data exists in the
	//TSDB table; also checking for types
	for (size_t i=0; i<appendingFieldCount; i++)
	{
		string dataName = (string) appendDataNames[i];

		//if the index isn't found, an exception will occur
		size_t index = tsStructure->getFieldIndexByName(dataName);

		//checking the types match; field types not listed here are not checked
		string TSDBtype = tsStructure->getField(index)->getTSDBType();
		bool typeMatches = true;
		if (TSDBtype == "Timestamp" || TSDBtype == "Double")
			typeMatches = (TYPEOF(appendData[dataName]) == REALSXP);
		else if (TSDBtype == "Int8" || TSDBtype == "Int32" || TSDBtype == "Date")
			typeMatches = (TYPEOF(appendData[dataName]) == INTSXP);
		else if (TSDBtype.find("String") != std::string::npos)
			//substring match, identical to the test used when the column is
			//converted further down
			typeMatches = (TYPEOF(appendData[dataName]) == STRSXP);

		if (!typeMatches)
			throw std::runtime_error(TSDBappendErrorMessage(dataName,TSDBtype));

		//checking existence of table fields in the data frame; the field
		//counts are equal, so i is also a valid table index
		string fieldName = tsStructure->getField(i)->getName();
		int check = (int) std::count(appendDataNames.begin(),appendDataNames.end(),fieldName.c_str());
		if (check != 1)
		{
			if (check == 0)
				throw std::runtime_error("Field " + fieldName + " was not found "
						"in the appending data.");
			else
				throw std::runtime_error("Field " + fieldName + " was found "
						"multiple times.");
		}

		//checking for uniform length in data frame.
		//this could be an issue if a named list is passed as an argument.
		//column i (not column 0) has to be compared against the first column
		if (numRecordsToAppend != LENGTH(appendData[dataName]))
			throw std::runtime_error("All fields in the appending data must have"
					" the same length.");
	}

	/**************************************
	CHECKS PASSED. APPENDING DATA.
	**************************************/
	//vector of formatted input, ready for appending
	/*
	http://stackoverflow.com/questions/7251253/c-no-matching-function-for-call-but-the-candidate-has-the-exact-same-signatur
	explains why 'tsdb::RecordSet records((size_t) numRecordsToAppend, ts.structure());'
	would not work.
	*/
	tsdb::RecordSet records((size_t) numRecordsToAppend, tsStructure);

	for (size_t dfIndex=0; dfIndex<appendingFieldCount; dfIndex++)
	{
		string dfName = (string) appendDataNames[dfIndex];

		//index in the TSDB table; columns may be ordered differently there
		size_t tableIndex = tsStructure->getFieldIndexByName(dfName);
		string TSDBtype = tsStructure->getField(tableIndex)->getTSDBType();

		SEXP dfColumn = VECTOR_ELT(_appendData,dfIndex);

		if (TSDBtype == "Timestamp")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::timestamp_t) (REAL(dfColumn)[row]);
		}
		else if (TSDBtype == "Double")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::ieee64_t) (REAL(dfColumn)[row]);
		}
		else if (TSDBtype == "Int8")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::int8_t) (INTEGER(dfColumn)[row]);
		}
		else if (TSDBtype == "Int32")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::int32_t) (INTEGER(dfColumn)[row]);
		}
		else if (TSDBtype == "Date")
		{
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::date_t) (INTEGER(dfColumn)[row]);
		}
		else if (TSDBtype.find("String") != std::string::npos)
		{
			Rcpp::StringVector stringColumn(dfColumn);

			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (string) stringColumn[row];
		}
	}

	//appending the data
	bool discardOverlap = Rcpp::as<bool>(_discardOverlap);
	ts.appendRecordSet(records,discardOverlap);

	return Rcpp::wrap(1);
}
catch( std::exception &ex ) {
	forward_exception_to_r(ex);
} catch(...) {
	::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}
Beispiel #4
0
/**
<summary> Pulls records from the timeseries.</summary>
<param name="groupID">Integer argument for the HDF5 file identifier (group ID).
Must not be negative.</param>
<param name="seriesName">String argument for the timeseries name.</param>
<param name="startTimestamp"> Double argument for the first wanted record. Since R doesn't
have a 64 bit integer, this argument will be received by the library as a double,
representing milliseconds since January 1, 1970. However, within R the argument can
be a string following the format '2009-04-01 16:30:00.003', which will be converted to
a double, or simply as a double. As long as the argument represents a time less
than 2^53 milliseconds from the epoch, which will happen in a few hundreds of thousands
of years, there should be no loss of precision.</param>
<param name="endTimestamp"> Double argument for the last timestamp of the wanted records.</param>
<param name="fieldsWanted"> Character vector argument for the wanted fields, or NULL
to get every field of the timeseries.</param>
<returns> Returns a data frame of wanted fields for the wanted records. Any C++
exception is handed to forward_exception_to_r (or Rf_error for unknown exceptions).</returns>
*/
SEXP TSDBget_records(SEXP _groupID, SEXP _seriesName,
		SEXP _startTimestamp, SEXP _endTimestamp, SEXP _fieldsWanted)
{
try {
	using namespace std;

	//checking arguments
	if (TYPEOF(_groupID) != INTSXP)
		throw std::runtime_error("Group ID should an integer argument.");

	if (TYPEOF(_seriesName) != STRSXP)
		throw std::runtime_error("Timeseries name should a string.");

	if (TYPEOF(_startTimestamp) != REALSXP || TYPEOF(_endTimestamp) != REALSXP)
		throw std::runtime_error("Timestamp arguments must have type double.");

	//fieldsWanted might be an empty argument
	if (TYPEOF(_fieldsWanted) != STRSXP && TYPEOF(_fieldsWanted) != NILSXP)
		throw std::runtime_error("Fields wanted should be a character vector or NULL.");

	//getting arguments
	hid_t groupID = (hid_t) Rcpp::as<int>(_groupID);
	string seriesName = Rcpp::as<std::string>(_seriesName);
	tsdb::timestamp_t startTimestamp = (tsdb::timestamp_t) Rcpp::as<double>(_startTimestamp);
	tsdb::timestamp_t endTimestamp = (tsdb::timestamp_t) Rcpp::as<double>(_endTimestamp);
	Rcpp::StringVector fieldsWanted;
	size_t numFieldsWanted;
	vector<size_t> indicesWanted;

	if (groupID < 0)
		throw std::runtime_error("Invalid group ID.");

	//creating timeseries object
	tsdb::Timeseries ts(groupID, seriesName);

	if (TYPEOF(_fieldsWanted) != NILSXP)
	{
		//figuring out the indices of wanted columns
		fieldsWanted = Rcpp::StringVector(_fieldsWanted);
		numFieldsWanted = fieldsWanted.length();
		for (size_t i=0; i<numFieldsWanted; i++)
		{
			size_t index = ts.structure()->
					getFieldIndexByName((char*)fieldsWanted[i]);
			indicesWanted.push_back(index);
		}
	}
	else
	{
		//grabbing all columns
		numFieldsWanted = ts.structure()->getNFields();
		//NOTE(review): ownership of the returned array is not visible here -
		//confirm whether the caller is expected to release it
		char** fieldNames = ts.structure()->getNameOfFieldsAsArray();
		fieldsWanted = Rcpp::StringVector(numFieldsWanted);
		for (size_t i=0; i<numFieldsWanted; i++)
		{
			indicesWanted.push_back(i);
			fieldsWanted[i] = fieldNames[i];
		}
	}

	//loading the records into memory
	tsdb::RecordSet recordSet = ts.recordSet(startTimestamp, endTimestamp);
	size_t numRecords = recordSet.size();

	Rcpp::List records; //record container

	//looping through wanted columns; each one becomes a named list element
	for (size_t i = 0; i<numFieldsWanted; i++)
	{
		size_t index = indicesWanted[i];

		//type of the column
		string fieldType = ts.structure()->getField(index)->getTSDBType();

		//the type names are mutually exclusive, so one chain is enough.
		//NOTE(review): a field of any other type yields no column at all
		if (fieldType == "Timestamp")
		{
			//timestamps go back to R as doubles
			Rcpp::NumericVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toTimestamp();

			records.push_back(columnData,(char*)fieldsWanted[i]);
		}
		else if (fieldType == "Date")
		{
			Rcpp::IntegerVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toDate();

			records.push_back(columnData,(char*)fieldsWanted[i]);
		}
		else if (fieldType == "Int8")
		{
			Rcpp::IntegerVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toInt8();

			records.push_back(columnData,(char*)fieldsWanted[i]);
		}
		else if (fieldType == "Int32")
		{
			Rcpp::IntegerVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toInt32();

			records.push_back(columnData,(char*)fieldsWanted[i]);
		}
		else if (fieldType == "Double")
		{
			Rcpp::NumericVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toDouble();

			records.push_back(columnData,(char*)fieldsWanted[i]);
		}
		else if (fieldType.find("String") != std::string::npos)
		{
			Rcpp::StringVector columnData(numRecords);

			for (size_t row=0; row<numRecords; row++)
				columnData[row] = recordSet[row][index].toString();

			records.push_back(columnData,(char*)fieldsWanted[i]);
		}
	}

	return Rcpp::DataFrame::create(records);
}
catch( std::exception &ex ) {
	forward_exception_to_r(ex);
} catch(...) {
	::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}