예제 #1
0
파일: tsdbR.cpp 프로젝트: afiedler/tsdb
/**
<summary> Appends data onto an existing timeseries.</summary>
<remarks> Every check below must pass before any record is handed to the timeseries:
- _groupID is an integer vector, _seriesName a character vector and
  _appendData a list (data frame)
- the appending data has exactly as many named fields as the timeseries
- every field of the appending data exists in the timeseries and has the
  matching R type (Timestamp, Double: numeric; Int8, Int32, Date: integer;
  String: character)
- every field of the timeseries appears exactly once in the appending data
- all fields of the appending data have the same length
C++ exceptions are caught and handed to forward_exception_to_r / Rf_error.</remarks>

<param name="_groupID">Integer argument for the HDF5 file identifier (group ID).</param>
<param name="_seriesName">String argument for the timeseries name.</param>
<param name="_appendData">Data frame argument of the data to append.</param>
<param name="_discardOverlap">Converted to bool and passed on to
tsdb::Timeseries::appendRecordSet. If true, appended records that overlap with
existing records are discarded.</param>
<returns> Returns 1 if successful.</returns>
*/
SEXP TSDBappend(SEXP _groupID, SEXP _seriesName, SEXP _appendData,
				SEXP _discardOverlap)
{
try {
	using namespace std;
	//checking arguments
	if (TYPEOF(_groupID) != INTSXP)
		throw std::runtime_error("Group ID should be an integer argument.");

	if (TYPEOF(_seriesName) != STRSXP)
		throw std::runtime_error("Timeseries name should be a string argument.");

	if (TYPEOF(_appendData) != VECSXP)
		throw std::runtime_error("Data should be a data frame.");

	/******************************************************
	CHECKING PROPERTIES OF THE TABLE AND THE APPENDING DATA
	*******************************************************/
	//properties of the table
	string seriesName = Rcpp::as<string>(_seriesName);
	int groupID = Rcpp::as<int>(_groupID);
	tsdb::Timeseries ts(groupID, seriesName);

	//The structure is kept in a named variable because it is later passed to
	//the RecordSet constructor; see
	//http://stackoverflow.com/questions/7251253/c-no-matching-function-for-call-but-the-candidate-has-the-exact-same-signatur
	//for why 'tsdb::RecordSet records((size_t) n, ts.structure());' would not work.
	boost::shared_ptr<tsdb::Structure> tsStructure = ts.structure();
	size_t tableFieldCount = tsStructure->getNFields();

	//properties of the appending data frame
	Rcpp::List appendData(_appendData);
	Rcpp::StringVector appendDataNames = appendData.attr("names");
	size_t appendingFieldCount = appendDataNames.length();

	//checking for number of fields
	if (appendingFieldCount != tableFieldCount)
		throw std::runtime_error("Appending data frame and the TSDB table "
				"have a different number of fields.");

	//guards the access to the first column just below
	if (appendingFieldCount == 0)
		throw std::runtime_error("Appending data frame has no fields.");

	//the first column defines the number of records; every other column is
	//compared against it inside the loop
	const int numRecordsToAppend = LENGTH(VECTOR_ELT(_appendData, 0));

	//checking to see if the field in the appending data exists in the
	//TSDB table; also checking for types and lengths
	for (size_t i=0; i<appendingFieldCount; i++)
	{
		const string dfName = (string) appendDataNames[i];
		SEXP dfColumn = VECTOR_ELT(_appendData, i);

		//if the index isn't found, an exception will occur
		size_t index = tsStructure->getFieldIndexByName(dfName);

		//checking the types match
		string TSDBtype = tsStructure->getField(index)->getTSDBType();
		bool typeMatches = true;
		if (TSDBtype == "Timestamp" || TSDBtype == "Double")
			typeMatches = (TYPEOF(dfColumn) == REALSXP);
		else if (TSDBtype == "Int8" || TSDBtype == "Int32" || TSDBtype == "Date")
			typeMatches = (TYPEOF(dfColumn) == INTSXP);
		//substring match, so that this test agrees with the one used when
		//the column is copied further below
		else if (TSDBtype.find("String") != std::string::npos)
			typeMatches = (TYPEOF(dfColumn) == STRSXP);

		if (!typeMatches)
			throw std::runtime_error(TSDBappendErrorMessage(dfName, TSDBtype));

		//checking existence of table fields in the data frame
		//(both sides have the same field count, so i is a valid table index)
		string fieldName = tsStructure->getField(i)->getName();
		int check = (int) std::count(appendDataNames.begin(),appendDataNames.end(),fieldName.c_str());
		if (check != 1)
		{
			if (check == 0)
				throw std::runtime_error("Field " + fieldName + " was not found "
						"in the appending data.");
			else
				throw std::runtime_error("Field " + fieldName + " was found "
						"multiple times.");
		}

		//checking for uniform length in data frame.
		//this could be an issue if a named list is passed as an argument.
		//The i-th column is compared (not the first one again); the copy
		//loops below read numRecordsToAppend elements from every column.
		if (numRecordsToAppend != LENGTH(dfColumn))
			throw std::runtime_error("All fields in the appending data must have"
					" the same length.");
	}

	/**************************************
	CHECKS PASSED. APPENDING DATA.
	**************************************/
	//vector of formatted input, ready for appending
	tsdb::RecordSet records((size_t) numRecordsToAppend, tsStructure);

	for (size_t dfIndex=0; dfIndex<appendingFieldCount; dfIndex++)
	{
		string dfName = (string) appendDataNames[dfIndex];
		SEXP dfColumn = VECTOR_ELT(_appendData,dfIndex);

		//index in the TSDB table (column order of the data frame may differ)
		size_t tableIndex = tsStructure->getFieldIndexByName(dfName);
		string TSDBtype = tsStructure->getField(tableIndex)->getTSDBType();

		if (TSDBtype == "Timestamp")
		{
			const double* values = REAL(dfColumn);
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::timestamp_t) (values[row]);
		}
		else if (TSDBtype == "Double")
		{
			const double* values = REAL(dfColumn);
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::ieee64_t) (values[row]);
		}
		else if (TSDBtype == "Int8")
		{
			//NOTE(review): values are narrowed without a range check — confirm
			//that callers only pass values that fit tsdb::int8_t
			const int* values = INTEGER(dfColumn);
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::int8_t) (values[row]);
		}
		else if (TSDBtype == "Int32")
		{
			const int* values = INTEGER(dfColumn);
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::int32_t) (values[row]);
		}
		else if (TSDBtype == "Date")
		{
			const int* values = INTEGER(dfColumn);
			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (tsdb::date_t) (values[row]);
		}
		else if (TSDBtype.find("String") != std::string::npos)
		{
			Rcpp::StringVector stringColumn(dfColumn);

			for (int row=0; row<numRecordsToAppend; row++)
				records[row][tableIndex] = (string) stringColumn[row];
		}
	}

	//appending the data
	bool discardOverlap = Rcpp::as<bool>(_discardOverlap);
	ts.appendRecordSet(records,discardOverlap);

	return Rcpp::wrap(1);
}
catch( const std::exception &ex ) {
	forward_exception_to_r(ex);
} catch(...) {
	::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}
예제 #2
0
파일: tsdbR.cpp 프로젝트: afiedler/tsdb
/**
<summary> Creates a new timeseries within an HDF5 file.</summary>
<remarks> The first field of the new timeseries is always a timestamp field
named "TSDB_timestamp"; the fields described by _fields follow in order.
Recognised field types (compared case-insensitively) are int8, int32, double,
date and string(n) for a positive integer n. All field types are validated
before any field object is allocated. C++ exceptions are caught and handed to
forward_exception_to_r / Rf_error.</remarks>
<param name="_groupID">Integer argument for the HDF5 file identifier (group ID).
Must be non-negative.</param>
<param name="_seriesName">String argument for the timeseries name.</param>
<param name="_seriesDescription">String argument for timeseries' description.</param>
<param name="_fields">Named list (data frame) argument containing two named elements:
-fieldNames - names of the fields
-fieldTypes - types of the fields
These elements must have the same length.</param>
<returns> Returns an integer vector holding 1 if successful.</returns>
*/
SEXP TSDBcreate_timeseries(SEXP _groupID, SEXP _seriesName,
		SEXP _seriesDescription, SEXP _fields)
{
try {
	using namespace std;
	//checking arguments
	if (TYPEOF(_groupID) != INTSXP)
		throw std::runtime_error("Group ID should be an integer argument.");

	if (TYPEOF(_seriesName) != STRSXP)
		throw std::runtime_error("Timeseries name should be a string argument.");

	if (TYPEOF(_seriesDescription) != STRSXP)
		throw std::runtime_error("Timeseries description should be a string argument.");

	if (TYPEOF(_fields) != VECSXP)
		throw std::runtime_error("Fields argument should be a named list.");

	//checking properties of the list
	Rcpp::List fieldsList(_fields);
	Rcpp::StringVector listNames = fieldsList.attr("names");

	//checking for the existence of 'fieldNames' and 'fieldTypes' list elements
	int check = (int) std::count(listNames.begin(),listNames.end(),"fieldNames");
	if (check != 1)
		throw std::runtime_error("Fields argument should have one element named 'fieldNames'.");

	check = (int) std::count(listNames.begin(),listNames.end(),"fieldTypes");
	if (check != 1)
		throw std::runtime_error("Fields argument should have one element named 'fieldTypes'.");

	Rcpp::StringVector fieldNames = fieldsList["fieldNames"];
	Rcpp::StringVector fieldTypes = fieldsList["fieldTypes"];

	//names and types are indexed in lockstep below, so they must pair up
	if (fieldNames.length() != fieldTypes.length())
		throw std::runtime_error("'fieldNames' and 'fieldTypes' should have the same length.");

	//grabbing other arguments
	string seriesName = Rcpp::as<string>(_seriesName);
	string seriesDescription = Rcpp::as<string>(_seriesDescription);
	int groupID = Rcpp::as<int>(_groupID);

	if (groupID < 0)
		throw std::runtime_error("Invalid group ID.");

	const int numFields = fieldNames.length();
	const string stringFormatError = "Field type for a string must have the "
			"form 'string(n)', for some positive integer n.";

	//pass 1: validate and parse every field type before anything is
	//allocated, so that a bad type cannot leak already-created fields
	vector<string> normalizedTypes;
	vector<int> stringLengths;
	normalizedTypes.reserve(numFields);
	stringLengths.reserve(numFields);

	for (int i=0; i<numFields; i++)
	{
		string type = (string) fieldTypes[i];
		int stringLength = 0;

		//transforming to lower case, so the comparison is case INsensitive
		boost::to_lower(type);
		if (type == "int8" || type == "int32" || type == "double" || type == "date")
		{
			//nothing further to parse for the fixed-size types
		}
		else if (type.find("string") != std::string::npos)
		{
			//extracting n from 'string(n)' without modifying the string
			const string::size_type open = type.find('(');
			const string::size_type close =
					(open == std::string::npos) ? std::string::npos : type.find(')', open + 1);
			if (open == std::string::npos || close == std::string::npos)
				throw std::runtime_error(stringFormatError);

			string digits = type.substr(open + 1, close - open - 1);

			//tolerating blanks around the number, e.g. 'string( 16 )'
			const string::size_type firstChar = digits.find_first_not_of(" \t");
			if (firstChar == std::string::npos)
				throw std::runtime_error(stringFormatError);
			const string::size_type lastChar = digits.find_last_not_of(" \t");
			digits = digits.substr(firstChar, lastChar - firstChar + 1);

			//digits only, and at most 9 of them so the value fits an int
			if (digits.size() > 9 ||
					digits.find_first_not_of("0123456789") != std::string::npos)
				throw std::runtime_error(stringFormatError);

			stringLength = atoi(digits.c_str());
			if (stringLength <= 0)
				throw std::runtime_error(stringFormatError);
		}
		else
		{
			//an unrecognised type used to be skipped silently, which dropped
			//the field from the structure
			throw std::runtime_error("Unknown field type '" + type + "' for field '"
					+ (string) fieldNames[i] + "'. Supported types are int8, "
					"int32, double, date and string(n).");
		}

		normalizedTypes.push_back(type);
		stringLengths.push_back(stringLength);
	}

	//pass 2: building structure of the new timeseries
	//NOTE(review): the raw pointers are presumably owned by tsdb::Structure
	//once it is constructed — confirm against the tsdb library
	vector<tsdb::Field*> fields;
	fields.reserve(numFields + 1);

	//first field is always the timestamp
	fields.push_back(new tsdb::TimestampField("TSDB_timestamp"));

	for (int i=0; i<numFields; i++)
	{
		string name = (string) fieldNames[i];
		const string& type = normalizedTypes[i];

		if (type == "int8")
			fields.push_back(new tsdb::Int8Field(name));
		else if (type == "int32")
			fields.push_back(new tsdb::Int32Field(name));
		else if (type == "double")
			fields.push_back(new tsdb::DoubleField(name));
		else if (type == "date")
			fields.push_back(new tsdb::DateField(name));
		else //validated in pass 1 to be string(n)
			fields.push_back(new tsdb::StringField(name,stringLengths[i]));
	}

	boost::shared_ptr<tsdb::Structure> st =
			boost::make_shared<tsdb::Structure>(fields,false);

	//constructing the Timeseries object is what creates the series
	tsdb::Timeseries ts =
			tsdb::Timeseries(groupID,seriesName,seriesDescription,st);

	Rcpp::IntegerVector status(1);
	status[0] = 1;
	return status;
}
catch( const std::exception &ex ) {
	forward_exception_to_r(ex);
} catch(...) {
	::Rf_error( "c++ exception (unknown reason)" );
}
return R_NilValue;
}