Ejemplo n.º 1
0
// Builds an Interaction from one instance and the per-attribute data of a class.
//
// For every column of `class_data` an InteractionItem is created from the
// column name, the matching value of `instance`, the column index, a
// tolerance, the factor flag and the column values converted to doubles.
// The tolerance is `epsilon_cat` when `factor[i]` is true and `epsilon_cont`
// otherwise.
//
// `instance` and `factor` are indexed by column position, so they need at
// least as many entries as `class_data` has columns. `sims` is sized to
// `nb_class` (value-initialised) and flagged as not valid; `depth` and
// `radius` are taken from `p_depth` and `p_radius`.
Interaction::Interaction(vector<double> const& instance,vector<bool> const& factor,
Rcpp::DataFrame const& class_data,double epsilon_cont,double epsilon_cat,int p_depth,int nb_class,double p_radius){
    int const attr_count = class_data.size();
    Rcpp::CharacterVector attr_names = class_data.names();
    vector<InteractionItem> built(attr_count);

    for(int k=0;k<attr_count;++k){
        vector<double> column_values = Rcpp::as<vector<double> >(class_data[k]);
        string attr_name = Rcpp::as<string>(attr_names[k]);
        bool const is_factor = factor[k];
        // Categorical attributes use their own tolerance.
        double const tolerance = is_factor ? epsilon_cat : epsilon_cont;
        built[k] = InteractionItem(attr_name,instance[k],k,tolerance,is_factor,column_values);
    }

    items = built;
    depth = p_depth;
    radius = p_radius;
    sims = vector<double>(nb_class);
    sims_valid = false;
}
Ejemplo n.º 2
0
// Compares two data frames that are expected to hold the columns "a"
// (numeric), "b" (character) and "c" (double).
//
// Returns true only when both frames have the same number of columns, all
// six compared columns have the same length, and every row matches in all
// three columns. The first difference found is reported through DLOG(INFO).
//
// Numeric cells are compared with `!=`, so a NaN/NA cell never compares equal
// to anything, including another NaN/NA.
bool dataframesAreEqual(Rcpp::DataFrame &a, Rcpp::DataFrame &b) {
    // DataFrame::size() is the number of columns; check it before touching
    // the columns so that a structurally different frame is simply "not equal".
    if(a.size() != b.size()) {
        DLOG(INFO) << "sizes different";
        return false;
    }

    Rcpp::NumericVector a1 = a["a"];
    Rcpp::CharacterVector b1 = a["b"];
    Rcpp::DoubleVector c1 = a["c"];

    Rcpp::NumericVector a2 = b["a"];
    Rcpp::CharacterVector b2 = b["b"];
    Rcpp::DoubleVector c2 = b["c"];

    // The loop below indexes every column with the same counter, so all of
    // them must have the same length; otherwise it would read past the end
    // of the shorter ones.
    const R_xlen_t rows = a1.size();
    if(a2.size() != rows || b1.size() != rows || b2.size() != rows ||
       c1.size() != rows || c2.size() != rows) {
        DLOG(INFO) << "row counts different";
        return false;
    }

    for(R_xlen_t i = 0; i < rows; i++) {
        if(a1[i] != a2[i]) {
            DLOG(INFO) << a1[i] << " vs " << a2[i];
            return false;
        }
        if(b1[i] != b2[i]) {
            DLOG(INFO) << b1[i] << " vs " << b2[i];
            return false;
        }
        if(c1[i] != c2[i]) {
            DLOG(INFO) << c1[i] << " vs " << c2[i];
            return false;
        }
    }
    return true;
}
Ejemplo n.º 3
0
// Extracts one row of `x` as a vector of doubles, one entry per column.
//
// The row index is whatever a single call to `prng()` yields; it is used
// as-is (no range check), so the generator must produce a valid row index
// for `x`. Each column is converted to doubles before the value is read.
vector<double> random_instance(Rcpp::DataFrame const& x, Generator& prng){
  // Draw the row index once so every column is read at the same row.
  int const picked_row = prng();
  int const column_count = x.size();

  vector<double> picked;
  picked.reserve(column_count);
  for(int k=0;k<column_count;++k){
    vector<double> const column_values = Rcpp::as<vector<double> >(x[k]);
    picked.push_back(column_values[picked_row]);
  }

  return picked;
}
Ejemplo n.º 4
0
// Copies a data frame into a character matrix with one matrix column per
// data-frame column.
//
// The number of rows is taken from the first column, so `df` must have at
// least one column. Every column is converted to a string vector before it
// is copied cell by cell. The "col.names" and "row.names" attributes of `df`
// are copied onto the matrix verbatim.
Rcpp::StringMatrix DataFrame_to_StringMatrix( Rcpp::DataFrame df ){
  // The first column determines the row count of the result.
  Rcpp::StringVector column = df(0);
  const int n_cols = df.size();
  Rcpp::StringMatrix sm(column.size(), n_cols);

  sm.attr("col.names") = df.attr("col.names");
  sm.attr("row.names") = df.attr("row.names");

  for(int col=0; col < n_cols; ++col){
    column = df(col);
    const int n_rows = column.size();
    for(int row=0; row < n_rows; ++row){
      sm(row, col) = column(row);
    }
  }

  return sm;
}
Ejemplo n.º 5
0
// Computes, for every final line in `mpcrossID`, a generation count derived
// from walking up the "Male" column of `pedigree`.
//
// For each final the walk has two phases:
//   1. While Male == Female (selfing), move to the parent's pedigree row.
//   2. From there, follow the Male parent while its ID is positive, counting
//      the steps in `ngen`.
// The value stored in `output[finalCounter]` is `ngen` minus
// round(log2(nFounders)).
//
// Parameters:
//   pedigree   - data frame with integer columns "Male" and "Female"; rows
//                are located with findIDInPedigree().
//   nFounders  - number of founders, used for the log2 offset.
//   mpcrossID  - pedigree IDs of the final lines.
//   output     - receives one value per final; must already hold at least
//                mpcrossID.size() elements.
//
// Returns false as soon as anything is inconsistent: an ID or row index out
// of range, an NA parent, a walk longer than 2000 steps, or a negative
// result. `output` may be partially written in that case.
bool intercrossingGenerations(Rcpp::DataFrame& pedigree, int nFounders, Rcpp::IntegerVector& mpcrossID, std::vector<int>& output)
{
	const int nFinals = mpcrossID.size();
	const int nPedigreeRows = pedigree.nrows();
	Rcpp::IntegerVector male = Rcpp::as<Rcpp::IntegerVector>(pedigree("Male")), female = Rcpp::as<Rcpp::IntegerVector>(pedigree("Female"));
	for(int finalCounter = 0; finalCounter < nFinals; finalCounter++)
	{
		int currentPedRow = findIDInPedigree(mpcrossID[finalCounter], pedigree);
		// Row indices are 0-based, so nPedigreeRows itself is already out of range.
		if(currentPedRow < 0 || currentPedRow >= nPedigreeRows) return false;

		//Counter to stop if the loop goes too long and might be infinite
		int loopCounter = 0;
		//Pick the last row and proceed backwards up the pedigree until we're through all the selfing generations
		while(male(currentPedRow) == female(currentPedRow))
		{
			// An NA parent is a large negative integer and is rejected here too.
			const int nextPedID = male(currentPedRow);
			if(nextPedID < 0 || nextPedID > nPedigreeRows) return false;
			currentPedRow = findIDInPedigree(nextPedID, pedigree);

			if(currentPedRow < 0 || currentPedRow >= nPedigreeRows) return false;

			loopCounter++;
			if(loopCounter > 2000) return false;
		}
		//When we reach an NA in the pedigree the while condition will terminate, which is an error.
		//Integer NA is a sentinel value (not a NaN), so it has to be compared against NA_INTEGER;
		//a self-comparison such as x != x is never true for integers.
		if(male(currentPedRow) == NA_INTEGER || female(currentPedRow) == NA_INTEGER)
		{
			return false;
		}
		int ngen = 0;
		while(male(currentPedRow) > 0)
		{
			const int nextPedID = male(currentPedRow);
			if(nextPedID < 0 || nextPedID > nPedigreeRows) return false;
			currentPedRow = findIDInPedigree(nextPedID, pedigree);
			if(currentPedRow < 0 || currentPedRow >= nPedigreeRows) return false;

			ngen++;
			if(ngen > 2000) return false;
		}
		//another check for NA values: NA is negative, so it also ends the loop above
		if(male(currentPedRow) == NA_INTEGER)
		{
			return false;
		}
		// Subtract round(log2(nFounders)) from the number of steps counted above.
		output[finalCounter] = ngen - static_cast<int>((log(static_cast<double>(nFounders)) / log(2.0)) + 0.5);
		if(output[finalCounter] < 0) return false;
	}
	return true;
}
void helper(const std::string& url,
            const bool dropCaches,
            const std::string& objType) {
    if(dropCaches) {
        LOG(INFO) << "Dropping caches ...";
        int res, res2;
        if ((res = system ("sync")) == -1) {
            throw std::runtime_error("Error running sync");
        }
        if ((res2 = system ("sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'")) == -1) {
            throw std::runtime_error("Error dropping caches");
        }
    }

    //std::string extension = base::utils::getExtension(url);
    std::string protocol = base::utils::getProtocol(url);
    std::string filename = base::utils::stripProtocol(url);
    base::IFilePtr file = hdfsutils::FileFactory::makeFile(protocol, filename);
    if( hdfsutils::isHdfs(protocol)) {
        hdfsutils::HdfsFile *p = (hdfsutils::HdfsFile *)file.get();
        base::ConfigurationMap hdfsconf;
        hdfsconf["hdfsConfigurationFile"] = std::string("../ddc/test/data/server.conf");
        p->configure(hdfsconf);
    }
    base::FileStatus status = file->stat();


    base::ConfigurationMap conf;

    std::string schema = "000:int64,"
                  "001:int64,002:int64,003:int64,004:int64,005:int64,006:int64,007:int64,008:int64,"
                  "009:int64,010:int64,011:int64,012:int64,013:int64,014:int64,015:int64,016:int64,"
                  "017:int64,018:int64,019:int64,020:int64,021:int64,022:int64,023:int64,024:int64,"
                  "025:int64,026:int64,027:int64,028:int64,029:int64,030:int64,031:int64,032:int64,"
                  "033:int64,034:int64,035:int64,036:int64,037:int64,038:int64,039:int64,040:int64,"
                  "041:int64,042:int64,043:int64,044:int64,045:int64,046:int64,047:int64,048:int64,"
                  "049:int64,050:int64,051:int64,052:int64,053:int64,054:int64,055:int64,056:int64,"
                  "057:int64,058:int64,059:int64,060:int64,061:int64,062:int64,063:int64,064:int64,"
                  "065:int64,066:int64,067:int64,068:int64,069:int64,070:int64,071:int64,072:int64,"
                  "073:int64,074:int64,075:int64,076:int64,077:int64,078:int64,079:int64,080:int64,"
                  "081:int64,082:int64,083:int64,084:int64,085:int64,086:int64,087:int64,088:int64,"
                  "089:int64,090:int64,091:int64,092:int64,093:int64,094:int64,095:int64,096:int64,"
                  "097:int64,098:int64,099:int64,100:int64,101:int64,102:int64,103:int64,104:int64,"
                  "105:int64,106:int64,107:int64,108:int64,109:int64,110:int64,111:int64,112:int64,"
                  "113:int64,114:int64,115:int64,116:int64,117:int64,118:int64,119:int64,120:int64,"
                  "121:int64,122:int64,123:int64,124:int64,125:int64,126:int64,127:int64";

    conf["schemaUrl"] = schema;
    conf["chunkStart"] = (uint64_t)0;
    conf["chunkEnd"] = (uint64_t)status.length;

    conf["hdfsConfigurationFile"] = std::string("../ddc/test/data/server.conf")   ;

    boost::shared_ptr<Rcpp::DataFrame> dfptr =
        boost::any_cast<boost::shared_ptr<Rcpp::DataFrame>>(ddc_read(url,
                                                                     objType,
                                                                     conf));
        Rcpp::DataFrame df = Rcpp::DataFrame(*(dfptr.get()));

    LOG(INFO) << "Got dataframe of size " << df.size();
}
Ejemplo n.º 7
0
// Appends the body of a VCF file (one tab-separated line per variant) to a
// gzip-compressed file.
//
// Parameters:
//   fix      - data frame with the columns CHROM, POS, ID, REF, ALT, QUAL,
//              FILTER and INFO, read as strings.
//   gt       - data frame of genotype columns; converted with
//              DataFrame_to_StringMatrix().
//   filename - file opened in append mode ("ab").
//   mask     - when 1, variants whose FILTER is not "PASS" are not written.
//
// Missing (NA) values are written as "." for ID, QUAL, FILTER and INFO and as
// "./." for genotypes. Raises an R error when the file cannot be opened,
// written or closed. The file handle is closed even when the loop is left by
// an exception (for example a user interrupt).
// [[Rcpp::export]]
void write_vcf_body_gz( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) {
  // http://stackoverflow.com/a/5649224

  // fix DataFrame
  Rcpp::StringVector chrom  = fix["CHROM"];
  Rcpp::StringVector pos    = fix["POS"];
  Rcpp::StringVector id     = fix["ID"];
  Rcpp::StringVector ref    = fix["REF"];
  Rcpp::StringVector alt    = fix["ALT"];
  Rcpp::StringVector qual   = fix["QUAL"];
  Rcpp::StringVector filter = fix["FILTER"];
  Rcpp::StringVector info   = fix["INFO"];

  // gt DataFrame
  Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt);
  Rcpp::StringVector column_names(gt.size());
  column_names = gt.attr("names");

  // gzopen() returns a gzFile handle directly (not a pointer to one).
  gzFile fi = gzopen(filename.c_str(), "ab");
  if(fi == NULL){
    Rcpp::stop("Could not open file for writing: " + filename);
  }

  try {
    for(int i=0; i<chrom.size(); i++){
      Rcpp::checkUserInterrupt();
      if(mask == 1 && filter(i) != "PASS" ){
        // Don't print variant.
        continue;
      }

      std::string line;
      line = chrom(i);
      line = line + "\t" + pos(i) + "\t";
      if(id(i) == NA_STRING){
        line += ".";
      } else {
        line = line + id(i);
      }
      line = line + "\t" + ref(i) + "\t" + alt(i) + "\t";
      if(qual(i) == NA_STRING){
        line += ".";
      } else {
        line = line + qual(i);
      }
      line += "\t";
      if(filter(i) == NA_STRING){
        line += ".";
      } else {
        line = line + filter(i);
      }
      line += "\t";
      if(info(i) == NA_STRING){
        line += ".";
      } else {
        line = line + info(i);
      }

      // gt portion
      for(int j=0; j<column_names.size(); j++){
        line += "\t";
        if(gt_cm(i, j) == NA_STRING){
          line += "./.";
        } else {
          line = line + gt_cm(i, j);
        }
      }
      line += "\n";

      // gzwrite() returns 0 on error; the line is never empty here.
      if(gzwrite(fi, line.c_str(), static_cast<unsigned>(line.size())) == 0){
        Rcpp::stop("Error writing to file: " + filename);
      }
    }
  } catch(...) {
    // Release the handle before the interrupt/error propagates.
    gzclose(fi);
    throw;
  }

  // gzclose() flushes pending output, so its result matters as well.
  if(gzclose(fi) != Z_OK){
    Rcpp::stop("Error closing file: " + filename);
  }

  return;
}
Ejemplo n.º 8
0
// Appends the body of a VCF file (one tab-separated line per variant) to an
// uncompressed file.
//
// Parameters:
//   fix      - data frame with the columns CHROM, POS, ID, REF, ALT, QUAL,
//              FILTER and INFO, read as strings.
//   gt       - data frame of genotype columns; converted with
//              DataFrame_to_StringMatrix().
//   filename - file opened for appending in binary mode.
//   mask     - when 1, variants whose FILTER is not "PASS" are not written
//              (same rule as write_vcf_body_gz).
//
// Missing (NA) values are written as "." for ID, QUAL, FILTER and INFO and as
// "./." for genotypes, matching write_vcf_body_gz. Every field is followed by
// exactly one tab. Raises an R error when the file cannot be opened or a
// write fails.
// [[Rcpp::export]]
void write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) {

  // fix DataFrame
  Rcpp::StringVector chrom  = fix["CHROM"];
  Rcpp::StringVector pos    = fix["POS"];
  Rcpp::StringVector id     = fix["ID"];
  Rcpp::StringVector ref    = fix["REF"];
  Rcpp::StringVector alt    = fix["ALT"];
  Rcpp::StringVector qual   = fix["QUAL"];
  Rcpp::StringVector filter = fix["FILTER"];
  Rcpp::StringVector info   = fix["INFO"];

  // gt DataFrame
  Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt);
  Rcpp::StringVector column_names(gt.size());
  column_names = gt.attr("names");

  // Uncompressed.
  std::ofstream myfile;
  myfile.open (filename.c_str(), std::ios::out | std::ios::app | std::ios::binary);
  if(!myfile.is_open()){
    Rcpp::stop("Could not open file for writing: " + filename);
  }

  for(int i=0; i<chrom.size(); i++){
    Rcpp::checkUserInterrupt();
    if(mask == 1 && filter(i) != "PASS" ){
      // Don't print variant.
      continue;
    }

    myfile << chrom(i) << "\t";
    myfile << pos(i) << "\t";
    if(id(i) == NA_STRING){
      myfile << ".";
    } else {
      myfile << id(i);
    }
    myfile << "\t";
    myfile << ref(i) << "\t";
    myfile << alt(i) << "\t";
    if(qual(i) == NA_STRING){
      myfile << ".";
    } else {
      myfile << qual(i);
    }
    myfile << "\t";
    if(filter(i) == NA_STRING){
      myfile << ".";
    } else {
      myfile << filter(i);
    }
    myfile << "\t";
    // No tab after INFO here: each genotype below brings its own leading tab.
    if(info(i) == NA_STRING){
      myfile << ".";
    } else {
      myfile << info(i);
    }

    // gt region.
    for(int j=0; j<column_names.size(); j++){
      myfile << "\t";
      if(gt_cm(i, j) == NA_STRING){
        myfile << "./.";
      } else {
        myfile << gt_cm(i, j);
      }
    }

    myfile << "\n";
  }

  myfile.close();
  if(myfile.fail()){
    Rcpp::stop("Error writing to file: " + filename);
  }

  return;
}
Ejemplo n.º 9
0
// Sends an XMLA Discover request and returns the resulting rowset as a data
// frame.
//
// Parameters:
//   handle              - external pointer to an XMLAHandle (connection
//                         string, session ID, user name, password).
//   request             - character vector; element 0 is the request type,
//                         upper-cased before it is sent.
//   rRestrictionsString - character vector; element 0 holds key/value pairs
//                         parsed into the restriction list (may be empty).
//   rPropertiesString   - character vector; element 0 holds key/value pairs
//                         parsed into the property list (may be empty).
//
// Returns a data frame with one column per element of the "row" complex type
// found in the rowset schema, or FALSE when the request fails or the response
// does not contain a usable rowset (a message is written to std::cerr).
RcppExport SEXP RXMLADiscover(SEXP handle, SEXP request, SEXP rRestrictionsString, SEXP rPropertiesString)
{
	XmlaWebServiceSoapProxy service = XmlaWebServiceSoapProxy(SOAP_XML_DEFAULTNS, SOAP_XML_DEFAULTNS);

	Rcpp::XPtr<XMLAHandle> ptr(handle);
	const char *connectionString = ptr->connectionString;
	std::string propertiesString = CHAR(STRING_ELT(rPropertiesString,0));
	std::string restrictionsString = CHAR(STRING_ELT(rRestrictionsString, 0));

	ns1__Session session;
	std::string sessionId = ptr->sessionID;
	session.SessionId = &sessionId;
	service.soap_header(NULL, NULL, &session, NULL);

	_ns1__Discover discover;
	ns1__Restrictions restrictions;
	ns1__RestrictionList restrictionList;
	ns1__Properties properties;
	ns1__PropertyList propertyList;
	_ns1__DiscoverResponse discoverResponse;

	std::string requestType = CHAR(STRING_ELT(request,0));
	std::transform(requestType.begin(), requestType.end(), requestType.begin(), ::toupper);
	discover.RequestType = &requestType;
	discover.Restrictions = &restrictions;
	restrictions.RestrictionList = &restrictionList;
	discover.Properties = &properties;
	properties.PropertyList = &propertyList;
	if (!propertiesString.empty()) {
		parseKeyValuePairs(&propertiesString, propertyList.__any);
	}
	if (!restrictionsString.empty()) {
		parseKeyValuePairs(&restrictionsString, restrictionList.__any);
	}
	service.userid = ptr->userName;
	service.passwd = ptr->password;

	if (service.Discover(connectionString, NULL, &discover, &discoverResponse) != SOAP_OK) {
		// The fault structure is not guaranteed to be populated.
		if (service.fault != NULL) {
			std::cerr << service.fault->faultstring << std::endl;
		}
		else {
			std::cerr << "Error: XMLA Discover request failed" << std::endl;
		}
		service.destroy();
		return Rcpp::wrap(false);
	}

	if (discoverResponse.return_ == NULL
		|| discoverResponse.return_->ns2__root == NULL
		|| discoverResponse.return_->ns2__root->xsd__schema == NULL
		|| discoverResponse.return_->ns2__root->__union_ResultXmlRoot == NULL) {
		std::cerr << "Error: Discover response contains no rowset" << std::endl;
		service.destroy();
		return Rcpp::wrap(false);
	}

	std::string rawXML = "<root xmlns=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"><xsd:schema targetNamespace=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:sql=\"urn:schemas-microsoft-com:xml-sql\" elementFormDefault=\"qualified\">";
	rawXML = rawXML + discoverResponse.return_->ns2__root->xsd__schema + "</xsd:schema></root>";

	// rapidxml parses in place and its nodes point into this buffer, so the
	// buffer has to outlive `doc`. Owning it in a vector releases it on every
	// exit path.
	std::vector<char> schemaBuffer(rawXML.begin(), rawXML.end());
	schemaBuffer.push_back('\0');
	rapidxml::xml_document<> doc;
	doc.parse<0>(&schemaBuffer[0]);

	// Find XML section containing column names
	rapidxml::xml_node<char> *rootNode = doc.first_node();
	rapidxml::xml_node<char> *schemaNode = (rootNode != NULL) ? rootNode->first_node() : NULL;
	rapidxml::xml_node<char> *rowNode = (schemaNode != NULL) ? schemaNode->first_node("xsd:complexType") : NULL;
	while (rowNode != NULL) {
		rapidxml::xml_attribute<char> *nameAttribute = rowNode->first_attribute("name");
		if (nameAttribute != NULL && strcmp(nameAttribute->value(), "row") == 0) {
			break;
		}
		rowNode = rowNode->next_sibling("xsd:complexType");
	}

	rapidxml::xml_node<char> *sequenceNode = (rowNode != NULL) ? rowNode->first_node() : NULL;
	if (sequenceNode == NULL) {
		std::cerr << "Error: No row definition found in rowset schema" << std::endl;
		service.destroy();
		return Rcpp::wrap(false);
	}

	rapidxml::xml_node<char> *schemaElementNode = sequenceNode->first_node();
	std::vector<char *> rows = discoverResponse.return_->ns2__root->__union_ResultXmlRoot->row;
	Rcpp::DataFrame resultDataFrame;
	Rcpp::CharacterVector colNames;

	while (schemaElementNode != NULL) {
		rapidxml::xml_attribute<char> *nameAttribute = schemaElementNode->first_attribute("name");
		// Elements without a name cannot be mapped to a column; skip them.
		if (nameAttribute != NULL) {
			char *colName = nameAttribute->value();
			colNames.push_back(colName);
			// The flag passed on records whether the element declares a "type" attribute.
			const bool hasTypeAttribute = (schemaElementNode->first_attribute("type") != 0);
			rowSetParseData(rows, &resultDataFrame, colName, hasTypeAttribute);
		}
		schemaElementNode = schemaElementNode->next_sibling();
	}
	resultDataFrame.attr("names") = colNames;
	service.destroy();
	return resultDataFrame;
}
Ejemplo n.º 10
0
// Sends an XMLA Execute request and converts the response into an R object.
//
// Parameters:
//   handle            - external pointer to an XMLAHandle (connection string,
//                       session ID, user name, password).
//   query             - character vector; element 0 is the statement to run.
//   rPropertiesString - character vector; element 0 holds key/value pairs
//                       parsed into the property list (may be empty).
//
// Returns:
//   - a data frame for an MDDataSet response with exactly two data axes: the
//     first column holds the row names, the remaining columns hold the cell
//     values (NA where the response has no cell for that position);
//   - a data frame for a non-empty RowSet response, one column per element of
//     the "row" complex type in the rowset schema;
//   - TRUE when the call succeeded but carried neither kind of result;
//   - FALSE on failure or an unsupported/malformed response (a message is
//     written to std::cerr).
RcppExport SEXP RXMLAExecute(SEXP handle, SEXP query, SEXP rPropertiesString)
{
	XmlaWebServiceSoapProxy service = XmlaWebServiceSoapProxy(SOAP_XML_DEFAULTNS, SOAP_XML_DEFAULTNS);

	Rcpp::XPtr<XMLAHandle> ptr(handle);
	const char *connectionString = ptr->connectionString;
	std::string propertiesString = CHAR(STRING_ELT(rPropertiesString,0));

	ns1__Session session;
	std::string sessionId = ptr->sessionID;
	session.SessionId = &sessionId;
	service.soap_header(NULL, NULL, &session, NULL);

	_ns1__Execute execute;
	ns1__CommandStatement command;
	ns1__Properties properties;
	ns1__PropertyList propertyList;
	_ns1__ExecuteResponse response;

	std::string statement = CHAR(STRING_ELT(query,0));
	command.Statement = &statement;
	execute.Command = &command;
	execute.Properties = &properties;
	properties.PropertyList = &propertyList;
	if (!propertiesString.empty()) {
		parseKeyValuePairs(&propertiesString, propertyList.__any);
	}
	service.userid = ptr->userName;
	service.passwd = ptr->password;

	if (service.Execute(connectionString, NULL, &execute, &response) != SOAP_OK) {
		// The fault detail is optional, so check every level before printing it.
		if (service.fault != NULL && service.fault->detail != NULL && service.fault->detail->__any != NULL) {
			char * errorMessage = service.fault->detail->__any;
			std::cerr << errorMessage << std::endl;
		}
		else {
			std::cerr << "Error: XMLA Execute request failed" << std::endl;
		}
		service.destroy();
		return Rcpp::wrap(false);
	}

	// A successful call without a result body is reported like "nothing to parse".
	if (response.return_ == NULL) {
		service.destroy();
		return Rcpp::wrap(true);
	}

	// Parse MDDataSet
	if (response.return_->ns4__root != NULL && response.return_->ns4__root->__union_ResultXmlRoot != NULL && response.return_->ns4__root->__union_ResultXmlRoot->Axes != NULL) {
		ns4__Axes *axes = response.return_->ns4__root->__union_ResultXmlRoot->Axes;
		if (axes->Axis.size() < 3) {
			std::cerr << "Error: No data on Axis1" << std::endl;
			service.destroy();
			return Rcpp::wrap(false);
		}
		if (axes->Axis.size() > 3) {
			std::cerr << "Error: More than 2 axes not supported" << std::endl;
			service.destroy();
			return Rcpp::wrap(false);
		}
		if (axes->Axis[0] == NULL || axes->Axis[0]->__union_Axis == NULL || axes->Axis[0]->__union_Axis->Tuples == NULL
			|| axes->Axis[1] == NULL || axes->Axis[1]->__union_Axis == NULL || axes->Axis[1]->__union_Axis->Tuples == NULL) {
			std::cerr << "Error: Axis tuples missing from response" << std::endl;
			service.destroy();
			return Rcpp::wrap(false);
		}

		// A response without cell data yields a matrix filled with NA.
		std::vector<ns4__Cell *> cellDataVector;
		if (response.return_->ns4__root->__union_ResultXmlRoot->CellData != NULL) {
			cellDataVector = response.return_->ns4__root->__union_ResultXmlRoot->CellData->Cell;
		}
		int numCols = axes->Axis[0]->__union_Axis->Tuples->Tuple.size();
		int numRows = axes->Axis[1]->__union_Axis->Tuples->Tuple.size();
		// Index of the next cell that has not been placed in the matrix yet.
		std::size_t cellDataVectorMember = 0;

		Rcpp::CharacterVector colNames;
		Rcpp::CharacterVector rowNames;
		Rcpp::NumericMatrix resultMatrix(numRows, numCols);

		for (int row = 0; row < numRows; row++)	{
			for (int col = 0; col < numCols; col++)	{
				// Cells are matched by ordinal; positions the response skips stay NA.
				if (cellDataVectorMember < cellDataVector.size()
					&& cellDataVector[cellDataVectorMember] != NULL
					&& cellDataVector[cellDataVectorMember]->CellOrdinal == ((row * numCols) + col)) {
					if (cellDataVector[cellDataVectorMember]->Value != NULL) {
						resultMatrix(row, col) = *cellDataVector[cellDataVectorMember]->Value;
					}
					else {
						resultMatrix(row, col) = NA_REAL;
					}
					cellDataVectorMember += 1;
				}
				else {
					resultMatrix(row, col) = NA_REAL;
				}
			}
			mdDataSetGetNames(rowNames, axes, row, true);
		}

		for (int col = 0; col < numCols; col++)	{
			mdDataSetGetNames(colNames, axes, col, false);
		}

		colNames.push_front("Row Names");
		Rcpp::DataFrame resultDataFrame(resultMatrix);
		resultDataFrame.push_front(rowNames);
		resultDataFrame.attr("names") = colNames;
		service.destroy();
		return resultDataFrame;
	}

	// Parse RowSet
	if (response.return_->ns2__root != NULL
		&& response.return_->ns2__root->xsd__schema != NULL
		&& response.return_->ns2__root->__union_ResultXmlRoot != NULL
		&& !response.return_->ns2__root->__union_ResultXmlRoot->row.empty()) {

		std::string rawXML = "<root xmlns=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"><xsd:schema targetNamespace=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:sql=\"urn:schemas-microsoft-com:xml-sql\" elementFormDefault=\"qualified\">";
		rawXML = rawXML + response.return_->ns2__root->xsd__schema + "</xsd:schema></root>";

		// rapidxml parses in place and its nodes point into this buffer, so
		// the buffer has to outlive `doc`. Owning it in a vector releases it
		// on every exit path.
		std::vector<char> schemaBuffer(rawXML.begin(), rawXML.end());
		schemaBuffer.push_back('\0');
		rapidxml::xml_document<> doc;
		doc.parse<0>(&schemaBuffer[0]);

		// Find XML section containing column names
		rapidxml::xml_node<char> *rootNode = doc.first_node();
		rapidxml::xml_node<char> *schemaNode = (rootNode != NULL) ? rootNode->first_node() : NULL;
		rapidxml::xml_node<char> *rowNode = (schemaNode != NULL) ? schemaNode->first_node("xsd:complexType") : NULL;
		while (rowNode != NULL) {
			rapidxml::xml_attribute<char> *nameAttribute = rowNode->first_attribute("name");
			if (nameAttribute != NULL && strcmp(nameAttribute->value(), "row") == 0) {
				break;
			}
			rowNode = rowNode->next_sibling("xsd:complexType");
		}

		rapidxml::xml_node<char> *sequenceNode = (rowNode != NULL) ? rowNode->first_node() : NULL;
		if (sequenceNode == NULL) {
			std::cerr << "Error: No row definition found in rowset schema" << std::endl;
			service.destroy();
			return Rcpp::wrap(false);
		}

		rapidxml::xml_node<char> *schemaElementNode = sequenceNode->first_node();
		std::vector<char *> rows = response.return_->ns2__root->__union_ResultXmlRoot->row;
		Rcpp::DataFrame resultDataFrame;
		Rcpp::CharacterVector colNames;

		while (schemaElementNode != NULL) {
			rapidxml::xml_attribute<char> *nameAttribute = schemaElementNode->first_attribute("name");
			// Elements without a name cannot be mapped to a column; skip them.
			if (nameAttribute != NULL) {
				char *colName = nameAttribute->value();
				colNames.push_back(colName);
				// The flag passed on records whether the element declares a "type" attribute.
				const bool hasTypeAttribute = (schemaElementNode->first_attribute("type") != 0);
				rowSetParseData(rows, &resultDataFrame, colName, hasTypeAttribute);
			}
			schemaElementNode = schemaElementNode->next_sibling();
		}
		resultDataFrame.attr("names") = colNames;
		service.destroy();
		return resultDataFrame;
	}

	service.destroy();
	return Rcpp::wrap(true);
}