/**
 * Builds an Interaction for one instance against the data of a class.
 *
 * One InteractionItem is created per column of `class_data`, in column order.
 * Columns whose entry in `factor` is true are given `epsilon_cat`, all other
 * columns are given `epsilon_cont`.
 *
 * @param instance      value of the instance for each column of `class_data`
 * @param factor        per-column flag forwarded to each InteractionItem
 * @param class_data    data frame whose columns are converted to vector<double>
 * @param epsilon_cont  epsilon used for columns where `factor[i]` is false
 * @param epsilon_cat   epsilon used for columns where `factor[i]` is true
 * @param p_depth       stored in `depth`
 * @param nb_class      length of the `sims` vector
 * @param p_radius      stored in `radius`
 */
Interaction::Interaction(vector<double> const& instance,vector<bool> const& factor, Rcpp::DataFrame const& class_data,double epsilon_cont,double epsilon_cat,int p_depth,int nb_class,double p_radius){
    vector<InteractionItem> built_items(class_data.size());
    Rcpp::CharacterVector column_names = class_data.names();

    for(int idx = 0; idx < class_data.size(); ++idx){
        vector<double> column_values = Rcpp::as<vector<double> >(class_data[idx]);
        string column_name = Rcpp::as<string>(column_names[idx]);

        // The only difference between the two kinds of column is the epsilon.
        double epsilon = epsilon_cont;
        if(factor[idx]){
            epsilon = epsilon_cat;
        }

        InteractionItem item(column_name,instance[idx],idx,epsilon,factor[idx],column_values);
        built_items[idx] = item;
    }

    items = built_items;
    depth = p_depth;
    // Similarities start out as a vector of nb_class elements, flagged as not valid.
    sims_valid = false;
    sims = vector<double>(nb_class);
    radius = p_radius;
}
/**
 * Compares the columns "a" (numeric), "b" (character) and "c" (double) of two
 * data frames element by element.
 *
 * The frames are considered different when their column counts differ, when
 * the compared columns do not all have the same length, or when any pair of
 * corresponding elements differs. The first difference found is logged with
 * DLOG(INFO).
 *
 * Numeric values are compared with `!=`, so a NaN never compares equal to
 * anything, including another NaN.
 *
 * @param a first data frame; must contain columns "a", "b" and "c"
 * @param b second data frame; must contain columns "a", "b" and "c"
 * @return true when all compared elements are equal, false otherwise
 */
bool dataframesAreEqual(Rcpp::DataFrame &a, Rcpp::DataFrame &b) {
    Rcpp::NumericVector a1 = a["a"];
    Rcpp::CharacterVector b1 = a["b"];
    Rcpp::DoubleVector c1 = a["c"];

    Rcpp::NumericVector a2 = b["a"];
    Rcpp::CharacterVector b2 = b["b"];
    Rcpp::DoubleVector c2 = b["c"];

    if(a.size() != b.size()) {
        DLOG(INFO) << "sizes different";
        return false;
    }

    // The loop below indexes all six vectors with the same index, so every
    // one of them has to be as long as a1; otherwise we would read past the
    // end of the shorter ones.
    if(a2.size() != a1.size() ||
       b1.size() != a1.size() || b2.size() != a1.size() ||
       c1.size() != a1.size() || c2.size() != a1.size()) {
        DLOG(INFO) << "row counts different";
        return false;
    }

    for(int i = 0; i < a1.size(); i++) {
        if(a1[i] != a2[i]) {
            DLOG(INFO) << a1[i] << " vs " << a2[i];
            return false;
        }
        if(b1[i] != b2[i]) {
            DLOG(INFO) << b1[i] << " vs " << b2[i];
            return false;
        }
        if(c1[i] != c2[i]) {
            DLOG(INFO) << c1[i] << " vs " << c2[i];
            return false;
        }
    }
    return true;
}
/**
 * Extracts one row of a data frame as a vector of doubles.
 *
 * The row number is whatever `prng()` returns; the same row is read from
 * every column of `x`.
 *
 * @param x     data frame whose columns can be coerced to numeric
 * @param prng  generator called once to obtain the row number
 * @return one value per column of `x`, taken from the selected row
 * @throws Rcpp::exception (via Rcpp::stop) if the generated row number is not
 *         a valid index into a column
 */
vector<double> random_instance(Rcpp::DataFrame const& x, Generator& prng){
    // Select random row number
    int row = prng();

    vector<double> instance(x.size());
    for(int i=0;i<x.size();++i){
        // A NumericVector wraps the column (coercing only when it is not
        // already numeric), so a single element can be read without first
        // copying the whole column into a std::vector.
        Rcpp::NumericVector col = Rcpp::as<Rcpp::NumericVector>(x[i]);
        if(row < 0 || row >= col.size()){
            Rcpp::stop("random_instance: generated row index is out of range");
        }
        instance[i] = col[row];
    }
    return instance;
}
/**
 * Converts a data frame into a character matrix with one matrix column per
 * data frame column.
 *
 * The number of matrix rows is taken from the length of the first column.
 * The "col.names" and "row.names" attributes of the data frame are copied
 * onto the matrix as attributes of the same names.
 *
 * @param df data frame whose columns can be converted to string vectors
 * @return matrix holding the string form of every cell of `df`
 */
Rcpp::StringMatrix DataFrame_to_StringMatrix( Rcpp::DataFrame df ){
    // The first column determines how many rows the matrix gets.
    Rcpp::StringVector column = df(0);
    Rcpp::StringMatrix result(column.size(), df.size());

    result.attr("col.names") = df.attr("col.names");
    result.attr("row.names") = df.attr("row.names");

    // Copy column by column; each column is converted to strings once.
    for(int col_idx = 0; col_idx < df.size(); ++col_idx){
        column = df(col_idx);
        for(int row_idx = 0; row_idx < column.size(); ++row_idx){
            result(row_idx, col_idx) = column(row_idx);
        }
    }

    return result;
}
bool intercrossingGenerations(Rcpp::DataFrame& pedigree, int nFounders, Rcpp::IntegerVector& mpcrossID, std::vector<int>& output) { #define pedFind(id) findIDInPedigree(id, pedigree) int nFinals = mpcrossID.size(); int nPedigreeRows = pedigree.nrows(); Rcpp::IntegerVector male = Rcpp::as<Rcpp::IntegerVector>(pedigree("Male")), female = Rcpp::as<Rcpp::IntegerVector>(pedigree("Female")); for(int finalCounter = 0; finalCounter < nFinals; finalCounter++) { int currentPedRow = pedFind(mpcrossID[finalCounter]); if(currentPedRow < 0 || currentPedRow > nPedigreeRows) return false; //Counter to stop if the loop goes too long and might be infinite int loopCounter = 0; //Pick the last row and proceed backwards up the pedigree until we're through all the selfing generations while(male(currentPedRow) == female(currentPedRow)) { int nextPedID = male(currentPedRow); if(nextPedID < 0 || nextPedID > nPedigreeRows) return false; currentPedRow = pedFind(nextPedID); if(currentPedRow < 0 || currentPedRow > nPedigreeRows) return false; loopCounter++; if(loopCounter > 2000) return false; } //When we reach an NA in the pedigree the while condition will terminate, which is an error if((male(currentPedRow) != male(currentPedRow)) || (female(currentPedRow) != female(currentPedRow))) { return false; } int ngen = 0; while(male(currentPedRow) > 0) { int nextPedID = male(currentPedRow); if(nextPedID < 0 || nextPedID > nPedigreeRows) return false; currentPedRow = pedFind(nextPedID); if(currentPedRow < 0 || currentPedRow > nPedigreeRows) return false; ngen++; if(ngen > 2000) return false; } //another check for NA values if(male(currentPedRow) != male(currentPedRow)) { return false; } output[finalCounter] = ngen - (int)((log(nFounders) / log(2)) + 0.5); if(output[finalCounter] < 0) return false; } #undef pedFind return true; }
void helper(const std::string& url, const bool dropCaches, const std::string& objType) { if(dropCaches) { LOG(INFO) << "Dropping caches ..."; int res, res2; if ((res = system ("sync")) == -1) { throw std::runtime_error("Error running sync"); } if ((res2 = system ("sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'")) == -1) { throw std::runtime_error("Error dropping caches"); } } //std::string extension = base::utils::getExtension(url); std::string protocol = base::utils::getProtocol(url); std::string filename = base::utils::stripProtocol(url); base::IFilePtr file = hdfsutils::FileFactory::makeFile(protocol, filename); if( hdfsutils::isHdfs(protocol)) { hdfsutils::HdfsFile *p = (hdfsutils::HdfsFile *)file.get(); base::ConfigurationMap hdfsconf; hdfsconf["hdfsConfigurationFile"] = std::string("../ddc/test/data/server.conf"); p->configure(hdfsconf); } base::FileStatus status = file->stat(); base::ConfigurationMap conf; std::string schema = "000:int64," "001:int64,002:int64,003:int64,004:int64,005:int64,006:int64,007:int64,008:int64," "009:int64,010:int64,011:int64,012:int64,013:int64,014:int64,015:int64,016:int64," "017:int64,018:int64,019:int64,020:int64,021:int64,022:int64,023:int64,024:int64," "025:int64,026:int64,027:int64,028:int64,029:int64,030:int64,031:int64,032:int64," "033:int64,034:int64,035:int64,036:int64,037:int64,038:int64,039:int64,040:int64," "041:int64,042:int64,043:int64,044:int64,045:int64,046:int64,047:int64,048:int64," "049:int64,050:int64,051:int64,052:int64,053:int64,054:int64,055:int64,056:int64," "057:int64,058:int64,059:int64,060:int64,061:int64,062:int64,063:int64,064:int64," "065:int64,066:int64,067:int64,068:int64,069:int64,070:int64,071:int64,072:int64," "073:int64,074:int64,075:int64,076:int64,077:int64,078:int64,079:int64,080:int64," "081:int64,082:int64,083:int64,084:int64,085:int64,086:int64,087:int64,088:int64," "089:int64,090:int64,091:int64,092:int64,093:int64,094:int64,095:int64,096:int64," 
"097:int64,098:int64,099:int64,100:int64,101:int64,102:int64,103:int64,104:int64," "105:int64,106:int64,107:int64,108:int64,109:int64,110:int64,111:int64,112:int64," "113:int64,114:int64,115:int64,116:int64,117:int64,118:int64,119:int64,120:int64," "121:int64,122:int64,123:int64,124:int64,125:int64,126:int64,127:int64"; conf["schemaUrl"] = schema; conf["chunkStart"] = (uint64_t)0; conf["chunkEnd"] = (uint64_t)status.length; conf["hdfsConfigurationFile"] = std::string("../ddc/test/data/server.conf") ; boost::shared_ptr<Rcpp::DataFrame> dfptr = boost::any_cast<boost::shared_ptr<Rcpp::DataFrame>>(ddc_read(url, objType, conf)); Rcpp::DataFrame df = Rcpp::DataFrame(*(dfptr.get())); LOG(INFO) << "Got dataframe of size " << df.size(); }
// [[Rcpp::export]] void write_vcf_body_gz( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { // http://stackoverflow.com/a/5649224 // fix DataFrame Rcpp::StringVector chrom = fix["CHROM"]; Rcpp::StringVector pos = fix["POS"]; Rcpp::StringVector id = fix["ID"]; Rcpp::StringVector ref = fix["REF"]; Rcpp::StringVector alt = fix["ALT"]; Rcpp::StringVector qual = fix["QUAL"]; Rcpp::StringVector filter = fix["FILTER"]; Rcpp::StringVector info = fix["INFO"]; // gt DataFrame Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt); Rcpp::StringVector column_names(gt.size()); column_names = gt.attr("names"); int i = 0; int j = 0; gzFile *fi = (gzFile *)gzopen(filename.c_str(),"ab"); // gzFile *fi = (gzFile *)gzopen(filename.c_str(),"abw"); for(i=0; i<chrom.size(); i++){ Rcpp::checkUserInterrupt(); if(mask == 1 && filter(i) != "PASS" ){ // Don't print variant. } else { std::string tmpstring; tmpstring = chrom(i); tmpstring = tmpstring + "\t" + pos(i) + "\t"; if(id(i) == NA_STRING){ tmpstring = tmpstring + "."; } else { tmpstring = tmpstring + id(i); } tmpstring = tmpstring + "\t" + ref(i) + "\t" + alt(i) + "\t"; if(qual(i) == NA_STRING){ tmpstring = tmpstring + "." + "\t"; } else { tmpstring = tmpstring + qual(i) + "\t"; } if(filter(i) == NA_STRING){ tmpstring = tmpstring + "." + "\t"; } else { tmpstring = tmpstring + filter(i) + "\t"; } tmpstring = tmpstring + info(i); // gt portion for(j=0; j<column_names.size(); j++){ if(gt_cm(i, j) == NA_STRING){ tmpstring = tmpstring + "\t" + "./."; } else { tmpstring = tmpstring + "\t" + gt_cm(i, j); } } // gzwrite(fi,"my decompressed data",strlen("my decompressed data")); // gzwrite(fi,"\n",strlen("\n")); // std::string tmpstring = "test string\n"; gzwrite(fi, (char *)tmpstring.c_str(), tmpstring.size()); gzwrite(fi,"\n",strlen("\n")); } } gzclose(fi); return; }
// [[Rcpp::export]] void write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { //int write_vcf_body( Rcpp::DataFrame fix, Rcpp::DataFrame gt, std::string filename , int mask=0 ) { // fix DataFrame Rcpp::StringVector chrom = fix["CHROM"]; Rcpp::StringVector pos = fix["POS"]; Rcpp::StringVector id = fix["ID"]; Rcpp::StringVector ref = fix["REF"]; Rcpp::StringVector alt = fix["ALT"]; Rcpp::StringVector qual = fix["QUAL"]; Rcpp::StringVector filter = fix["FILTER"]; Rcpp::StringVector info = fix["INFO"]; // gt DataFrame Rcpp::StringMatrix gt_cm = DataFrame_to_StringMatrix(gt); Rcpp::StringVector column_names(gt.size()); column_names = gt.attr("names"); // column_names = gt_cm.attr("col.names"); // delete gt; int i = 0; int j = 0; // Uncompressed. std::ofstream myfile; myfile.open (filename.c_str(), std::ios::out | std::ios::app | std::ios::binary); // gzFile *fi = (gzFile *)gzopen("file.gz","wb"); for(i=0; i<chrom.size(); i++){ Rcpp::checkUserInterrupt(); if(mask == 1 && filter(i) == "PASS" ){ // Don't print variant. } else { myfile << chrom(i); myfile << "\t"; myfile << pos(i); myfile << "\t"; if(id(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << id(i); myfile << "\t"; } myfile << ref(i); myfile << "\t"; myfile << alt(i); myfile << "\t"; if(qual(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << qual(i); myfile << "\t"; } if(filter(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << filter(i); myfile << "\t"; } if(info(i) == NA_STRING){ myfile << "."; myfile << "\t"; } else { myfile << info(i); } // gt region. myfile << "\t"; myfile << gt_cm(i, 0); for(j=1; j<column_names.size(); j++){ myfile << "\t"; myfile << gt_cm(i, j); } myfile << "\n"; } } myfile.close(); return; }
/**
 * Sends an XMLA Discover request and returns the resulting rowset as a data
 * frame.
 *
 * @param handle              external pointer to an XMLAHandle supplying the
 *                            connection string, session ID, user name and
 *                            password
 * @param request             request type; converted to upper case before use
 * @param rRestrictionsString key/value pairs parsed into the restriction list
 *                            (ignored when empty)
 * @param rPropertiesString   key/value pairs parsed into the property list
 *                            (ignored when empty)
 * @return a data frame with one column per named element of the "row" type in
 *         the returned schema; logical FALSE if the request fails or the
 *         response does not contain a usable rowset (a message is written to
 *         std::cerr in that case)
 */
RcppExport SEXP RXMLADiscover(SEXP handle, SEXP request, SEXP rRestrictionsString, SEXP rPropertiesString)
{
    XmlaWebServiceSoapProxy service = XmlaWebServiceSoapProxy(SOAP_XML_DEFAULTNS, SOAP_XML_DEFAULTNS);
    Rcpp::XPtr<XMLAHandle> ptr(handle);
    const char *connectionString = ptr->connectionString;
    std::string propertiesString = CHAR(STRING_ELT(rPropertiesString,0));
    std::string restrictionsString = CHAR(STRING_ELT(rRestrictionsString, 0));

    ns1__Session session;
    std::string sessionId = ptr->sessionID;
    session.SessionId = &sessionId;
    service.soap_header(NULL, NULL, &session, NULL);

    _ns1__Discover discover;
    ns1__Restrictions restrictions;
    ns1__RestrictionList restrictionList;
    ns1__Properties properties;
    ns1__PropertyList propertyList;
    _ns1__DiscoverResponse discoverResponse;

    std::string requestType = CHAR(STRING_ELT(request,0));
    std::transform(requestType.begin(), requestType.end(), requestType.begin(), ::toupper);
    discover.RequestType = &requestType;
    discover.Restrictions = &restrictions;
    restrictions.RestrictionList = &restrictionList;
    discover.Properties = &properties;
    properties.PropertyList = &propertyList;

    if (!propertiesString.empty())
    {
        parseKeyValuePairs(&propertiesString, propertyList.__any);
    }
    if (!restrictionsString.empty())
    {
        parseKeyValuePairs(&restrictionsString, restrictionList.__any);
    }

    service.userid = ptr->userName;
    service.passwd = ptr->password;

    if (service.Discover(connectionString, NULL, &discover, &discoverResponse) != SOAP_OK)
    {
        // The fault structure may be absent, so check before printing it.
        if (service.fault != NULL && service.fault->faultstring != NULL)
        {
            std::cerr << service.fault->faultstring << std::endl;
        }
        else
        {
            std::cerr << "Error: Discover request failed" << std::endl;
        }
        service.destroy();
        return Rcpp::wrap(false);
    }

    // Every level of the response is a pointer that may not have been filled in.
    if (discoverResponse.return_ == NULL
        || discoverResponse.return_->ns2__root == NULL
        || discoverResponse.return_->ns2__root->xsd__schema == NULL
        || discoverResponse.return_->ns2__root->__union_ResultXmlRoot == NULL)
    {
        std::cerr << "Error: Discover response does not contain a rowset" << std::endl;
        service.destroy();
        return Rcpp::wrap(false);
    }

    std::string rawXML = "<root xmlns=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"><xsd:schema targetNamespace=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:sql=\"urn:schemas-microsoft-com:xml-sql\" elementFormDefault=\"qualified\">";
    rawXML = rawXML + discoverResponse.return_->ns2__root->xsd__schema + "</xsd:schema></root>";

    // rapidxml needs a writable, zero-terminated buffer that stays alive while
    // the document is in use. The vector owns it and releases it on every exit
    // path; it is declared before `doc` so that it outlives the document.
    std::vector<char> schemaBuffer(rawXML.begin(), rawXML.end());
    schemaBuffer.push_back('\0');
    rapidxml::xml_document<> doc;
    doc.parse<0>(&schemaBuffer[0]);

    // Find XML section containing column names
    rapidxml::xml_node<char> *rootNode = doc.first_node();
    rapidxml::xml_node<char> *schemaNode = (rootNode != NULL) ? rootNode->first_node() : NULL;
    rapidxml::xml_node<char> *rowNode = (schemaNode != NULL) ? schemaNode->first_node("xsd:complexType") : NULL;
    while (rowNode != NULL)
    {
        rapidxml::xml_attribute<char> *typeName = rowNode->first_attribute("name");
        if (typeName != NULL && strcmp(typeName->value(), "row") == 0)
        {
            break;
        }
        rowNode = rowNode->next_sibling("xsd:complexType");
    }

    rapidxml::xml_node<char> *rowContentNode = (rowNode != NULL) ? rowNode->first_node() : NULL;
    if (rowContentNode == NULL)
    {
        std::cerr << "Error: Unable to find row definition in schema" << std::endl;
        service.destroy();
        return Rcpp::wrap(false);
    }

    std::vector<char *> rows = discoverResponse.return_->ns2__root->__union_ResultXmlRoot->row;
    Rcpp::DataFrame resultDataFrame;
    Rcpp::CharacterVector colNames;

    for (rapidxml::xml_node<char> *schemaElementNode = rowContentNode->first_node();
         schemaElementNode != NULL;
         schemaElementNode = schemaElementNode->next_sibling())
    {
        rapidxml::xml_attribute<char> *nameAttribute = schemaElementNode->first_attribute("name");
        if (nameAttribute == NULL)
        {
            // Without a name there is no column to build from this element.
            continue;
        }
        char *colName = nameAttribute->value();
        colNames.push_back(colName);
        // The last argument tells rowSetParseData whether the element
        // declares a "type" attribute.
        if (schemaElementNode->first_attribute("type") != 0)
        {
            rowSetParseData(rows, &resultDataFrame, colName, true);
        }
        else
        {
            rowSetParseData(rows, &resultDataFrame, colName, false);
        }
    }

    resultDataFrame.attr("names") = colNames;
    service.destroy();
    return resultDataFrame;
}
/**
 * Sends an XMLA Execute request and converts the response into an R object.
 *
 * Two kinds of result are handled:
 *  - a multidimensional data set with exactly two data axes (three Axis
 *    entries in the response) becomes a data frame whose first column holds
 *    the row names and whose remaining columns hold the cell values; cells
 *    that are not present in the response are NA;
 *  - a non-empty rowset becomes a data frame with one column per named
 *    element of the "row" type in the returned schema.
 *
 * @param handle            external pointer to an XMLAHandle supplying the
 *                          connection string, session ID, user name and
 *                          password
 * @param query             statement to execute
 * @param rPropertiesString key/value pairs parsed into the property list
 *                          (ignored when empty)
 * @return the data frame described above; logical TRUE when the request
 *         succeeds but returns neither kind of result; logical FALSE when the
 *         request fails or the result cannot be converted (a message is
 *         written to std::cerr in that case)
 */
RcppExport SEXP RXMLAExecute(SEXP handle, SEXP query, SEXP rPropertiesString)
{
    XmlaWebServiceSoapProxy service = XmlaWebServiceSoapProxy(SOAP_XML_DEFAULTNS, SOAP_XML_DEFAULTNS);
    Rcpp::XPtr<XMLAHandle> ptr(handle);
    const char *connectionString = ptr->connectionString;
    std::string propertiesString = CHAR(STRING_ELT(rPropertiesString,0));

    ns1__Session session;
    std::string sessionId = ptr->sessionID;
    session.SessionId = &sessionId;
    service.soap_header(NULL, NULL, &session, NULL);

    _ns1__Execute execute;
    ns1__CommandStatement command;
    ns1__Properties properties;
    ns1__PropertyList propertyList;
    _ns1__ExecuteResponse response;

    std::string statement = CHAR(STRING_ELT(query,0));
    command.Statement = &statement;
    execute.Command = &command;
    execute.Properties = &properties;
    properties.PropertyList = &propertyList;

    if (!propertiesString.empty())
    {
        parseKeyValuePairs(&propertiesString, propertyList.__any);
    }

    service.userid = ptr->userName;
    service.passwd = ptr->password;

    if (service.Execute(connectionString, NULL, &execute, &response) != SOAP_OK)
    {
        // Prefer the fault detail, but neither the fault nor its detail is
        // guaranteed to be present, so fall back step by step.
        char *errorMessage = NULL;
        if (service.fault != NULL)
        {
            if (service.fault->detail != NULL)
            {
                errorMessage = service.fault->detail->__any;
            }
            if (errorMessage == NULL)
            {
                errorMessage = service.fault->faultstring;
            }
        }
        if (errorMessage != NULL)
        {
            std::cerr << errorMessage << std::endl;
        }
        else
        {
            std::cerr << "Error: Execute request failed" << std::endl;
        }
        service.destroy();
        return Rcpp::wrap(false);
    }

    // A successful call without a return element carries no data.
    if (response.return_ == NULL)
    {
        service.destroy();
        return Rcpp::wrap(true);
    }

    // Parse MDDataSet
    if (response.return_->ns4__root != NULL
        && response.return_->ns4__root->__union_ResultXmlRoot != NULL
        && response.return_->ns4__root->__union_ResultXmlRoot->Axes != NULL)
    {
        ns4__Axes *axes = response.return_->ns4__root->__union_ResultXmlRoot->Axes;

        if (axes->Axis.size() < 3)
        {
            std::cerr << "Error: No data on Axis1" << std::endl;
            service.destroy();
            return Rcpp::wrap(false);
        }
        if (axes->Axis.size() > 3)
        {
            std::cerr << "Error: More than 2 axes not supported" << std::endl;
            service.destroy();
            return Rcpp::wrap(false);
        }
        // Both data axes are dereferenced below to obtain their tuple counts.
        for (int axisIndex = 0; axisIndex < 2; axisIndex++)
        {
            if (axes->Axis[axisIndex] == NULL
                || axes->Axis[axisIndex]->__union_Axis == NULL
                || axes->Axis[axisIndex]->__union_Axis->Tuples == NULL)
            {
                std::cerr << "Error: Axis contains no tuples" << std::endl;
                service.destroy();
                return Rcpp::wrap(false);
            }
        }

        // A response without cell data is treated as having no cells at all,
        // which leaves every matrix entry NA.
        std::vector<ns4__Cell *> cellDataVector;
        if (response.return_->ns4__root->__union_ResultXmlRoot->CellData != NULL)
        {
            cellDataVector = response.return_->ns4__root->__union_ResultXmlRoot->CellData->Cell;
        }

        int numCols = axes->Axis[0]->__union_Axis->Tuples->Tuple.size();
        int numRows = axes->Axis[1]->__union_Axis->Tuples->Tuple.size();

        Rcpp::CharacterVector colNames;
        Rcpp::CharacterVector rowNames;
        Rcpp::NumericMatrix resultMatrix(numRows, numCols);

        // Cells are consumed in order: the next unread cell is used when its
        // ordinal equals the current row-major position, otherwise the
        // position is NA.
        std::vector<ns4__Cell *>::size_type nextCell = 0;
        for (int row = 0; row < numRows; row++)
        {
            for (int col = 0; col < numCols; col++)
            {
                if (nextCell < cellDataVector.size()
                    && cellDataVector[nextCell]->CellOrdinal == ((row * numCols) + col))
                {
                    if (cellDataVector[nextCell]->Value != NULL)
                    {
                        resultMatrix(row, col) = *cellDataVector[nextCell]->Value;
                    }
                    else
                    {
                        resultMatrix(row, col) = NA_REAL;
                    }
                    nextCell += 1;
                }
                else
                {
                    resultMatrix(row, col) = NA_REAL;
                }
            }
            mdDataSetGetNames(rowNames, axes, row, true);
        }

        for (int col = 0; col < numCols; col++)
        {
            mdDataSetGetNames(colNames, axes, col, false);
        }

        colNames.push_front("Row Names");
        Rcpp::DataFrame resultDataFrame(resultMatrix);
        resultDataFrame.push_front(rowNames);
        resultDataFrame.attr("names") = colNames;
        service.destroy();
        return resultDataFrame;
    }

    // Parse RowSet
    if (response.return_->ns2__root != NULL
        && response.return_->ns2__root->xsd__schema != NULL
        && response.return_->ns2__root->__union_ResultXmlRoot != NULL
        && !response.return_->ns2__root->__union_ResultXmlRoot->row.empty())
    {
        std::string rawXML = "<root xmlns=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"><xsd:schema targetNamespace=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:sql=\"urn:schemas-microsoft-com:xml-sql\" elementFormDefault=\"qualified\">";
        rawXML = rawXML + response.return_->ns2__root->xsd__schema + "</xsd:schema></root>";

        // rapidxml needs a writable, zero-terminated buffer that stays alive
        // while the document is in use. The vector owns it and releases it on
        // every exit path; it is declared before `doc` so it outlives it.
        std::vector<char> schemaBuffer(rawXML.begin(), rawXML.end());
        schemaBuffer.push_back('\0');
        rapidxml::xml_document<> doc;
        doc.parse<0>(&schemaBuffer[0]);

        // Find XML section containing column names
        rapidxml::xml_node<char> *rootNode = doc.first_node();
        rapidxml::xml_node<char> *schemaNode = (rootNode != NULL) ? rootNode->first_node() : NULL;
        rapidxml::xml_node<char> *rowNode = (schemaNode != NULL) ? schemaNode->first_node("xsd:complexType") : NULL;
        while (rowNode != NULL)
        {
            rapidxml::xml_attribute<char> *typeName = rowNode->first_attribute("name");
            if (typeName != NULL && strcmp(typeName->value(), "row") == 0)
            {
                break;
            }
            rowNode = rowNode->next_sibling("xsd:complexType");
        }

        rapidxml::xml_node<char> *rowContentNode = (rowNode != NULL) ? rowNode->first_node() : NULL;
        if (rowContentNode == NULL)
        {
            std::cerr << "Error: Unable to find row definition in schema" << std::endl;
            service.destroy();
            return Rcpp::wrap(false);
        }

        std::vector<char *> rows = response.return_->ns2__root->__union_ResultXmlRoot->row;
        Rcpp::DataFrame resultDataFrame;
        Rcpp::CharacterVector colNames;

        for (rapidxml::xml_node<char> *schemaElementNode = rowContentNode->first_node();
             schemaElementNode != NULL;
             schemaElementNode = schemaElementNode->next_sibling())
        {
            rapidxml::xml_attribute<char> *nameAttribute = schemaElementNode->first_attribute("name");
            if (nameAttribute == NULL)
            {
                // Without a name there is no column to build from this element.
                continue;
            }
            char *colName = nameAttribute->value();
            colNames.push_back(colName);
            // The last argument tells rowSetParseData whether the element
            // declares a "type" attribute.
            if (schemaElementNode->first_attribute("type") != 0)
            {
                rowSetParseData(rows, &resultDataFrame, colName, true);
            }
            else
            {
                rowSetParseData(rows, &resultDataFrame, colName, false);
            }
        }

        resultDataFrame.attr("names") = colNames;
        service.destroy();
        return resultDataFrame;
    }

    // The request succeeded but produced neither a data set nor a rowset.
    service.destroy();
    return Rcpp::wrap(true);
}