//' @rdname convert //' @keywords internal manip // [[Rcpp::export]] Rcpp::List icd9DecimalToPartsCpp(const Rcpp::CharacterVector icd9Decimal, const Rcpp::String minorEmpty) { Rcpp::CharacterVector majors; Rcpp::CharacterVector minors; int ilen = icd9Decimal.length(); if (ilen == 0) { return Rcpp::List::create(Rcpp::_["major"] = Rcpp::CharacterVector::create(), Rcpp::_["minor"] = Rcpp::CharacterVector::create()); } for (Rcpp::CharacterVector::const_iterator it = icd9Decimal.begin(); it != icd9Decimal.end(); ++it) { Rcpp::String strna = *it; if (strna == NA_STRING || strna == "") { majors.push_back(NA_STRING); minors.push_back(NA_STRING); continue; } // TODO: Rcpp::Rcpp::String doesn't implement many functions, so using STL. A FAST way // would be to use Rcpp::String's function get_cstring, and recode the trim // functions to take const char *. This would avoid the type change AND be // faster trimming. std::string thiscode = Rcpp::as<std::string>(*it); thiscode = strimCpp(thiscode); // This updates 'thisccode' by reference, no copy std::size_t pos = thiscode.find("."); // substring parts std::string majorin; Rcpp::String minorout; if (pos != std::string::npos) { majorin = thiscode.substr(0, pos); minorout = thiscode.substr(pos + 1); } else { majorin = thiscode; minorout = minorEmpty; } majors.push_back(icd9AddLeadingZeroesMajorSingle(majorin)); minors.push_back(minorout); } return Rcpp::List::create(Rcpp::_["major"] = majors, Rcpp::_["minor"] = minors); }
void mdDataSetGetNames(Rcpp::CharacterVector &names, ns4__Axes *axes, int i, bool isRow) { int axis; if (isRow) axis = 1; else axis = 0; std::string name = ""; std::vector<ns4__Member *> memberList = axes->Axis[axis]->__union_Axis->Tuples->Tuple[i]->Member; for (int j = 0; j < memberList.size(); j++) { name = name + *memberList[j]->Caption + ", "; } names.push_back(name.substr(0, name.size() - 2)); }
void rowSetParseData(std::vector<char *> rows, Rcpp::DataFrame *resultDataFrame, char *colName, bool isChar) { rapidxml::xml_document<> data; std::string xmlWrapper; char *xmlRow; int textLength; char *parseText; Rcpp::CharacterVector dimension; Rcpp::NumericVector dataColumn; for (int row = 0; row < rows.size(); row++) { bool found = false; xmlWrapper = "<row>"; xmlWrapper = xmlWrapper + rows[row] + "</row>"; xmlRow = strdup(xmlWrapper.c_str()); textLength = strlen(xmlRow); parseText = new char[textLength+1]; parseText = strcpy(parseText, xmlRow); data.parse<0>(parseText); rapidxml::xml_node<char> *rowData = data.first_node()->first_node(colName); if (rowData != NULL) { if (isChar) dimension.push_back(rowData->value()); else dataColumn.push_back(atof(rowData->value())); found = true; } if (!found && isChar) dimension.push_back(NA_STRING); else if (!found && !isChar) dataColumn.push_back(NA_REAL); } if (isChar) resultDataFrame->push_back(dimension); else resultDataFrame->push_back(dataColumn); }
// This function constructs the output "clustering" data.frame for the dada(...) function. // This contains core and diagnostic information on each partition (or cluster, or Bi). Rcpp::DataFrame b_make_clustering_df(B *b, Sub **subs, Sub **birth_subs, bool has_quals) { unsigned int i, j, r, s, cind, max_reads; Raw *max_raw; Sub *sub; double q_ave, tot_e; // Create output character-vector of representative sequences for each partition (Bi) Rcpp::CharacterVector Rseqs; char oseq[SEQLEN]; for(i=0;i<b->nclust;i++) { max_reads=0; max_raw = NULL; for(r=0;r<b->bi[i]->nraw;r++) { if(b->bi[i]->raw[r]->reads > max_reads) { max_raw = b->bi[i]->raw[r]; max_reads = max_raw->reads; } } // ntcpy(oseq, b->bi[i]->seq); ntcpy(oseq, max_raw->seq); Rseqs.push_back(std::string(oseq)); } // Create output vectors for other columns of clustering data.frame // Each has one entry for each partition (Bi) Rcpp::IntegerVector Rabunds(b->nclust); // abundances Rcpp::IntegerVector Rzeros(b->nclust); // n0 Rcpp::IntegerVector Rones(b->nclust); // n1 Rcpp::IntegerVector Rraws(b->nclust); // nraw Rcpp::NumericVector Rbirth_pvals(b->nclust); // pvalue at birth Rcpp::NumericVector Rbirth_folds(b->nclust); // fold over-abundance at birth Rcpp::IntegerVector Rbirth_hams(b->nclust); // hamming distance at birth Rcpp::NumericVector Rbirth_es(b->nclust); // expected number at birth Rcpp::CharacterVector Rbirth_types; // DEPRECATED Rcpp::NumericVector Rbirth_qaves(b->nclust); // average quality of substitutions that drove birth Rcpp::NumericVector Rpvals(b->nclust); // post-hoc pvalue // Assign values to the output vectors for(i=0;i<b->nclust;i++) { Rabunds[i] = b->bi[i]->reads; Rraws[i] = b->bi[i]->nraw; // n0 and n1 Rzeros[i] = 0; Rones[i] = 0; for(r=0;r<b->bi[i]->nraw;r++) { sub = subs[b->bi[i]->raw[r]->index]; if(sub) { if(sub->nsubs == 0) { Rzeros[i] += b->bi[i]->raw[r]->reads; } if(sub->nsubs == 1) { Rones[i] += b->bi[i]->raw[r]->reads; } } } // Record information from the cluster's birth Rbirth_types.push_back(std::string(b->bi[i]->birth_type)); if(i==0) { // 0-clust wasn't born normally Rbirth_pvals[i] = Rcpp::NumericVector::get_na(); Rbirth_folds[i] = Rcpp::NumericVector::get_na(); Rbirth_hams[i] = Rcpp::IntegerVector::get_na(); Rbirth_es[i] = Rcpp::NumericVector::get_na(); Rbirth_qaves[i] = Rcpp::NumericVector::get_na(); } else { Rbirth_pvals[i] = b->bi[i]->birth_pval; Rbirth_folds[i] = b->bi[i]->birth_fold; Rbirth_hams[i] = b->bi[i]->birth_comp.hamming; Rbirth_es[i] = b->bi[i]->birth_e; // Calculate average quality of birth substitutions if(has_quals) { q_ave = 0.0; sub = birth_subs[i]; if(sub && sub->q1) { for(s=0;s<sub->nsubs;s++) { q_ave += (sub->q1[s]); } q_ave = q_ave/((double)sub->nsubs); } Rbirth_qaves[i] = q_ave; } else { Rbirth_qaves[i] = Rcpp::NumericVector::get_na(); } } // Calculate post-hoc pval // This is not as exhaustive anymore tot_e = 0.0; for(j=0;j<b->nclust;j++) { if(i != j && b->bi[j]->comp_index.count(b->bi[i]->center->index) > 0) { cind = b->bi[j]->comp_index[b->bi[i]->center->index]; tot_e += b->bi[j]->comp[cind].lambda * b->bi[j]->reads; // b->bi[j]->e[b->bi[i]->center->index]; } } Rpvals[i] = calc_pA(1+b->bi[i]->reads, tot_e); // Add 1 because calc_pA subtracts 1 (conditional p-val) } return(Rcpp::DataFrame::create(_["sequence"] = Rseqs, _["abundance"] = Rabunds, _["n0"] = Rzeros, _["n1"] = Rones, _["nunq"] = Rraws, _["pval"] = Rpvals, _["birth_type"] = Rbirth_types, _["birth_pval"] = Rbirth_pvals, _["birth_fold"] = Rbirth_folds, _["birth_ham"] = Rbirth_hams, _["birth_qave"] = Rbirth_qaves)); }
// [[Rcpp::export]] Rcpp::CharacterVector icd9MajMinToCode(const Rcpp::CharacterVector major, const Rcpp::CharacterVector minor, bool isShort) { #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "icd9MajMinToCode: major.size() = " << major.size() << " and minor.size() = " << minor.size() << "\n"; #endif if (major.size() != minor.size()) Rcpp::stop("major and minor lengths differ"); #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "major and minor are the same?\n"; #endif Rcpp::CharacterVector out; // wish I could reserve space for this Rcpp::CharacterVector::const_iterator j = major.begin(); Rcpp::CharacterVector::const_iterator n = minor.begin(); for (; j != major.end() && n != minor.end(); ++j, ++n) { Rcpp::String mjrelem = *j; if (mjrelem == NA_STRING) { out.push_back(NA_STRING); continue; } // work around Rcpp bug with push_front: convert to string just for this // TODO: try to do this with C string instead const char* smj_c = mjrelem.get_cstring(); std::string smj = std::string(smj_c); switch (strlen(smj_c)) { case 0: out.push_back(NA_STRING); continue; case 1: if (!icd9IsASingleVE(smj_c)) { smj.insert(0, "00"); } break; case 2: if (!icd9IsASingleVE(smj_c)) { smj.insert(0, "0"); } else { smj.insert(1, "0"); } // default: // major is 3 (or more) chars already } Rcpp::String mnrelem = *n; if (mnrelem == NA_STRING) { //out.push_back(mjrelem); out.push_back(smj); continue; } // this can probably be done more quickly: //std::string smj(mjrelem); if (!isShort && mnrelem != "") { smj.append("."); } smj.append(mnrelem); out.push_back(smj); } // ?slow step somewhere around here, with use of Rcpp::String, maybe in the wrapping? Maybe in the multiple push_back calls //return wrap(out); return out; }
void handleSchemaError(void* userData, xmlError* error) { Rcpp::CharacterVector * vec = (Rcpp::CharacterVector *) userData; std::string message = std::string(error->message); message.resize(message.size() - 1); vec->push_back(message); }
//[[Rcpp::export]] Rcpp::NumericMatrix SUPERMATRIX(Rcpp::List a,bool keep_names) { size_t i,j,k; Rcpp::NumericMatrix mat; int tot=0; Rcpp::CharacterVector rnam; Rcpp::CharacterVector cnam; Rcpp::CharacterVector this_nam; Rcpp::List dnames(2); bool any_rnames=false; bool any_cnames=false; for(i=0; i < a.size(); i++) { mat = Rcpp::as<Rcpp::NumericMatrix>(a[i]); if(mat.nrow() ==0) continue; if(mat.nrow() != mat.ncol()) Rcpp::stop("Not all matrices are square"); tot = tot + mat.nrow(); if(!keep_names) continue; dnames = mat.attr("dimnames"); if(dnames.size()==0) { for(j=0; j < mat.nrow(); j++) { rnam.push_back("."); cnam.push_back("."); } continue; } if(!Rf_isNull(dnames[0])) { any_rnames=true; this_nam = dnames[0]; for(j=0; j< this_nam.size(); j++) rnam.push_back(this_nam[j]); } else { for(j=0; j < mat.nrow(); j++) rnam.push_back("."); } if(!Rf_isNull(dnames[1])) { any_cnames=true; this_nam = dnames[1]; for(j=0; j< this_nam.size(); j++) cnam.push_back(this_nam[j]); } else { for(j=0; j < mat.ncol(); j++) cnam.push_back("."); } } int totrow = 0; int totcol = 0; Rcpp::NumericMatrix ret(tot,tot); for(i=0; i < a.size(); i++) { mat = Rcpp::as<Rcpp::NumericMatrix>(a[i]); for(j=0; j < mat.nrow(); j++) { for(k=0; k < mat.ncol(); k++) { ret(totrow+j,totcol+k) = mat(j,k); } } totrow = totrow + mat.nrow(); totcol = totcol + mat.ncol(); } if(keep_names) { Rcpp::List dn = Rcpp::List::create(rnam,cnam); ret.attr("dimnames") = dn; } return(ret); }
RcppExport SEXP RXMLADiscover(SEXP handle, SEXP request, SEXP rRestrictionsString, SEXP rPropertiesString) { XmlaWebServiceSoapProxy service = XmlaWebServiceSoapProxy(SOAP_XML_DEFAULTNS, SOAP_XML_DEFAULTNS); Rcpp::XPtr<XMLAHandle> ptr(handle); const char *connectionString = ptr->connectionString; std::string propertiesString = CHAR(STRING_ELT(rPropertiesString,0)); std::string restrictionsString = CHAR(STRING_ELT(rRestrictionsString, 0)); ns1__Session session; std::string sessionId = ptr->sessionID; session.SessionId = &sessionId; service.soap_header(NULL, NULL, &session, NULL); _ns1__Discover discover; ns1__Restrictions restrictions; ns1__RestrictionList restrictionList; ns1__Properties properties; ns1__PropertyList propertyList; _ns1__DiscoverResponse discoverResponse; std::string requestType = CHAR(STRING_ELT(request,0)); std::transform(requestType.begin(), requestType.end(), requestType.begin(), ::toupper); discover.RequestType = &requestType; discover.Restrictions = &restrictions; restrictions.RestrictionList = &restrictionList; discover.Properties = &properties; properties.PropertyList = &propertyList; if (!propertiesString.empty()) { parseKeyValuePairs(&propertiesString, propertyList.__any); } if (!restrictionsString.empty()) { parseKeyValuePairs(&restrictionsString, restrictionList.__any); } service.userid = ptr->userName; service.passwd = ptr->password; if (service.Discover(connectionString, NULL, &discover, &discoverResponse) == SOAP_OK) { std::string rawXML = "<root xmlns=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"><xsd:schema targetNamespace=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:sql=\"urn:schemas-microsoft-com:xml-sql\" elementFormDefault=\"qualified\">"; rawXML = rawXML + discoverResponse.return_->ns2__root->xsd__schema + "</xsd:schema></root>"; char *schema = strdup(rawXML.c_str()); rapidxml::xml_document<> doc; doc.parse<0>(schema); // Find XML section containing column names rapidxml::xml_node<char> *rowNode = doc.first_node()->first_node()->first_node("xsd:complexType"); while(rowNode != NULL && strcmp(rowNode->first_attribute("name")->value(), "row") != 0) { rowNode = rowNode->next_sibling("xsd:complexType"); } rapidxml::xml_node<char> *schemaElementNode = rowNode->first_node()->first_node(); std::vector<char *> rows = discoverResponse.return_->ns2__root->__union_ResultXmlRoot->row; Rcpp::DataFrame resultDataFrame; Rcpp::CharacterVector colNames; char *colName; while(schemaElementNode != NULL) { colName = schemaElementNode->first_attribute("name")->value(); colNames.push_back(colName); if (schemaElementNode->first_attribute("type") != 0) { rowSetParseData(rows, &resultDataFrame, colName, true); } else { rowSetParseData(rows, &resultDataFrame, colName, false); } schemaElementNode = schemaElementNode->next_sibling(); } resultDataFrame.attr("names") = colNames; service.destroy(); return resultDataFrame; } else { std::cerr << service.fault->faultstring << std::endl; } service.destroy(); return Rcpp::wrap(false); }
RcppExport SEXP RXMLAExecute(SEXP handle, SEXP query, SEXP rPropertiesString) { XmlaWebServiceSoapProxy service = XmlaWebServiceSoapProxy(SOAP_XML_DEFAULTNS, SOAP_XML_DEFAULTNS); Rcpp::XPtr<XMLAHandle> ptr(handle); const char *connectionString = ptr->connectionString; std::string propertiesString = CHAR(STRING_ELT(rPropertiesString,0)); ns1__Session session; std::string sessionId = ptr->sessionID; session.SessionId = &sessionId; service.soap_header(NULL, NULL, &session, NULL); _ns1__Execute execute; ns1__CommandStatement command; ns1__Properties properties; ns1__PropertyList propertyList; _ns1__ExecuteResponse response; std::string statement = CHAR(STRING_ELT(query,0)); command.Statement = &statement; execute.Command = &command; execute.Properties = &properties; properties.PropertyList = &propertyList; if (!propertiesString.empty()) { parseKeyValuePairs(&propertiesString, propertyList.__any); } service.userid = ptr->userName; service.passwd = ptr->password; if (service.Execute(connectionString, NULL, &execute, &response) == SOAP_OK) { // Parse MDDataSet if (response.return_->ns4__root != NULL && response.return_->ns4__root->__union_ResultXmlRoot != NULL && response.return_->ns4__root->__union_ResultXmlRoot->Axes != NULL) { if (response.return_->ns4__root->__union_ResultXmlRoot->Axes->Axis.size() < 3) { std::cerr << "Error: No data on Axis1" << std::endl; return Rcpp::wrap(false); } if (response.return_->ns4__root->__union_ResultXmlRoot->Axes->Axis.size() > 3) { std::cerr << "Error: More than 2 axes not supported" << std::endl; return Rcpp::wrap(false); } ns4__Axes *axes = response.return_->ns4__root->__union_ResultXmlRoot->Axes; std::vector<ns4__Cell *> cellDataVector = response.return_->ns4__root->__union_ResultXmlRoot->CellData->Cell; int numCols = response.return_->ns4__root->__union_ResultXmlRoot->Axes->Axis[0]->__union_Axis->Tuples->Tuple.size(); int numRows = response.return_->ns4__root->__union_ResultXmlRoot->Axes->Axis[1]->__union_Axis->Tuples->Tuple.size(); int cellDataVectorMember = 0; Rcpp::CharacterVector colNames; Rcpp::CharacterVector rowNames; Rcpp::NumericMatrix resultMatrix(numRows, numCols); for (int row = 0; row < numRows; row++) { for (int col = 0; col < numCols; col++) { if (cellDataVector[cellDataVectorMember]->CellOrdinal == ((row * numCols) + col)) { resultMatrix(row, col) = *cellDataVector[cellDataVectorMember]->Value; if (cellDataVectorMember < cellDataVector.size() - 1) { cellDataVectorMember += 1; } } else { resultMatrix(row, col) = NA_REAL; } } mdDataSetGetNames(rowNames, axes, row, true); } for (int col = 0; col < numCols; col++) { mdDataSetGetNames(colNames, axes, col, false); } colNames.push_front("Row Names"); Rcpp::DataFrame resultDataFrame(resultMatrix); resultDataFrame.push_front(rowNames); resultDataFrame.attr("names") = colNames; service.destroy(); return resultDataFrame; } // Parse RowSet else if (response.return_->ns2__root != NULL && response.return_->ns2__root->xsd__schema != NULL && response.return_->ns2__root->__union_ResultXmlRoot != NULL && !response.return_->ns2__root->__union_ResultXmlRoot->row.empty()) { std::string rawXML = "<root xmlns=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"><xsd:schema targetNamespace=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:sql=\"urn:schemas-microsoft-com:xml-sql\" elementFormDefault=\"qualified\">"; rawXML = rawXML + response.return_->ns2__root->xsd__schema + "</xsd:schema></root>"; char *schema = strdup(rawXML.c_str()); rapidxml::xml_document<> doc; doc.parse<0>(schema); // Find XML section containing column names rapidxml::xml_node<char> *rowNode = doc.first_node()->first_node()->first_node("xsd:complexType"); while(rowNode != NULL && strcmp(rowNode->first_attribute("name")->value(), "row") != 0) { rowNode = rowNode->next_sibling("xsd:complexType"); } rapidxml::xml_node<char> *schemaElementNode = rowNode->first_node()->first_node(); std::vector<char *> rows = response.return_->ns2__root->__union_ResultXmlRoot->row; Rcpp::DataFrame resultDataFrame; Rcpp::CharacterVector colNames; char *colName; while(schemaElementNode != NULL) { colName = schemaElementNode->first_attribute("name")->value(); colNames.push_back(colName); if (schemaElementNode->first_attribute("type") != 0) { rowSetParseData(rows, &resultDataFrame, colName, true); } else { rowSetParseData(rows, &resultDataFrame, colName, false); } schemaElementNode = schemaElementNode->next_sibling(); } resultDataFrame.attr("names") = colNames; service.destroy(); return resultDataFrame; } service.destroy(); return Rcpp::wrap(true); } else { char * errorMessage = service.fault->detail->__any; std::cerr << errorMessage << std::endl; } service.destroy(); return Rcpp::wrap(false); }