Пример #1
0
//' @rdname convert
//' @keywords internal manip
// [[Rcpp::export]]
Rcpp::List icd9DecimalToPartsCpp(const Rcpp::CharacterVector icd9Decimal, const Rcpp::String minorEmpty) {
	Rcpp::CharacterVector majors;
	Rcpp::CharacterVector minors;
	int ilen = icd9Decimal.length();

	if (ilen == 0) {
		return Rcpp::List::create(Rcpp::_["major"] =
				Rcpp::CharacterVector::create(), Rcpp::_["minor"] =
						Rcpp::CharacterVector::create());
	}

	for (Rcpp::CharacterVector::const_iterator it = icd9Decimal.begin();
			it != icd9Decimal.end(); ++it) {
		Rcpp::String strna = *it;
		if (strna == NA_STRING || strna == "") {
			majors.push_back(NA_STRING);
			minors.push_back(NA_STRING);
			continue;
		}
		// TODO: Rcpp::Rcpp::String doesn't implement many functions, so using STL. A FAST way
		// would be to use Rcpp::String's function get_cstring, and recode the trim
		// functions to take const char *. This would avoid the type change AND be
		// faster trimming.
		std::string thiscode = Rcpp::as<std::string>(*it);
		thiscode = strimCpp(thiscode); // This updates 'thisccode' by reference, no copy
		std::size_t pos = thiscode.find(".");
		// substring parts
		std::string majorin;
		Rcpp::String minorout;
		if (pos != std::string::npos) {
			majorin = thiscode.substr(0, pos);
			minorout = thiscode.substr(pos + 1);
		} else {
			majorin = thiscode;
			minorout = minorEmpty;
		}
		majors.push_back(icd9AddLeadingZeroesMajorSingle(majorin));
		minors.push_back(minorout);
	}
	return Rcpp::List::create(Rcpp::_["major"] = majors, Rcpp::_["minor"] =
			minors);
}
Пример #2
0
void mdDataSetGetNames(Rcpp::CharacterVector &names, ns4__Axes *axes, int i, bool isRow)
{
	int axis;
	if (isRow)
		axis = 1;
	else 
		axis = 0;
	std::string name = "";
	std::vector<ns4__Member *> memberList = axes->Axis[axis]->__union_Axis->Tuples->Tuple[i]->Member;
	for (int j = 0; j < memberList.size(); j++) {
		name = name + *memberList[j]->Caption + ", ";
	}
	names.push_back(name.substr(0, name.size() - 2));
}
Пример #3
0
void rowSetParseData(std::vector<char *> rows, Rcpp::DataFrame *resultDataFrame, char *colName, bool isChar)
{
	rapidxml::xml_document<> data;
	std::string xmlWrapper;
	char *xmlRow;
	int textLength;
	char *parseText;
	Rcpp::CharacterVector dimension;
	Rcpp::NumericVector dataColumn;

	for (int row = 0; row < rows.size(); row++)	{
		bool found = false;
		xmlWrapper = "<row>";
		xmlWrapper = xmlWrapper + rows[row] + "</row>";
		xmlRow = strdup(xmlWrapper.c_str());
		textLength = strlen(xmlRow);
		parseText = new char[textLength+1];
		parseText = strcpy(parseText, xmlRow);
		data.parse<0>(parseText);
		rapidxml::xml_node<char> *rowData = data.first_node()->first_node(colName);
		if (rowData != NULL) {
			if (isChar)
				dimension.push_back(rowData->value());
			else
				dataColumn.push_back(atof(rowData->value()));
			found = true;
		}
		if (!found && isChar)
			dimension.push_back(NA_STRING);
		else if (!found && !isChar)
			dataColumn.push_back(NA_REAL);
	}
	if (isChar)
		resultDataFrame->push_back(dimension);
	else
		resultDataFrame->push_back(dataColumn);
}
Пример #4
0
// This function constructs the output "clustering" data.frame for the dada(...) function.
// This contains core and diagnostic information on each partition (or cluster, or Bi).
Rcpp::DataFrame b_make_clustering_df(B *b, Sub **subs, Sub **birth_subs, bool has_quals) {
  unsigned int i, j, r, s, cind, max_reads;
  Raw *max_raw;
  Sub *sub;
  double q_ave, tot_e;
  
  // Create output character-vector of representative sequences for each partition (Bi)
  Rcpp::CharacterVector Rseqs;
  char oseq[SEQLEN];
  for(i=0;i<b->nclust;i++) {
    max_reads=0;
    max_raw = NULL;
    for(r=0;r<b->bi[i]->nraw;r++) {
      if(b->bi[i]->raw[r]->reads > max_reads) {
        max_raw = b->bi[i]->raw[r];
        max_reads = max_raw->reads;
      }
    }
//    ntcpy(oseq, b->bi[i]->seq);
    ntcpy(oseq, max_raw->seq);
    Rseqs.push_back(std::string(oseq));
  }
  
  // Create output vectors for other columns of clustering data.frame
  // Each has one entry for each partition (Bi)
  Rcpp::IntegerVector Rabunds(b->nclust);      // abundances
  Rcpp::IntegerVector Rzeros(b->nclust);       // n0
  Rcpp::IntegerVector Rones(b->nclust);        // n1
  Rcpp::IntegerVector Rraws(b->nclust);        // nraw
  Rcpp::NumericVector Rbirth_pvals(b->nclust); // pvalue at birth
  Rcpp::NumericVector Rbirth_folds(b->nclust); // fold over-abundance at birth
  Rcpp::IntegerVector Rbirth_hams(b->nclust);  // hamming distance at birth
  Rcpp::NumericVector Rbirth_es(b->nclust);    // expected number at birth
  Rcpp::CharacterVector Rbirth_types;           // DEPRECATED
  Rcpp::NumericVector Rbirth_qaves(b->nclust); // average quality of substitutions that drove birth
  Rcpp::NumericVector Rpvals(b->nclust);       // post-hoc pvalue

  // Assign values to the output vectors
  for(i=0;i<b->nclust;i++) {
    Rabunds[i] = b->bi[i]->reads;
    Rraws[i] = b->bi[i]->nraw;
    // n0 and n1
    Rzeros[i] = 0; Rones[i] = 0;
    for(r=0;r<b->bi[i]->nraw;r++) {
      sub = subs[b->bi[i]->raw[r]->index];
      if(sub) {
        if(sub->nsubs == 0) { Rzeros[i] += b->bi[i]->raw[r]->reads; }
        if(sub->nsubs == 1) { Rones[i] += b->bi[i]->raw[r]->reads; }
      }
    }
    // Record information from the cluster's birth
    Rbirth_types.push_back(std::string(b->bi[i]->birth_type));
    if(i==0) {  // 0-clust wasn't born normally
      Rbirth_pvals[i] = Rcpp::NumericVector::get_na(); 
      Rbirth_folds[i] = Rcpp::NumericVector::get_na(); 
      Rbirth_hams[i] = Rcpp::IntegerVector::get_na(); 
      Rbirth_es[i] = Rcpp::NumericVector::get_na();
      Rbirth_qaves[i] = Rcpp::NumericVector::get_na();
    } else { 
      Rbirth_pvals[i] = b->bi[i]->birth_pval;
      Rbirth_folds[i] = b->bi[i]->birth_fold;
      Rbirth_hams[i] = b->bi[i]->birth_comp.hamming;
      Rbirth_es[i] = b->bi[i]->birth_e;
      // Calculate average quality of birth substitutions
      if(has_quals) {
        q_ave = 0.0;
        sub = birth_subs[i];
        if(sub && sub->q1) {
          for(s=0;s<sub->nsubs;s++) {
            q_ave += (sub->q1[s]);
          }
          q_ave = q_ave/((double)sub->nsubs);
        }
        Rbirth_qaves[i] = q_ave;
      } else {
        Rbirth_qaves[i] = Rcpp::NumericVector::get_na();
      }
    }
    
    // Calculate post-hoc pval
    // This is not as exhaustive anymore
    tot_e = 0.0;
    for(j=0;j<b->nclust;j++) {
      if(i != j && b->bi[j]->comp_index.count(b->bi[i]->center->index) > 0) {
        cind = b->bi[j]->comp_index[b->bi[i]->center->index];
        tot_e += b->bi[j]->comp[cind].lambda * b->bi[j]->reads;
//        b->bi[j]->e[b->bi[i]->center->index];
      }
    }
    Rpvals[i] = calc_pA(1+b->bi[i]->reads, tot_e); // Add 1 because calc_pA subtracts 1 (conditional p-val)
  }
  
  return(Rcpp::DataFrame::create(_["sequence"] = Rseqs, _["abundance"] = Rabunds, _["n0"] = Rzeros, _["n1"] = Rones, _["nunq"] = Rraws, _["pval"] = Rpvals, _["birth_type"] = Rbirth_types, _["birth_pval"] = Rbirth_pvals, _["birth_fold"] = Rbirth_folds, _["birth_ham"] = Rbirth_hams, _["birth_qave"] = Rbirth_qaves));
}
Пример #5
0
// [[Rcpp::export]]
Rcpp::CharacterVector icd9MajMinToCode(const Rcpp::CharacterVector major,
		const Rcpp::CharacterVector minor, bool isShort) {
#ifdef ICD9_DEBUG_TRACE
  Rcpp::Rcout << "icd9MajMinToCode: major.size() = " << major.size()
			<< " and minor.size() = " << minor.size() << "\n";
#endif

	if (major.size() != minor.size())
		Rcpp::stop("major and minor lengths differ");

#ifdef ICD9_DEBUG_TRACE
	Rcpp::Rcout << "major and minor are the same?\n";
#endif

	Rcpp::CharacterVector out; // wish I could reserve space for this
	Rcpp::CharacterVector::const_iterator j = major.begin();
	Rcpp::CharacterVector::const_iterator n = minor.begin();

	for (; j != major.end() && n != minor.end(); ++j, ++n) {
		Rcpp::String mjrelem = *j;
		if (mjrelem == NA_STRING) {
			out.push_back(NA_STRING);
			continue;
		}
		// work around Rcpp bug with push_front: convert to string just for this
		// TODO: try to do this with C string instead
		const char* smj_c = mjrelem.get_cstring();
		std::string smj = std::string(smj_c);
		switch (strlen(smj_c)) {
		case 0:
			out.push_back(NA_STRING);
			continue;
		case 1:
			if (!icd9IsASingleVE(smj_c)) {
				smj.insert(0, "00");
			}
			break;
		case 2:
			if (!icd9IsASingleVE(smj_c)) {
				smj.insert(0, "0");
			} else {
				smj.insert(1, "0");
			}
			// default: // major is 3 (or more) chars already
		}
		Rcpp::String mnrelem = *n;
		if (mnrelem == NA_STRING) {
			//out.push_back(mjrelem);
			out.push_back(smj);
			continue;
		}
		// this can probably be done more quickly:
		//std::string smj(mjrelem);
		if (!isShort && mnrelem != "") {
			smj.append(".");
		}
		smj.append(mnrelem);
		out.push_back(smj);

	}
	// ?slow step somewhere around here, with use of Rcpp::String, maybe in the wrapping? Maybe in the multiple push_back calls

	//return wrap(out);
	return out;
}
Пример #6
0
void handleSchemaError(void* userData, xmlError* error) {
  Rcpp::CharacterVector * vec = (Rcpp::CharacterVector *) userData;
  std::string message = std::string(error->message);
  message.resize(message.size() - 1);
  vec->push_back(message);
}
Пример #7
0
//[[Rcpp::export]]
Rcpp::NumericMatrix SUPERMATRIX(Rcpp::List a,bool keep_names) {

  size_t i,j,k;
  Rcpp::NumericMatrix mat;

  int tot=0;

  Rcpp::CharacterVector rnam;
  Rcpp::CharacterVector cnam;

  Rcpp::CharacterVector this_nam;
  Rcpp::List dnames(2);
  bool any_rnames=false;
  bool any_cnames=false;

  for(i=0; i < a.size(); i++) {
    mat = Rcpp::as<Rcpp::NumericMatrix>(a[i]);
    if(mat.nrow() ==0) continue;
    if(mat.nrow() != mat.ncol()) Rcpp::stop("Not all matrices are square");

    tot = tot + mat.nrow();

    if(!keep_names) continue;

    dnames = mat.attr("dimnames");

    if(dnames.size()==0) {
      for(j=0; j < mat.nrow(); j++) {
	rnam.push_back(".");
	cnam.push_back(".");
      }
      continue;
    }

    if(!Rf_isNull(dnames[0])) {
      any_rnames=true;
      this_nam = dnames[0];
      for(j=0; j< this_nam.size(); j++) rnam.push_back(this_nam[j]);
    } else {
      for(j=0; j < mat.nrow(); j++) rnam.push_back(".");
    }
    if(!Rf_isNull(dnames[1])) {
      any_cnames=true;
      this_nam = dnames[1];
      for(j=0; j< this_nam.size(); j++) cnam.push_back(this_nam[j]);
    } else {
      for(j=0; j < mat.ncol(); j++) cnam.push_back(".");
    }
  }


  int totrow = 0;
  int totcol = 0;

  Rcpp::NumericMatrix ret(tot,tot);
  for(i=0; i < a.size(); i++) {
    mat = Rcpp::as<Rcpp::NumericMatrix>(a[i]);

    for(j=0; j < mat.nrow(); j++) {
      for(k=0; k < mat.ncol(); k++) {
	ret(totrow+j,totcol+k) = mat(j,k);
      }
    }
    totrow = totrow + mat.nrow();
    totcol = totcol + mat.ncol();
  }

  if(keep_names) {
    Rcpp::List dn = Rcpp::List::create(rnam,cnam);
    ret.attr("dimnames") = dn;
  }

  return(ret);
}
Пример #8
0
RcppExport SEXP RXMLADiscover(SEXP handle, SEXP request, SEXP rRestrictionsString, SEXP rPropertiesString)
{
	XmlaWebServiceSoapProxy service = XmlaWebServiceSoapProxy(SOAP_XML_DEFAULTNS, SOAP_XML_DEFAULTNS);

	Rcpp::XPtr<XMLAHandle> ptr(handle);
	const char *connectionString = ptr->connectionString;
	std::string propertiesString = CHAR(STRING_ELT(rPropertiesString,0));
	std::string restrictionsString = CHAR(STRING_ELT(rRestrictionsString, 0));

	ns1__Session session;
	std::string sessionId = ptr->sessionID;
	session.SessionId = &sessionId;
	service.soap_header(NULL, NULL, &session, NULL);

	_ns1__Discover discover;
	ns1__Restrictions restrictions;
	ns1__RestrictionList restrictionList;
	ns1__Properties properties;
	ns1__PropertyList propertyList;
	_ns1__DiscoverResponse discoverResponse;

	std::string requestType = CHAR(STRING_ELT(request,0));
	std::transform(requestType.begin(), requestType.end(), requestType.begin(), ::toupper);
	discover.RequestType = &requestType;
	discover.Restrictions = &restrictions;
	restrictions.RestrictionList = &restrictionList;
	discover.Properties = &properties;
	properties.PropertyList = &propertyList;
	if (!propertiesString.empty()) {
		parseKeyValuePairs(&propertiesString, propertyList.__any);
	}
	if (!restrictionsString.empty()) {
		parseKeyValuePairs(&restrictionsString, restrictionList.__any);
	}
	service.userid = ptr->userName;
	service.passwd = ptr->password;

	if (service.Discover(connectionString, NULL, &discover, &discoverResponse) == SOAP_OK) {
		std::string rawXML = "<root xmlns=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"><xsd:schema targetNamespace=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:sql=\"urn:schemas-microsoft-com:xml-sql\" elementFormDefault=\"qualified\">";
		rawXML = rawXML + discoverResponse.return_->ns2__root->xsd__schema + "</xsd:schema></root>";
		char *schema = strdup(rawXML.c_str());
		rapidxml::xml_document<> doc;
		doc.parse<0>(schema);

		// Find XML section containing column names
		rapidxml::xml_node<char> *rowNode = doc.first_node()->first_node()->first_node("xsd:complexType");
		while(rowNode != NULL && strcmp(rowNode->first_attribute("name")->value(), "row") != 0)	{
			rowNode = rowNode->next_sibling("xsd:complexType");
		}

		rapidxml::xml_node<char> *schemaElementNode = rowNode->first_node()->first_node();
		std::vector<char *> rows = discoverResponse.return_->ns2__root->__union_ResultXmlRoot->row;
		Rcpp::DataFrame resultDataFrame;
		Rcpp::CharacterVector colNames;
		char *colName;

		while(schemaElementNode != NULL) {
			colName = schemaElementNode->first_attribute("name")->value();
			colNames.push_back(colName);
			if (schemaElementNode->first_attribute("type") != 0) {
				rowSetParseData(rows, &resultDataFrame, colName, true);
			}
			else {
				rowSetParseData(rows, &resultDataFrame, colName, false);
			}
			schemaElementNode = schemaElementNode->next_sibling();
		}
		resultDataFrame.attr("names") = colNames;
		service.destroy();
		return resultDataFrame;
	}

	else {
		std::cerr << service.fault->faultstring << std::endl;
	}
	service.destroy();
	return Rcpp::wrap(false);
}
Пример #9
0
RcppExport SEXP RXMLAExecute(SEXP handle, SEXP query, SEXP rPropertiesString)
{
	XmlaWebServiceSoapProxy service = XmlaWebServiceSoapProxy(SOAP_XML_DEFAULTNS, SOAP_XML_DEFAULTNS);

	Rcpp::XPtr<XMLAHandle> ptr(handle);
	const char *connectionString = ptr->connectionString;
	std::string propertiesString = CHAR(STRING_ELT(rPropertiesString,0));

	ns1__Session session;
	std::string sessionId = ptr->sessionID;
	session.SessionId = &sessionId;
	service.soap_header(NULL, NULL, &session, NULL);

	_ns1__Execute execute;
	ns1__CommandStatement command;
	ns1__Properties properties;
	ns1__PropertyList propertyList;
	_ns1__ExecuteResponse response;

	std::string statement = CHAR(STRING_ELT(query,0));
	command.Statement = &statement;
	execute.Command = &command;
	execute.Properties = &properties;
	properties.PropertyList = &propertyList;
	if (!propertiesString.empty()) {
		parseKeyValuePairs(&propertiesString, propertyList.__any);
	}
	service.userid = ptr->userName;
	service.passwd = ptr->password;

	if (service.Execute(connectionString, NULL, &execute, &response) == SOAP_OK) {
		// Parse MDDataSet
		if (response.return_->ns4__root != NULL && response.return_->ns4__root->__union_ResultXmlRoot != NULL && response.return_->ns4__root->__union_ResultXmlRoot->Axes != NULL) {
			if (response.return_->ns4__root->__union_ResultXmlRoot->Axes->Axis.size() < 3) {
				std::cerr << "Error: No data on Axis1" << std::endl;
				return Rcpp::wrap(false);
			}
			if (response.return_->ns4__root->__union_ResultXmlRoot->Axes->Axis.size() > 3) {
				std::cerr << "Error: More than 2 axes not supported" << std::endl;
				return Rcpp::wrap(false);
			}

			ns4__Axes *axes = response.return_->ns4__root->__union_ResultXmlRoot->Axes;
			std::vector<ns4__Cell *> cellDataVector = response.return_->ns4__root->__union_ResultXmlRoot->CellData->Cell;
			int numCols = response.return_->ns4__root->__union_ResultXmlRoot->Axes->Axis[0]->__union_Axis->Tuples->Tuple.size();
			int numRows = response.return_->ns4__root->__union_ResultXmlRoot->Axes->Axis[1]->__union_Axis->Tuples->Tuple.size();
			int cellDataVectorMember = 0;

			Rcpp::CharacterVector colNames;
			Rcpp::CharacterVector rowNames;
			Rcpp::NumericMatrix resultMatrix(numRows, numCols);

			for (int row = 0; row < numRows; row++)	{
				for (int col = 0; col < numCols; col++)	{
					if (cellDataVector[cellDataVectorMember]->CellOrdinal == ((row * numCols) + col)) {
						resultMatrix(row, col) = *cellDataVector[cellDataVectorMember]->Value;
						if (cellDataVectorMember < cellDataVector.size() - 1) {
							cellDataVectorMember += 1;
						}
					}
					else {
						resultMatrix(row, col) = NA_REAL;
					}
				}
				mdDataSetGetNames(rowNames, axes, row, true);
			}

			for (int col = 0; col < numCols; col++)	{
				mdDataSetGetNames(colNames, axes, col, false);
			}

			colNames.push_front("Row Names");
			Rcpp::DataFrame resultDataFrame(resultMatrix);
			resultDataFrame.push_front(rowNames);
			resultDataFrame.attr("names") = colNames;
			service.destroy();
			return resultDataFrame;
		}
		// Parse RowSet
		else if (response.return_->ns2__root != NULL 
			&& response.return_->ns2__root->xsd__schema != NULL 
			&& response.return_->ns2__root->__union_ResultXmlRoot != NULL 
			&& !response.return_->ns2__root->__union_ResultXmlRoot->row.empty()) {

				std::string rawXML = "<root xmlns=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\"><xsd:schema targetNamespace=\"urn:schemas-microsoft-com:xml-analysis:rowset\" xmlns:sql=\"urn:schemas-microsoft-com:xml-sql\" elementFormDefault=\"qualified\">";
				rawXML = rawXML + response.return_->ns2__root->xsd__schema + "</xsd:schema></root>";
				char *schema = strdup(rawXML.c_str());
				rapidxml::xml_document<> doc;
				doc.parse<0>(schema);

				// Find XML section containing column names
				rapidxml::xml_node<char> *rowNode = doc.first_node()->first_node()->first_node("xsd:complexType");
				while(rowNode != NULL && strcmp(rowNode->first_attribute("name")->value(), "row") != 0)	{
					rowNode = rowNode->next_sibling("xsd:complexType");
				}

				rapidxml::xml_node<char> *schemaElementNode = rowNode->first_node()->first_node();
				std::vector<char *> rows = response.return_->ns2__root->__union_ResultXmlRoot->row;
				Rcpp::DataFrame resultDataFrame;
				Rcpp::CharacterVector colNames;
				char *colName;

				while(schemaElementNode != NULL) {
					colName = schemaElementNode->first_attribute("name")->value();
					colNames.push_back(colName);
					if (schemaElementNode->first_attribute("type") != 0) {
						rowSetParseData(rows, &resultDataFrame, colName, true);
					}
					else {
						rowSetParseData(rows, &resultDataFrame, colName, false);
					}
					schemaElementNode = schemaElementNode->next_sibling();
				}
				resultDataFrame.attr("names") = colNames;
				service.destroy();
				return resultDataFrame;
		}
		service.destroy();
		return Rcpp::wrap(true);
	}
	else {
		char * errorMessage = service.fault->detail->__any;
		std::cerr << errorMessage << std::endl;
	}
	service.destroy();
	return Rcpp::wrap(false);
}