Ejemplo n.º 1
0
// Split "short" (no decimal point) ICD-9 codes into major and minor parts.
//
// icd9Short:  codes to split; each non-NA element is trimmed with strimCpp
//             before its length is examined.
// minorEmpty: value stored as the minor part when a code consists of a major
//             part only.
//
// Splitting rules, by trimmed length:
//   E codes (per icd9IsASingleE): 2-4 chars -> major only; 5 chars -> 4-char
//     major + 1-char minor; anything else -> NA/NA.
//   all other codes: 1-3 chars -> major only; 4-5 chars -> 3-char major +
//     remaining chars as minor; anything else -> NA/NA.
// NA input yields NA for both parts.
//
// Returns icd9MajMinToParts(icd9AddLeadingZeroesMajor(major), minor).
//' @rdname convert
//' @keywords internal manip
// [[Rcpp::export]]
Rcpp::List icd9ShortToPartsCpp(const Rcpp::CharacterVector icd9Short, const Rcpp::String minorEmpty) {

	Rcpp::CharacterVector major(icd9Short.size());
	Rcpp::CharacterVector minor(icd9Short.size());

	for (int i = 0; i < icd9Short.size(); ++i) {
		Rcpp::String thisShort = icd9Short[i];
		if (thisShort == NA_STRING) {
			// Mark both parts NA explicitly, as the invalid-length branches
			// below do, instead of leaving major as a blank string.
			major[i] = NA_STRING;
			minor[i] = NA_STRING;
			continue;
		}

		std::string s(thisShort.get_cstring());
		s = strimCpp(s);
		const std::string::size_type sz = s.size();

		if (icd9IsASingleE(s.c_str())) { // E code: major may be up to four characters
			switch (sz) {
			case 2:
			case 3:
			case 4:
				major[i] = s.substr(0, sz);
				minor[i] = minorEmpty;
				break;
			case 5:
				major[i] = s.substr(0, 4);
				minor[i] = s.substr(4, 1);
				break;
			default:
				major[i] = NA_STRING;
				minor[i] = NA_STRING;
				break;
			}
		} else { // not an E code: major is at most three characters
			switch (sz) {
			case 1:
			case 2:
			case 3:
				major[i] = s.substr(0, sz);
				// Previously `minor[minorEmpty];`, an expression with no
				// effect, so the empty-minor marker was never stored.
				minor[i] = minorEmpty;
				break;
			case 4:
			case 5:
				major[i] = s.substr(0, 3);
				minor[i] = s.substr(3, sz - 3);
				break;
			default:
				major[i] = NA_STRING;
				minor[i] = NA_STRING;
				break;
			}
		}

	} // for

	return icd9MajMinToParts(icd9AddLeadingZeroesMajor(major), minor);
}
Ejemplo n.º 2
0
// Zero-pad a single ICD-9 major part to its full width.
//
// For codes that icd9IsASingleVE does not recognise: lengths 1-3 are
// left-padded with '0' to three characters; any other length gives NA.
// For codes it does recognise: zeros are inserted after the first character
// so that V codes (per icd9IsASingleV) reach three characters and the others
// reach four; a four-character code is returned unchanged only when
// icd9IsASingleE accepts it. Every other case, and NA input, gives NA.
// [[Rcpp::export]]
Rcpp::String icd9AddLeadingZeroesMajorSingle(Rcpp::String major) {
	if (major == NA_STRING) {
		return (NA_STRING);
	}
	std::string m(major);
	const size_t len = strlen(major.get_cstring());

	if (icd9IsASingleVE(major.get_cstring())) {
		// The letter stays in front; padding goes right after it.
		if (len == 2) {
			if (icd9IsASingleV(m.c_str())) {
				m.insert(1, "0");
			} else {
				m.insert(1, "00");
			}
			return (m);
		}
		if (len == 3) {
			if (!icd9IsASingleV(m.c_str())) {
				m.insert(1, "0");
			}
			return (m);
		}
		if (len == 4 && icd9IsASingleE(m.c_str())) {
			return (m);
		}
		return NA_STRING;
	}

	// Remaining codes: pad on the left up to three characters.
	if (len == 1) {
		return ("00" + m);
	}
	if (len == 2) {
		return ("0" + m);
	}
	if (len == 3) {
		return (m);
	}
	return NA_STRING;
}
Ejemplo n.º 3
0
// Convert decimal-format ICD-9 codes to short (no decimal point) form.
//
// NA and empty elements are passed through untouched. Every other element is
// left-trimmed with trimLeftCpp. If it holds no '.', the result comes from
// icd9AddLeadingZeroesMajorSingleStd. Otherwise the first '.' is removed and
// the major part is zero-padded according to where the '.' was found.
// Note that the V/E classification helpers are given the original element
// text, not the trimmed copy.
//' @rdname convert
//' @export
// [[Rcpp::export]]
Rcpp::CharacterVector icd9DecimalToShort(
		const Rcpp::CharacterVector icd9Decimal) {
	// Work on a copy so skipped elements keep their input value.
	Rcpp::CharacterVector out = clone(icd9Decimal);
	const size_t total = icd9Decimal.length();

	for (size_t idx = 0; idx < total; ++idx) {
		Rcpp::String element = icd9Decimal[idx];
		if (element == NA_STRING || element == "")
			continue;

		const char * rawCode = element.get_cstring();
		std::string code(rawCode);
		code = trimLeftCpp(code);

		const std::size_t dot = code.find_first_of(".");
		if (dot == std::string::npos) {
			// No decimal point: only the major part needs padding.
			out[idx] = Rcpp::String(icd9AddLeadingZeroesMajorSingleStd(code));
			continue;
		}

#ifdef ICD9_DEBUG_TRACE
		Rcpp::Rcout << "found .\n";
#endif
		// The major is assumed to sit flush left, so the position of the
		// decimal point equals the length of the major part.
		code.erase(dot, 1);

		if (dot > 0 && dot < 4 && !icd9IsASingleVE(rawCode)) {
#ifdef ICD9_DEBUG_TRACE
			Rcpp::Rcout << "found numeric\n";
#endif
			code.insert(0, 3 - dot, '0');
		} else if (dot == 2 && icd9IsASingleV(rawCode)) {
#ifdef ICD9_DEBUG_TRACE
			Rcpp::Rcout << "found V\n";
#endif
			code.insert(1, 1, '0');
		} else if ((dot == 2 || dot == 3) && icd9IsASingleE(rawCode)) {
#ifdef ICD9_DEBUG_TRACE
			Rcpp::Rcout << "found E\n";
#endif
			code.insert(1, 4 - dot, '0');
		}
		// Any other shape is stored with only the decimal point removed.
		out[idx] = code;
	}
	return out;
}
Ejemplo n.º 4
0
// Guess whether a set of ICD-9 codes is in short (no decimal point) form.
//
// x_: a character vector, a factor (its levels are examined), or a logical
//     vector consisting only of NA. Any other logical vector, and any type
//     not listed in the switch below, raises an error through Rcpp::stop.
// n:  maximum number of elements to inspect. Zero or a negative value
//     inspects nothing.
//
// Returns false as soon as an inspected element contains a '.', true as soon
// as an inspected element is exactly five characters long with no '.', and
// true if the inspected elements are exhausted without either happening.
// [[Rcpp::export]]
bool guessShortPlusFactorCpp(SEXP x_, int n) {
  Rcpp::CharacterVector x;
  switch(TYPEOF(x_)) {
  case STRSXP: {
    x = Rcpp::as<Rcpp::CharacterVector>(x_);
    break;
  }
  case INTSXP: {
    // NOTE(review): a non-factor integer vector leaves x empty, so the
    // function returns true without raising an error - confirm intended.
    if (Rf_isFactor(x_))
      x = Rf_getAttrib(x_, R_LevelsSymbol);
    break;
  }
  case LGLSXP: {
    // we will accept all logical values, if all are NA, which defaults to
    // logical unless otherwise specified. And we obviously don't know whether
    // these NAs would have been short or long, just default to short.
    Rcpp::LogicalVector xl = Rcpp::LogicalVector(x_);

    if (Rcpp::all(is_na(xl)))
      return true;
    // don't break, because if there were non-NA logicals, this is an error
  }
  // deliberate fall through from LGLSXP
  default: {
    Rcpp::stop("Character vectors and factors are accepted");
  }
  }
  // Clamp in R_xlen_t so a long vector length is not truncated to int, and
  // loop with '<' so that a negative n examines nothing. The previous
  // `i != n` test never became false for negative n and indexed past the
  // end of x.
  const R_xlen_t limit = std::min(static_cast<R_xlen_t>(x.length()),
                                  static_cast<R_xlen_t>(n));
  const char * b;
  const char * ob;
  Rcpp::String bs;
  for (R_xlen_t i = 0; i < limit; ++i) {
    bs = x[i];
    b = bs.get_cstring();
    ob = b;
    while (*b) {
      if (*b == '.') return false;
      ++b;
    }
    // stop when we first get a five digit code. There are four digit major E codes.
    if ((b - ob) == 5) return true;
  }
  return true;
}
Ejemplo n.º 5
0
// Combine parallel vectors of major and minor parts into ICD-9 codes.
//
// major, minor: must have the same length, otherwise Rcpp::stop is called.
// isShort:      when false, a '.' is placed between major and minor whenever
//               the minor part is a non-empty string.
//
// Per element:
//   - NA or empty major            -> NA
//   - one-character major          -> prefixed with "00" unless
//                                     icd9IsASingleVE accepts it
//   - two-character major          -> prefixed with "0", or, when
//                                     icd9IsASingleVE accepts it, a "0" is
//                                     inserted after the first character
//   - longer majors are used unchanged
//   - NA minor                     -> the (padded) major alone
//   - otherwise                    -> padded major [+ "."] + minor
// [[Rcpp::export]]
Rcpp::CharacterVector icd9MajMinToCode(const Rcpp::CharacterVector major,
		const Rcpp::CharacterVector minor, bool isShort) {
#ifdef ICD9_DEBUG_TRACE
  Rcpp::Rcout << "icd9MajMinToCode: major.size() = " << major.size()
			<< " and minor.size() = " << minor.size() << "\n";
#endif

	if (major.size() != minor.size())
		Rcpp::stop("major and minor lengths differ");

#ifdef ICD9_DEBUG_TRACE
	Rcpp::Rcout << "major and minor are the same?\n";
#endif

	// The output length is known up front, so allocate once and fill in
	// place. Growing an Rcpp vector with push_back copies the whole vector on
	// every call, which made the previous loop quadratic.
	Rcpp::CharacterVector out(major.size());
	Rcpp::CharacterVector::iterator o = out.begin();
	Rcpp::CharacterVector::const_iterator j = major.begin();
	Rcpp::CharacterVector::const_iterator n = minor.begin();

	// `o` advances in the loop header so every `continue` keeps the three
	// iterators aligned.
	for (; j != major.end() && n != minor.end(); ++j, ++n, ++o) {
		Rcpp::String mjrelem = *j;
		if (mjrelem == NA_STRING) {
			*o = NA_STRING;
			continue;
		}
		// Pad using a std::string copy of the major part.
		const char* smj_c = mjrelem.get_cstring();
		std::string smj = std::string(smj_c);
		switch (strlen(smj_c)) {
		case 0:
			*o = NA_STRING;
			continue;
		case 1:
			if (!icd9IsASingleVE(smj_c)) {
				smj.insert(0, "00");
			}
			break;
		case 2:
			if (!icd9IsASingleVE(smj_c)) {
				smj.insert(0, "0");
			} else {
				smj.insert(1, "0");
			}
			break;
			// default: major is 3 (or more) chars already
		}
		Rcpp::String mnrelem = *n;
		if (mnrelem == NA_STRING) {
			// No minor part: the padded major is the whole code.
			*o = smj;
			continue;
		}
		if (!isShort && mnrelem != "") {
			smj.append(".");
		}
		smj.append(mnrelem);
		*o = smj;

	}

	return out;
}