//' @rdname convert //' @keywords internal manip // [[Rcpp::export]] Rcpp::List icd9ShortToPartsCpp(const Rcpp::CharacterVector icd9Short, const Rcpp::String minorEmpty) { Rcpp::CharacterVector major(icd9Short.size()); Rcpp::CharacterVector minor(icd9Short.size()); for (int i = 0; i < icd9Short.size(); ++i) { Rcpp::String thisShort = icd9Short[i]; if (thisShort == NA_STRING) { // .is_na() is private? minor[i] = NA_STRING; // I think set_na() might be an alternative. continue; } std::string s(thisShort.get_cstring()); // TODO maybe better to use as? s = strimCpp(s); // in place or rewrite? std::string::size_type sz = s.size(); if (icd9IsASingleE(s.c_str())) { // E code switch (sz) { case 2: case 3: case 4: major[i] = s.substr(0, sz); minor[i] = minorEmpty; break; case 5: major[i] = s.substr(0, 4); minor[i] = s.substr(4, 1); break; default: major[i] = NA_STRING; minor[i] = NA_STRING; continue; } } else { // not an E code switch (sz) { case 1: case 2: case 3: major[i] = s.substr(0, sz); minor[minorEmpty]; continue; case 4: case 5: major[i] = s.substr(0, 3); minor[i] = s.substr(3, sz - 3); continue; default: major[i] = NA_STRING; minor[i] = NA_STRING; continue; } } } // for return icd9MajMinToParts(icd9AddLeadingZeroesMajor(major), minor); }
// [[Rcpp::export]] Rcpp::String icd9AddLeadingZeroesMajorSingle(Rcpp::String major) { if (major == NA_STRING) { return (NA_STRING); } std::string m(major); if (!icd9IsASingleVE(major.get_cstring())) { switch (strlen(major.get_cstring())) { case 0: return (NA_STRING); case 1: return ("00" + m); case 2: return ("0" + m); case 3: return (m); } } else { switch (strlen(major.get_cstring())) { case 1: return (NA_STRING); case 2: if (icd9IsASingleV(m.c_str())) { m.insert(1, "0"); return (m); } else { m.insert(1, "00"); return (m); } case 3: if (icd9IsASingleV(m.c_str())) { return (m); } else { m.insert(1, "0"); return (m); } case 4: if (icd9IsASingleE(m.c_str())) return (m); } } return NA_STRING; }
//' @rdname convert //' @export // [[Rcpp::export]] Rcpp::CharacterVector icd9DecimalToShort( const Rcpp::CharacterVector icd9Decimal) { Rcpp::CharacterVector out = clone(icd9Decimal); // clone instead of pushing back thousands of times size_t ilen = icd9Decimal.length(); if (ilen == 0) return out; for (size_t i = 0; i != ilen; ++i) { Rcpp::String strna = icd9Decimal[i]; // need to copy here? does it copy? if (strna == NA_STRING || strna == "") continue; // TODO: Rcpp::String doesn't implement many functions, so using STL. A FAST way // might be to use Rcpp::String's function get_cstring, and recode the trim // functions to take const char *. This would avoid the type change AND be // faster trimming. const char * thiscode_cstr = strna.get_cstring(); std::string thiscode(thiscode_cstr); thiscode = trimLeftCpp(thiscode); // TODO consider rejecting grossly invalid codes as NA: std::size_t pos = thiscode.find_first_of("."); if (pos != std::string::npos) { #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "found .\n"; #endif // now we assume that the major is snug against the left side, so we can add zero padding thiscode.erase(pos, 1); // remove the decimal point // could do fewer tests on the code by doing this last, but most codes are not V or E... if (pos > 0 && pos < 4 && !icd9IsASingleVE(thiscode_cstr)) { #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "found numeric\n"; #endif thiscode.insert(0, 3 - pos, '0'); } else if (pos == 2 && icd9IsASingleV(thiscode_cstr)) { #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "found V\n"; #endif thiscode.insert(1, 1, '0'); out[i] = thiscode; } else if ((pos == 2 || pos == 3) && icd9IsASingleE(thiscode_cstr)) { #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "found E\n"; #endif thiscode.insert(1, 4 - pos, '0'); } // otherwise leave the code alone out[i] = thiscode; } else { out[i] = Rcpp::String(icd9AddLeadingZeroesMajorSingleStd(thiscode)); } } return out; }
// [[Rcpp::export]] bool guessShortPlusFactorCpp(SEXP x_, int n) { Rcpp::CharacterVector x; switch(TYPEOF(x_)) { case STRSXP: { x = Rcpp::as<Rcpp::CharacterVector>(x_); break; } case INTSXP: { if (Rf_isFactor(x_)) x = Rf_getAttrib(x_, R_LevelsSymbol); break; } case LGLSXP: { // we will accept all logical values, if all are NA, which defauts to // logical unless otherwise specified. And we obviously don't know whether // these NAs would have been short or long, just default to short. Rcpp::LogicalVector xl = Rcpp::LogicalVector(x_); if (Rcpp::all(is_na(xl))) return true; // don't break, because if there were non-NA logicals, this is an error } default: { Rcpp::stop("Character vectors and factors are accepted"); } } n = std::min((int)x.length(), n); const char * b; const char * ob; Rcpp::String bs; for (R_xlen_t i = 0; i != n; ++i) { bs = x[i]; b = bs.get_cstring(); ob = b; while (*b) { if (*b == '.') return false; ++b; } // stop when we first get a five digit code. There are four digit major E codes. if ((b - ob) == 5) return true; } return true; }
// [[Rcpp::export]] Rcpp::CharacterVector icd9MajMinToCode(const Rcpp::CharacterVector major, const Rcpp::CharacterVector minor, bool isShort) { #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "icd9MajMinToCode: major.size() = " << major.size() << " and minor.size() = " << minor.size() << "\n"; #endif if (major.size() != minor.size()) Rcpp::stop("major and minor lengths differ"); #ifdef ICD9_DEBUG_TRACE Rcpp::Rcout << "major and minor are the same?\n"; #endif Rcpp::CharacterVector out; // wish I could reserve space for this Rcpp::CharacterVector::const_iterator j = major.begin(); Rcpp::CharacterVector::const_iterator n = minor.begin(); for (; j != major.end() && n != minor.end(); ++j, ++n) { Rcpp::String mjrelem = *j; if (mjrelem == NA_STRING) { out.push_back(NA_STRING); continue; } // work around Rcpp bug with push_front: convert to string just for this // TODO: try to do this with C string instead const char* smj_c = mjrelem.get_cstring(); std::string smj = std::string(smj_c); switch (strlen(smj_c)) { case 0: out.push_back(NA_STRING); continue; case 1: if (!icd9IsASingleVE(smj_c)) { smj.insert(0, "00"); } break; case 2: if (!icd9IsASingleVE(smj_c)) { smj.insert(0, "0"); } else { smj.insert(1, "0"); } // default: // major is 3 (or more) chars already } Rcpp::String mnrelem = *n; if (mnrelem == NA_STRING) { //out.push_back(mjrelem); out.push_back(smj); continue; } // this can probably be done more quickly: //std::string smj(mjrelem); if (!isShort && mnrelem != "") { smj.append("."); } smj.append(mnrelem); out.push_back(smj); } // ?slow step somewhere around here, with use of Rcpp::String, maybe in the wrapping? Maybe in the multiple push_back calls //return wrap(out); return out; }