Exemplo n.º 1
0
int longToRagged(const SEXP& icd9df, VecVecStr& ragged, VecStr& visitIds,
		const std::string visitId, const std::string icd9Field =
				"icd9", bool aggregate = true) {
#ifdef ICD9_VALGRIND
	CALLGRIND_START_INSTRUMENTATION;
#endif
	SEXP icds = PROTECT(getRListOrDfElement(icd9df, icd9Field.c_str()));
	SEXP vsexp = PROTECT(getRListOrDfElement(icd9df, visitId.c_str()));
	const int approx_cmb_per_visit = 15; // just an estimate. Prob best to overestimate.
	int vlen = Rf_length(icds);
	visitIds.reserve(vlen / approx_cmb_per_visit);
	ragged.reserve(vlen / approx_cmb_per_visit);
	int max_per_pt = 1;
	if (TYPEOF(vsexp) != STRSXP)
	  Rcpp::stop("need string input to longToRagged\n");
#ifdef ICD9_DEBUG
	Rcpp::Rcout << "longToRagged SEXP is STR\n";
#endif

	const char* lastVisitId = "";
	for (int i = 0; i < vlen; ++i) {
	  // always STRING? may get numeric, integer, factor? Can always handle this on R side
		const char* icd = CHAR(STRING_ELT(icds, i));
		const char* vi = CHAR(STRING_ELT(vsexp, i));

		if (strcmp(lastVisitId, vi) != 0
				&& (!aggregate
						|| std::find(visitIds.rbegin(), visitIds.rend(), vi)
								== visitIds.rend())) {
			VecStr vcodes;
			vcodes.reserve(approx_cmb_per_visit); // estimate of number of codes per patient.
			vcodes.push_back(icd); // new vector of ICD codes with this first item
			ragged.push_back(vcodes); // and add that vector to the intermediate structure
			visitIds.push_back(vi);
		} else {
#ifdef ICD9_DEBUG
			if (ragged.size()==0) {
				Rcout << "ragged size is ZERO! aborting\n";
				break;
			}
#endif
			ragged[ragged.size() - 1].push_back(icd); // augment vec for current visit and N/V/E type // EXPENSIVE.
			int len = ragged[ragged.size() - 1].size(); // get new count of cmb for one patient
			if (len > max_per_pt)
				max_per_pt = len;
		}
#ifdef ICD9_DEBUG_TRACE
		Rcout << "ragged size is " << ragged.size() << "\n";
#endif

		lastVisitId = vi;
	} // end loop through all visit-code input data

#ifdef ICD9_VALGRIND
	CALLGRIND_STOP_INSTRUMENTATION;
	//        CALLGRIND_DUMP_STATS;
#endif
	UNPROTECT(2); // do sooner if possible?
	return max_per_pt;
}