/*
 * Samples sampleNumber pairs of points (x, y) from metaSequence and updates
 * per-bucket statistics about whether the segments containing them are linked.
 *
 * For every sampled pair the separation diff = y - x is placed in bucket
 * (int64_t)(log10(diff) * bucketSize), which must lie in [0, bucketNumber), and
 * samples[bucket] is incremented. If both points fall in segments found in
 * sortedSegments (and, when duplication is false, neither segment is reported
 * as duplicated), the pair is tested with linked() against otherEventString.
 * Only when that call sets its flag is the pair tested against eventString:
 * correct[bucket] is incremented when that second call returns true, and
 * aligned[bucket] when it sets its flag.
 *
 * NOTE(review): the exact meaning of linked()'s return value versus its output
 * flag is defined elsewhere - confirm against that function.
 *
 * Parameters:
 *   flower               unused here; kept for interface compatibility.
 *   metaSequence         sequence to sample from; nothing is done if its
 *                        length is <= 1.
 *   eventString          event tested to update correct/aligned.
 *   otherEventString     event used to pre-filter the sampled pairs.
 *   sampleNumber         number of pairs to sample.
 *   correct/aligned/samples  counter arrays with at least bucketNumber entries.
 *   bucketNumber         number of buckets (checked with assert only).
 *   bucketSize           multiplier applied to log10(diff) to pick a bucket.
 *   sortedSegments       set searched by getSegment().
 *   duplication          when false, pairs touching duplicated segments are
 *                        skipped.
 *   proportionOfSequence forwarded to pickAPairOfPointsP().
 */
void samplePointsWithOtherReference(Flower *flower, MetaSequence *metaSequence, const char *eventString, const char *otherEventString, int64_t sampleNumber, int64_t *correct, int64_t *aligned, int64_t *samples, int64_t bucketNumber, double bucketSize, stSortedSet *sortedSegments, bool duplication, double proportionOfSequence) {
    if (metaSequence_getLength(metaSequence) <= 1) {
        return; /* cannot pick two distinct points */
    }
    for (int64_t i = 0; i < sampleNumber; i++) {
        int64_t x, y;
        pickAPairOfPointsP(metaSequence, &x, &y, proportionOfSequence);
        int64_t diff = y - x;
        assert(diff >= 1);

        /* Logarithmic bucketing of the separation between the two points. */
        int64_t bucket = log10(diff) * bucketSize;
        assert(bucket < bucketNumber);
        assert(bucket >= 0);
        samples[bucket]++;

        Segment *segmentX = getSegment(sortedSegments, x, metaSequence);
        if (segmentX == NULL || (!duplication && duplicated(segmentX))) {
            continue;
        }
        Segment *segmentY = getSegment(sortedSegments, y, metaSequence);
        /* The duplication filter must look at segmentY here; testing segmentX
         * a second time let duplicated Y segments through. */
        if (segmentY == NULL || (!duplication && duplicated(segmentY))) {
            continue;
        }

        bool b;
        linked(segmentX, segmentY, diff, otherEventString, &b);
        if (!b) {
            continue;
        }
        /* b is overwritten by this second call before aligned is updated. */
        if (linked(segmentX, segmentY, diff, eventString, &b)) {
            correct[bucket]++;
        }
        if (b) {
            aligned[bucket]++;
        }
    }
}
/* Nonzero when a column is guessed to be a measure variable: integer, numeric
 * or logical, but not a factor. */
static int isGuessedMeasureCol(SEXP col)
{
    return (isInteger(col) || isNumeric(col) || isLogical(col)) && !isFactor(col);
}

/*
 * Turns a user supplied id/measure specification into an integer vector of
 * 1-based column numbers and validates every entry against ncol.
 *
 * vars may be a character vector (matched against dtnames with chmatch), a
 * real vector (coerced to integer) or an integer vector (returned as is).
 * label is "id.var" or "measure.var" and is only used in error messages.
 * The returned vector is NOT protected; the caller must PROTECT it.
 */
static SEXP resolveMeltCols(SEXP vars, SEXP dtnames, int ncol, const char *label)
{
    SEXP cols = R_NilValue;
    int i, n;

    switch (TYPEOF(vars)) {
    case STRSXP:
        cols = chmatch(vars, dtnames, 0, FALSE);
        break;
    case REALSXP:
        cols = coerceVector(vars, INTSXP);
        break;
    case INTSXP:
        cols = vars;
        break;
    default:
        error("Unknown '%s' type %s, must be character or integer vector", label, type2char(TYPEOF(vars)));
    }
    PROTECT(cols);
    n = length(cols);
    for (i = 0; i < n; i++) {
        int col = INTEGER(cols)[i];
        if (col <= 0) {
            /* Only a character specification has a name to report; calling
             * STRING_ELT on an integer/real vector is invalid. */
            if (TYPEOF(vars) == STRSXP)
                error("Column '%s' not found in 'data'", CHAR(STRING_ELT(vars, i)));
            error("%s value at position %d is NA or not a positive column number", label, i + 1);
        } else if (col > ncol) {
            error("%s value exceeds ncol(data)", label);
        }
    }
    UNPROTECT(1);
    return cols;
}

/*
 * Returns a new integer vector holding the entries of cols that duplicated()
 * does not flag, in their original order. cols must be protected by the
 * caller; the result is NOT protected.
 */
static SEXP uniqueMeltCols(SEXP cols)
{
    SEXP dupflags, uniq;
    int i, n, nuniq = 0, k = 0;

    PROTECT(dupflags = duplicated(cols, FALSE));
    n = length(dupflags);
    for (i = 0; i < n; i++) {
        if (!LOGICAL(dupflags)[i]) nuniq++;
    }
    PROTECT(uniq = allocVector(INTSXP, nuniq));
    for (i = 0; i < n; i++) {
        if (!LOGICAL(dupflags)[i]) INTEGER(uniq)[k++] = INTEGER(cols)[i];
    }
    UNPROTECT(2);
    return uniq;
}

/*
 * Works out which columns of DT are id variables and which are measure
 * variables.
 *
 *   - both id and measure NULL: columns satisfying isGuessedMeasureCol()
 *     become measure columns, all others id columns, and a warning is raised.
 *   - only id given: measure columns are set_diff() of the unique id columns
 *     and ncol.
 *   - only measure given: id columns are set_diff() of the unique measure
 *     columns and ncol.
 *   - both given: each is resolved and validated independently.
 *
 * id and measure may be character, real or integer vectors; any other type,
 * an unmatched name, or a column number outside 1..ncol raises an R error.
 * When verbose is true the guessed assignment is printed with Rprintf.
 *
 * Returns a list of length 2: element 0 the id column numbers, element 1 the
 * measure column numbers (both 1-based integer vectors).
 */
SEXP checkVars(SEXP DT, SEXP id, SEXP measure, Rboolean verbose)
{
    int i, ncol = LENGTH(DT), targetcols = 0, protecti = 0, u = 0, v = 0;
    SEXP thiscol, idcols = R_NilValue, valuecols = R_NilValue, unqtmp, ans;
    SEXP dtnames = getAttrib(DT, R_NamesSymbol);

    if (isNull(id) && isNull(measure)) {
        /* First pass counts measure candidates so both results can be sized. */
        for (i = 0; i < ncol; i++) {
            thiscol = VECTOR_ELT(DT, i);
            if (isGuessedMeasureCol(thiscol)) targetcols++;
        }
        PROTECT(idcols = allocVector(INTSXP, ncol - targetcols)); protecti++;
        PROTECT(valuecols = allocVector(INTSXP, targetcols)); protecti++;
        for (i = 0; i < ncol; i++) {
            thiscol = VECTOR_ELT(DT, i);
            if (isGuessedMeasureCol(thiscol))
                INTEGER(valuecols)[u++] = i + 1;
            else
                INTEGER(idcols)[v++] = i + 1;
        }
        warning("To be consistent with reshape2's melt, id.vars and measure.vars are internally guessed when both are 'NULL'. "
                "All non-numeric/integer/logical type columns are considered id.vars, which in this case are columns '%s'. "
                "Consider providing at least one of 'id' or 'measure' vars in future.",
                CHAR(STRING_ELT(concat(dtnames, idcols), 0)));
    } else if (isNull(measure)) {
        PROTECT(idcols = resolveMeltCols(id, dtnames, ncol, "id.var")); protecti++;
        PROTECT(unqtmp = uniqueMeltCols(idcols)); protecti++;
        PROTECT(valuecols = set_diff(unqtmp, ncol)); protecti++;
        if (verbose)
            Rprintf("'measure.var' is missing. Assigning all columns other than 'id.var' columns which are %s as 'measure.var'.\n",
                    CHAR(STRING_ELT(concat(dtnames, idcols), 0)));
    } else if (isNull(id)) {
        PROTECT(valuecols = resolveMeltCols(measure, dtnames, ncol, "measure.var")); protecti++;
        PROTECT(unqtmp = uniqueMeltCols(valuecols)); protecti++;
        PROTECT(idcols = set_diff(unqtmp, ncol)); protecti++;
        if (verbose)
            Rprintf("'id.var' is missing. Assigning all columns other than 'measure.var' columns as 'id.var'. Assigned 'id.var's are %s.\n",
                    CHAR(STRING_ELT(concat(dtnames, idcols), 0)));
    } else {
        PROTECT(idcols = resolveMeltCols(id, dtnames, ncol, "id.var")); protecti++;
        PROTECT(valuecols = resolveMeltCols(measure, dtnames, ncol, "measure.var")); protecti++;
    }

    PROTECT(ans = allocVector(VECSXP, 2)); protecti++;
    SET_VECTOR_ELT(ans, 0, idcols);
    SET_VECTOR_ELT(ans, 1, valuecols);
    UNPROTECT(protecti);
    return(ans);
}
SEXP typeconvert(SEXP call, SEXP op, SEXP args, SEXP env) { SEXP cvec, a, dup, levs, dims, names, dec; SEXP rval = R_NilValue; /* -Wall */ int i, j, len, asIs; Rboolean done = FALSE; char *endp; const char *tmp = NULL; LocalData data = {NULL, 0, 0, '.', NULL, NO_COMCHAR, 0, NULL, FALSE, FALSE, 0, FALSE, FALSE}; Typecvt_Info typeInfo; /* keep track of possible types of cvec */ typeInfo.islogical = TRUE; /* we can't rule anything out initially */ typeInfo.isinteger = TRUE; typeInfo.isreal = TRUE; typeInfo.iscomplex = TRUE; data.NAstrings = R_NilValue; args = CDR(args); if (!isString(CAR(args))) error(_("the first argument must be of mode character")); data.NAstrings = CADR(args); if (TYPEOF(data.NAstrings) != STRSXP) error(_("invalid '%s' argument"), "na.strings"); asIs = asLogical(CADDR(args)); if (asIs == NA_LOGICAL) asIs = 0; dec = CADDDR(args); if (isString(dec) || isNull(dec)) { if (length(dec) == 0) data.decchar = '.'; else data.decchar = translateChar(STRING_ELT(dec, 0))[0]; } cvec = CAR(args); len = length(cvec); /* save the dim/dimnames attributes */ PROTECT(dims = getAttrib(cvec, R_DimSymbol)); if (isArray(cvec)) PROTECT(names = getAttrib(cvec, R_DimNamesSymbol)); else PROTECT(names = getAttrib(cvec, R_NamesSymbol)); /* Use the first non-NA to screen */ for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (!(STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp))) break; } if (i < len) { /* not all entries are NA */ ruleout_types(tmp, &typeInfo, &data); } if (typeInfo.islogical) { PROTECT(rval = allocVector(LGLSXP, len)); for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp)) LOGICAL(rval)[i] = NA_LOGICAL; else { if (strcmp(tmp, "F") == 0 || strcmp(tmp, "FALSE") == 0) LOGICAL(rval)[i] = 0; else if(strcmp(tmp, "T") == 0 || strcmp(tmp, "TRUE") == 0) LOGICAL(rval)[i] = 1; else { 
typeInfo.islogical = FALSE; ruleout_types(tmp, &typeInfo, &data); break; } } } if (typeInfo.islogical) done = TRUE; else UNPROTECT(1); } if (!done && typeInfo.isinteger) { PROTECT(rval = allocVector(INTSXP, len)); for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp)) INTEGER(rval)[i] = NA_INTEGER; else { INTEGER(rval)[i] = Strtoi(tmp, 10); if (INTEGER(rval)[i] == NA_INTEGER) { typeInfo.isinteger = FALSE; ruleout_types(tmp, &typeInfo, &data); break; } } } if(typeInfo.isinteger) done = TRUE; else UNPROTECT(1); } if (!done && typeInfo.isreal) { PROTECT(rval = allocVector(REALSXP, len)); for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp)) REAL(rval)[i] = NA_REAL; else { REAL(rval)[i] = Strtod(tmp, &endp, FALSE, &data); if (!isBlankString(endp)) { typeInfo.isreal = FALSE; ruleout_types(tmp, &typeInfo, &data); break; } } } if(typeInfo.isreal) done = TRUE; else UNPROTECT(1); } if (!done && typeInfo.iscomplex) { PROTECT(rval = allocVector(CPLXSXP, len)); for (i = 0; i < len; i++) { tmp = CHAR(STRING_ELT(cvec, i)); if (STRING_ELT(cvec, i) == NA_STRING || strlen(tmp) == 0 || isNAstring(tmp, 1, &data) || isBlankString(tmp)) COMPLEX(rval)[i].r = COMPLEX(rval)[i].i = NA_REAL; else { COMPLEX(rval)[i] = strtoc(tmp, &endp, FALSE, &data); if (!isBlankString(endp)) { typeInfo.iscomplex = FALSE; /* this is not needed, unless other cases are added */ ruleout_types(tmp, &typeInfo, &data); break; } } } if(typeInfo.iscomplex) done = TRUE; else UNPROTECT(1); } if (!done) { if (asIs) { PROTECT(rval = duplicate(cvec)); for (i = 0; i < len; i++) if(isNAstring(CHAR(STRING_ELT(rval, i)), 1, &data)) SET_STRING_ELT(rval, i, NA_STRING); } else { PROTECT(dup = duplicated(cvec, FALSE)); j = 0; for (i = 0; i < len; i++) { /* <NA> is never to be a level here */ if 
(STRING_ELT(cvec, i) == NA_STRING) continue; if (LOGICAL(dup)[i] == 0 && !isNAstring(CHAR(STRING_ELT(cvec, i)), 1, &data)) j++; } PROTECT(levs = allocVector(STRSXP,j)); j = 0; for (i = 0; i < len; i++) { if (STRING_ELT(cvec, i) == NA_STRING) continue; if (LOGICAL(dup)[i] == 0 && !isNAstring(CHAR(STRING_ELT(cvec, i)), 1, &data)) SET_STRING_ELT(levs, j++, STRING_ELT(cvec, i)); } /* We avoid an allocation by reusing dup, * a LGLSXP of the right length */ rval = dup; SET_TYPEOF(rval, INTSXP); /* put the levels in lexicographic order */ sortVector(levs, FALSE); PROTECT(a = matchE(levs, cvec, NA_INTEGER, env)); for (i = 0; i < len; i++) INTEGER(rval)[i] = INTEGER(a)[i]; setAttrib(rval, R_LevelsSymbol, levs); PROTECT(a = mkString("factor")); setAttrib(rval, R_ClassSymbol, a); UNPROTECT(3); } } setAttrib(rval, R_DimSymbol, dims); setAttrib(rval, isArray(cvec) ? R_DimNamesSymbol : R_NamesSymbol, names); UNPROTECT(3); return rval; }