//////////////////////////////////////////////////////////// // C'tor RootChainManager::RootChainManager(SEXP treeName, SEXP fileList, bool verbose, bool trace) : m_chain(0), m_verbose(verbose), m_trace(trace) { // Check arguments if ( ! IS_CHARACTER(treeName) ) error("treeName must be a string"); if ( GET_LENGTH(treeName) != 1) error("treeName must have length 1"); if ( ! IS_CHARACTER(fileList) ) error("fileList must be a list of strings"); // Get the tree name std::string treeNameC = CHAR(STRING_ELT(treeName, 0)); if (m_verbose) REprintf("Will read tree %s\n", treeNameC.c_str()); // Get the list of files to chain if (m_verbose) REprintf("There are %d files to add to the chain\n", GET_LENGTH(fileList) ); // Form the chain from the file lists m_chain = new TChain(treeNameC.c_str()); // Add files for ( unsigned int i = 0; i < GET_LENGTH(fileList); ++i ) { std::string fileNameC = CHAR(STRING_ELT(fileList, i) ); if (m_verbose) REprintf("Adding file %s to chain\n", fileNameC.c_str()); m_chain->Add( fileNameC.c_str(), 0 ); } }
SEXP scan_bam_template(SEXP rname, SEXP tag) { if (R_NilValue != tag) if (!IS_CHARACTER(tag)) Rf_error("'tag' must be NULL or 'character()'"); SEXP tmpl = PROTECT(NEW_LIST(N_TMPL_ELTS)); SET_VECTOR_ELT(tmpl, QNAME_IDX, NEW_CHARACTER(0)); SET_VECTOR_ELT(tmpl, FLAG_IDX, NEW_INTEGER(0)); SET_VECTOR_ELT(tmpl, RNAME_IDX, rname); SET_VECTOR_ELT(tmpl, STRAND_IDX, _tmpl_strand()); SET_VECTOR_ELT(tmpl, POS_IDX, NEW_INTEGER(0)); SET_VECTOR_ELT(tmpl, QWIDTH_IDX, NEW_INTEGER(0)); SET_VECTOR_ELT(tmpl, MAPQ_IDX, NEW_INTEGER(0)); SET_VECTOR_ELT(tmpl, CIGAR_IDX, NEW_CHARACTER(0)); SET_VECTOR_ELT(tmpl, MRNM_IDX, rname); SET_VECTOR_ELT(tmpl, MPOS_IDX, NEW_INTEGER(0)); SET_VECTOR_ELT(tmpl, ISIZE_IDX, NEW_INTEGER(0)); SET_VECTOR_ELT(tmpl, SEQ_IDX, _tmpl_DNAStringSet()); SET_VECTOR_ELT(tmpl, QUAL_IDX, _tmpl_PhredQuality()); SET_VECTOR_ELT(tmpl, PARTITION_IDX, NEW_INTEGER(0)); SET_VECTOR_ELT(tmpl, MATES_IDX, NEW_INTEGER(0)); if (R_NilValue == tag) { SET_VECTOR_ELT(tmpl, TAG_IDX, R_NilValue); } else { SET_VECTOR_ELT(tmpl, TAG_IDX, NEW_LIST(LENGTH(tag))); SET_ATTR(VECTOR_ELT(tmpl, TAG_IDX), R_NamesSymbol, tag); } SEXP names = PROTECT(NEW_CHARACTER(N_TMPL_ELTS)); for (int i = 0; i < N_TMPL_ELTS; ++i) SET_STRING_ELT(names, i, mkChar(TMPL_ELT_NMS[i])); SET_ATTR(tmpl, R_NamesSymbol, names); UNPROTECT(2); return tmpl; }
USER_OBJECT_ RS_discardPerlForeignReference(USER_OBJECT_ obj) { const char *key; USER_OBJECT_ ans = NEW_LOGICAL(1); #ifndef USE_NEW_PERL_REFERENCES if(IS_CHARACTER(obj)) { key = CHAR_DEREF(STRING_ELT(obj, 0)); } else { key = CHAR_DEREF(STRING_ELT(VECTOR_ELT(obj, 0), 0)); } LOGICAL_DATA(ans)[0] = discardPerlForeignReference(key, NULL); #else SV *el; dTHX; el = getForeignPerlReference(obj); if(el) { SvREFCNT_dec(obj); LOGICAL_DATA(ans)[0] = 1; } #endif return(ans); }
gint asCEnum(USER_OBJECT_ s_enum, GType etype) { GEnumClass *eclass = g_type_class_ref(etype); GEnumValue *evalue = NULL; gint eval = 0; if (IS_INTEGER(s_enum) || IS_NUMERIC(s_enum)) { evalue = g_enum_get_value(eclass, asCInteger(s_enum)); } else if (IS_CHARACTER(s_enum)) { const gchar* ename = asCString(s_enum); evalue = g_enum_get_value_by_name(eclass, ename); if (!evalue) evalue = g_enum_get_value_by_nick(eclass, ename); if (!evalue) evalue = g_enum_get_value(eclass, atoi(ename)); } if (!evalue) { PROBLEM "Could not parse enum value %s", asCString(s_enum) ERROR; } else eval = evalue->value; return(eval); }
SEXP filter_bamfile(SEXP ext, SEXP space, SEXP keepFlags, SEXP isSimpleCigar, SEXP tagFilter, SEXP mapqFilter, SEXP fout_name, SEXP fout_mode) { _checkext(ext, BAMFILE_TAG, "filterBam"); _checkparams(space, keepFlags, isSimpleCigar); if (!IS_CHARACTER(fout_name) || 1 != LENGTH(fout_name)) Rf_error("'fout_name' must be character(1)"); if (!IS_CHARACTER(fout_mode) || 1 != LENGTH(fout_mode)) Rf_error("'fout_mode' must be character(1)"); SEXP result = _filter_bam(ext, space, keepFlags, isSimpleCigar, tagFilter, mapqFilter, fout_name, fout_mode); if (R_NilValue == result) Rf_error("'filterBam' failed"); return result; }
SEXP R_match_soundex(SEXP x, SEXP table, SEXP nomatch, SEXP matchNA) { int nx = length(x); int ntable = length(table); int no_match = INTEGER(nomatch)[0]; int match_na = INTEGER(matchNA)[0]; int bytes = IS_CHARACTER(x); // when a and b are character vectors; create unsigned int vectors in which // the elements of and b will be copied unsigned int *s = NULL, *t = NULL; if (bytes) { int ml_x = max_length(x); int ml_t = max_length(table); s = (unsigned int *) malloc((ml_x + ml_t) * sizeof(unsigned int)); t = s + ml_x; if (s == NULL) { free(s); error("Unable to allocate enough memory"); } } // output vector SEXP yy = allocVector(INTSXP, nx); PROTECT(yy); int* y = INTEGER(yy); int index, isna_s, isna_t, len_s, len_t; unsigned int nfail = 0; double d; for (int i=0; i<nx; ++i) { index = no_match; s = get_elem(x, i, bytes, &len_s, &isna_s, s); for (int j=0; j<ntable; ++j) { t = get_elem(table, j, bytes, &len_t, &isna_t, t); if (!isna_s && !isna_t) { // both are char (usual case) d = soundex_dist(s, t, len_s, len_t, &nfail); if (d < 0.5) { // exact match as d can only take on values 0 and 1 index = j + 1; break; } } else if (isna_s && isna_t) { // both are NA index = match_na ? j + 1 : no_match; break; } } y[i] = index; } // cleanup and return check_fail(nfail); if (bytes) free(s); UNPROTECT(1); return(yy); }
void _bam_check_template_list(SEXP template_list) { if (!IS_LIST(template_list) || LENGTH(template_list) != N_TMPL_ELTS) Rf_error("'template' must be list(%d)", N_TMPL_ELTS); SEXP names = GET_ATTR(template_list, R_NamesSymbol); if (!IS_CHARACTER(names) || LENGTH(names) != N_TMPL_ELTS) Rf_error("'names(template)' must be character(%d)", N_TMPL_ELTS); for (int i = 0; i < LENGTH(names); ++i) if (strcmp(TMPL_ELT_NMS[i], CHAR(STRING_ELT(names, i))) != 0) Rf_error("'template' names do not match scan_bam_template\n'"); }
USER_OBJECT_ RS_PerlNames(USER_OBJECT_ obj) { HV* hv; SV *el; int n, i; USER_OBJECT_ names; char *key; I32 len; dTHX; if(IS_CHARACTER(obj)) { hv = get_hv(CHAR_DEREF(STRING_ELT(obj,0)), FALSE); } else hv = (HV *) RS_PerlGetSV(obj); if(hv == NULL) { PROBLEM "identifier does not refer to a Perl hashtable object" ERROR; } if(SvTYPE(hv) != SVt_PVHV) { if(SvROK(hv) && SvTYPE(SvRV(hv)) == SVt_PVHV) { hv = (HV *) SvRV(hv); } else { PROBLEM "identifier is not a Perl hashtable object, but some other type %s", getPerlType((SV*)hv) ERROR; } } n = hv_iterinit(hv); if(n == 0) return(NULL_USER_OBJECT); PROTECT(names = NEW_CHARACTER(n)); i = 0; while(i < n) { el = hv_iternextsv(hv, &key, &len); if(key == NULL) break; SET_STRING_ELT(names, i, COPY_TO_USER_STRING(key)); i++; } UNPROTECT(1); return(names); }
SEXP R_soundex_dist(SEXP a, SEXP b) { int na = length(a); int nb = length(b); int nt = MAX(na,nb); int bytes = IS_CHARACTER(a); // when a and b are character vectors; create unsigned int vectors in which // the elements of and b will be copied unsigned int *s = NULL, *t = NULL; if (bytes) { int ml_a = max_length(a); int ml_b = max_length(b); s = (unsigned int *) malloc((ml_a + ml_b) * sizeof(unsigned int)); t = s + ml_a; if (s == NULL) { free(s); error("Unable to allocate enough memory"); } } // create output variable SEXP yy = allocVector(REALSXP, nt); PROTECT(yy); double *y = REAL(yy); // compute distances, skipping NA's unsigned int nfail = 0; int len_s, len_t, isna_s, isna_t; for (int k=0; k < nt; ++k, ++y) { s = get_elem(a, k % na, bytes, &len_s, &isna_s, s); t = get_elem(b, k % nb, bytes, &len_t, &isna_t, t); if (isna_s || isna_t) { (*y) = NA_REAL; } else { (*y) = soundex_dist(s, t, len_s, len_t, &nfail); } } // cleanup and return check_fail(nfail); if (bytes) free(s); UNPROTECT(1); return yy; }
/* Returns the Perl reference to the Code object (sub-routine, method, etc.) identified by the S character vector `name'. */ SV * getPerlCodeObject(USER_OBJECT_ name) { SV *val; SV *tmp; Rboolean recurse = FALSE; dTHX; if(IS_CHARACTER(name)) { val = (SV *) get_cv(CHAR_DEREF(STRING_ELT(name, 0)), FALSE); return(val); } else tmp = getForeignPerlReference(name); if(tmp == NULL) return(NULL); do { recurse = FALSE; switch(SvTYPE(tmp)) { case SVt_PVGV: /*XXX Is this correct? or should it be GvCV() */ tmp = GvSV(tmp); recurse = TRUE; break; case SVt_PVCV: val = tmp; break; #if 0 /* SVt_RV no longer exists in modern perl versions */ case SVt_RV: val = SvRV(tmp); break; #endif default: val = NULL; getPerlType(tmp); break; } } while(recurse); return(val); }
USER_OBJECT_ RS_getAV(USER_OBJECT_ name, USER_OBJECT_ convert, USER_OBJECT_ elFilter, USER_OBJECT_ interpreter) { USER_OBJECT_ ans = NULL_USER_OBJECT; AV *arr; dTHX; if(!IS_CHARACTER(name)) { SV *tmp = getForeignPerlReference(name); if(tmp != NULL && SvTYPE(tmp) == SVt_RV) { tmp = SvRV(tmp); } if(tmp == NULL || SvTYPE(tmp) != SVt_PVAV) { PROBLEM "non-array reference passed to RS_getAV" ERROR; } arr = (AV*) tmp; } else { arr = get_av(CHAR_DEREF(STRING_ELT(name,0)), FALSE); } if(arr != NULL) { if(TYPEOF(convert) == LGLSXP || TYPEOF(convert) == INTSXP) { unsigned int depth; depth = (TYPEOF(convert) == LGLSXP ? LOGICAL(convert)[0] : INTEGER(convert)[0]); if(depth) { SV *filter = NULL; if(GET_LENGTH(elFilter)) filter = getPerlCodeObject(elFilter); ans = fromPerlAV(arr, filter, depth); } else { /* ans = fromPerl((SV*) arr); */ ans = makeForeignPerlReference((SV*) arr, makeRSPerlClassVector("PerlArrayReference"), &exportReferenceTable); } } else ans = directConvertFromPerl((SV *) arr, convert); } return(ans); }
SV * RS_PerlGetSV(USER_OBJECT_ obj) { SV *sv = NULL; if(IS_CHARACTER(obj)) { dTHX; HV *table = gv_stashpv("main", FALSE); const char *key = CHAR_DEREF(STRING_ELT(obj, 0)); SV **tmp = hv_fetch(table, key, strlen(key), 0); if(tmp && *tmp) sv = *tmp; else { PROBLEM "No such object %s in Perl's main::", key ERROR; } } else { sv = getForeignPerlReference(obj); } return(sv); }
USER_OBJECT_ RS_getHV(USER_OBJECT_ name, USER_OBJECT_ convert, USER_OBJECT_ interpreter) { USER_OBJECT_ ans = NULL_USER_OBJECT; HV *table; dTHX; if(!IS_CHARACTER(name)) { SV *tmp = getForeignPerlReference(name); if(tmp == NULL || SvTYPE(tmp) != SVt_PVHV) { PROBLEM "non-array reference passed to RS_getHV" ERROR; } table = (HV*) tmp; } else { table = get_hv(CHAR_DEREF(STRING_ELT(name,0)), FALSE); } if(table != NULL) { if(TYPEOF(convert) == LGLSXP || TYPEOF(convert) == INTSXP) { unsigned int depth; depth = (TYPEOF(convert) == LGLSXP ? LOGICAL(convert)[0] : INTEGER(convert)[0]); if(depth) { ans = fromPerlHV(table, depth); } else { /* ans = fromPerl((SV*) table); */ ans = makeForeignPerlReference((SV*) table, makeRSPerlClassVector("PerlHashReference"), &exportReferenceTable); } } else { ans = directConvertFromPerl((SV*) table, convert); } } return(ans); }
SEXP R_match_dl(SEXP x, SEXP table, SEXP nomatch, SEXP matchNA, SEXP weight, SEXP maxDistance){ PROTECT(x); PROTECT(table); PROTECT(nomatch); PROTECT(matchNA); PROTECT(weight); PROTECT(maxDistance); int nx = length(x) , ntable = length(table) , no_match = INTEGER(nomatch)[0] , match_na = INTEGER(matchNA)[0] , bytes = IS_CHARACTER(x) , ml_x = max_length(x) , ml_t = max_length(table); double *w = REAL(weight); double maxDist = REAL(maxDistance)[0]; /* claim space for workhorse */ dictionary *dict = new_dictionary( ml_x + ml_t + 1 ); double *scores = (double *) malloc( (ml_x + 3) * (ml_t + 2) * sizeof(double) ); unsigned int *X = NULL, *T = NULL; X = (unsigned int *) malloc( (ml_x + ml_t + 2) * sizeof(int) ); if ( (scores == NULL) || (X == NULL) ){ UNPROTECT(6); free(X); free(scores); error("Unable to allocate enough memory"); } T = X + ml_x + 1; memset(X, 0, (ml_x + ml_t + 2)*sizeof(int)); // output vector SEXP yy; PROTECT(yy = allocVector(INTSXP, nx)); int *y = INTEGER(yy); double d = R_PosInf, d1 = R_PosInf; int index, len_X, len_T, isna_X, isna_T; unsigned int *X1, *T1; for ( int i=0; i<nx; i++){ index = no_match; if ( bytes ){ X = get_elem(x, i , bytes, &len_X, &isna_X, X); } else { X1 = get_elem(x, i , bytes, &len_X, &isna_X, X); memcpy(X, X1, len_X*sizeof(int)); } d1 = R_PosInf; for ( int j=0; j<ntable; j++){ if ( bytes ){ T = get_elem(table, j, bytes, &len_T, &isna_T, T); } else { T1 = get_elem(table, j, bytes, &len_T, &isna_T, T); memcpy(T, T1, len_T * sizeof(int)); } if ( !isna_X && !isna_T ){ // both are char (usual case) d = distance( X, T, len_X, len_T, w, dict, scores ); memset(T,0, (ml_t+1)*sizeof(int)); if ( d <= maxDist && d < d1){ index = j + 1; if ( abs(d) < 1e-14 ) break; d1 = d; } } else if ( isna_X && isna_T ) { // both are NA index = match_na ? j + 1 : no_match; break; } } y[i] = index; memset(X,0,(ml_x + 1)*sizeof(int)); } UNPROTECT(7); free(X); free_dictionary(dict); free(scores); return(yy); }
SEXP R_dl(SEXP a, SEXP b, SEXP weight){ PROTECT(a); PROTECT(b); PROTECT(weight); int na = length(a) , nb = length(b) , nt = (na > nb) ? na : nb , bytes = IS_CHARACTER(a) , ml_a = max_length(a) , ml_b = max_length(b); double *w = REAL(weight); /* claim space for workhorse */ unsigned int *s=NULL, *t=NULL; dictionary *dict = new_dictionary( ml_a + ml_b + 1 ); double *scores = (double *) malloc( (ml_a + 3) * (ml_b + 2) * sizeof(double) ); int slen = (ml_a + ml_b + 2) * sizeof(int); s = (unsigned int *) malloc(slen); if ( (scores == NULL) | ( s == NULL ) ){ UNPROTECT(3); free(scores); free(s); error("Unable to allocate enough memory"); } t = s + ml_a + 1; memset(s, 0, slen); // output SEXP yy; PROTECT(yy = allocVector(REALSXP, nt)); double *y = REAL(yy); int i=0, j=0, len_s, len_t, isna_s, isna_t; unsigned int *s1, *t1; for ( int k=0; k < nt; ++k ){ if (bytes){ s = get_elem(a, i, bytes, &len_s, &isna_s, s); t = get_elem(b, j, bytes, &len_t, &isna_t, t); } else { // make sure there's an extra 0 at the end of the string. s1 = get_elem(a, i, bytes, &len_s, &isna_s, s); t1 = get_elem(b, j, bytes, &len_t, &isna_t, t); memcpy(s,s1,len_s*sizeof(int)); memcpy(t,t1,len_t*sizeof(int)); } if ( isna_s || isna_t ){ y[k] = NA_REAL; continue; } y[k] = distance( s, t, len_s, len_t, w, dict, scores ); if (y[k] < 0 ) y[k] = R_PosInf; i = RECYCLE(i+1,na); j = RECYCLE(j+1,nb); memset(s, 0, slen); } free_dictionary(dict); free(scores); free(s); UNPROTECT(4); return yy; }
SEXP R_match_qgram_tree(SEXP x, SEXP table, SEXP nomatch, SEXP matchNA, SEXP qq, SEXP maxDist, SEXP distance){ PROTECT(x); PROTECT(table); PROTECT(nomatch); PROTECT(matchNA); PROTECT(qq); PROTECT(maxDist); PROTECT(distance); double max_dist = REAL(maxDist)[0] == 0.0 ? R_PosInf : REAL(maxDist)[0]; int dist = INTEGER(distance)[0] // choose distance function , q = INTEGER(qq)[0] , nx = length(x) , ntable = length(table) , no_match = INTEGER(nomatch)[0] , match_na = INTEGER(matchNA)[0] , bytes = IS_CHARACTER(x) , ml_x = max_length(x) , ml_t = max_length(table); // set up a qtree; qtree *Q = new_qtree(q, 2); unsigned int *X = NULL, *T = NULL; if (bytes){ X = (unsigned int *) malloc( (ml_x + ml_t) * sizeof(int)); if ( X == NULL){ UNPROTECT(7); error("Unable to allocate enough memory"); } T = X + ml_x; } // output vector SEXP yy; PROTECT(yy = allocVector(INTSXP, nx)); int *y = INTEGER(yy); double d = R_PosInf, d1 = R_PosInf; int index, isna_X, isna_T, len_X, len_T; for ( int i=0; i<nx; i++){ index = no_match; X = get_elem(x, i, bytes, &len_X, &isna_X, X); d1 = R_PosInf; for ( int j=0; j<ntable; j++){ T = get_elem(table, j, bytes, &len_T, &isna_T,T); if ( !isna_X && !isna_T ){ // both are char (usual case) d = qgram_tree( X, T, len_X, len_T, q, Q, dist ); if ( d == -2.0 ){ UNPROTECT(7); error("Unable to allocate enough memory for qgram storage"); } if ( d > max_dist ){ continue; } else if ( d > -1 && d < d1){ index = j + 1; if ( abs(d) < 1e-14 ) break; d1 = d; } } else if ( isna_X && isna_T ) { // both are NA index = match_na ? j + 1 : no_match; break; } } y[i] = index; } if ( bytes ) free(X); free_qtree(); UNPROTECT(8); return(yy); }
SV * toPerl(USER_OBJECT_ val, Rboolean perlOwned) { int n = GET_LENGTH(val); dTHX; SV *sv = &sv_undef; if(val == NULL_USER_OBJECT) return(sv); if(isRSReferenceObject(val)){ return(getForeignPerlReference(val)); } if(GET_LENGTH(GET_CLASS(val))) { SV *o = userLevelConversionToPerl(val); if(!o) return(o); } if(n == 1) { if(IS_CHARACTER(val)) sv = newSVpv(CHAR_DEREF(STRING_ELT(val, 0)), 0); else if(IS_LOGICAL(val)) sv = newSViv(LOGICAL_DATA(val)[0]); else if(IS_INTEGER(val)) sv = newSViv(INTEGER_DATA(val)[0]); else if(IS_NUMERIC(val)) sv = newSVnv(NUMERIC_DATA(val)[0]); else if(IS_FUNCTION(val)) sv = RPerl_createRProxy(val); } else { AV *arr; int i; arr = newAV(); SvREFCNT_inc(arr); if(n > 0) av_extend(arr, n); /* Did try using av_make() and storing the SVs in an array first, but didn't fix the problem of bizarre array. */ for(i = 0; i < n ; i++) { if(IS_CHARACTER(val)) sv = newSVpv(CHAR_DEREF(STRING_ELT(val, i)), 0); else if(IS_LOGICAL(val)) sv = newSViv(LOGICAL_DATA(val)[i]); else if(IS_INTEGER(val)) sv = newSViv(INTEGER_DATA(val)[i]); else if(IS_NUMERIC(val)) sv = newSVnv(NUMERIC_DATA(val)[i]); SvREFCNT_inc(sv); av_push(arr, sv); } sv = (SV *) arr; SvREFCNT_dec(arr); #if 0 {SV *rv = newSVrv(arr, NULL); sv = rv; } #endif } if(perlOwned) #if 0 /*XXX Just experimenting */ sv = sv_2mortal(sv); #else sv = SvREFCNT_inc(sv); #endif return(sv); }
int charp(int x){ if(IS_CHARACTER(x)) return(1); else return(0); }
int RS_XML_readConnectionInput(void *context, char *buffer, int len) { SEXP e, tmp, arg; int n; int errorOccurred; const char *str; int left = len-1, count; #ifdef R_XML_DEBUG char *orig = buffer; #endif SEXP fun; xmlParserCtxtPtr ctx; #ifndef LIBXML2_NEW_BUFFER ctx = (xmlParserCtxtPtr) context; fun = ctx->_private; #else RFunCtxData *user = (RFunCtxData *) context; ctx = user->ctx; fun = user->fun; #endif if(len == -1) return(0); /* Setup the expression to call the user-supplied R function or call readLines(con, 1) if they gave us a connection. */ if(isFunction(fun)) { /* Invoke the user-provided function to get the next line. */ PROTECT(e = allocVector(LANGSXP, 2)); SETCAR(e, fun); PROTECT(arg = NEW_INTEGER(1)); INTEGER_DATA(arg)[0] = len; SETCAR(CDR(e), arg); UNPROTECT(1); } else e = fun; n = count = 0; while(n == 0 && left > 0) { str = NULL; /* Update the argument to the user-defined function to say how much is left. */ if(isFunction(fun)) INTEGER_DATA(arg)[0] = left; tmp = R_tryEval(e, R_GlobalEnv, &errorOccurred); if(errorOccurred || !IS_CHARACTER(tmp)) { UNPROTECT(1); if ((ctx->sax != NULL) && (ctx->sax->error != NULL)) /* throw an XML error. */ ctx->sax->error(ctx->userData, "Failed to call read on XML connection"); return(-1); } if(GET_LENGTH(tmp)) { str = CHAR_DEREF(STRING_ELT(tmp, 0)); n = strlen(str); if(n != 0) { /* Just add a new line and do it again. */ if(n > left) { PROBLEM "string read from XML connection too long for buffer: truncating %s to %d characters", str, left WARN; } strncpy(buffer, str, left); left -= n; count += n ; } } else { /* Notice that we may have actually added something to the buffer, specifically a sequence of empty lines \n, and these will be discarded and not passed to the XML parser but these are extraneous anyway. Are they? */ n = count = 0; break; } } #ifdef R_XML_DEBUG fprintf(stderr, "size (len = %d, n=%d, count=%d)\nbuffer= '%s'\nRstring='%s'\n", len, n, count, buffer, str);fflush(stderr); /* fprintf(stderr, "size (n=%d, count=%d) %s '%s'\n", n, count, str, orig);fflush(stderr); */ #endif UNPROTECT(1); return(count); /* return(count == 0 ? -1 : count); */ }
SEXP R_soundex(SEXP x) { int n = length(x); int bytes = IS_CHARACTER(x); // when a and b are character vectors; create unsigned int vectors in which // the elements of and b will be copied unsigned int *s = NULL; if (bytes) { int ml = max_length(x); s = (unsigned int *) malloc(ml*sizeof(unsigned int)); if (s == NULL) { free(s); error("Unable to allocate enough memory"); } } if (bytes) { // create output variable SEXP y = allocVector(STRSXP, n); PROTECT(y); // compute soundexes, skipping NA's unsigned int nfail = 0; int len_s, isna_s; char sndx[5]; unsigned int sndx_int[4]; for (int i = 0; i < n; ++i) { s = get_elem(x, i, bytes, &len_s, &isna_s, s); if (isna_s) { SET_STRING_ELT(y, i, R_NaString); } else { nfail += soundex(s, len_s, sndx_int); for (unsigned int j = 0; j < 4; ++j) sndx[j] = sndx_int[j]; sndx[4] = 0; SET_STRING_ELT(y, i, mkChar(sndx)); } } // cleanup and return check_fail(nfail); free(s); UNPROTECT(1); return y; } else { // create output variable SEXP y = allocVector(VECSXP, n); PROTECT(y); // compute soundexes, skipping NA's unsigned int nfail = 0; int len_s, isna_s; for (int i = 0; i < n; ++i) { s = get_elem(x, i, bytes, &len_s, &isna_s, s); if (isna_s) { SEXP sndx = allocVector(INTSXP, 1); PROTECT(sndx); INTEGER(sndx)[0] = NA_INTEGER; SET_VECTOR_ELT(y, i, sndx); UNPROTECT(1); } else { SEXP sndx = allocVector(INTSXP, 4); PROTECT(sndx); nfail += soundex(s, len_s, (unsigned int *)INTEGER(sndx)); SET_VECTOR_ELT(y, i, sndx); UNPROTECT(1); } } // cleanup and return check_fail(nfail); UNPROTECT(1); return y; } }
SEXP rph_phyloFit(SEXP msaP, SEXP treeStrP, SEXP substModP, SEXP scaleOnlyP, SEXP scaleSubtreeP, SEXP nratesP, SEXP alphaP, SEXP rateConstantsP, SEXP initModP, SEXP initBackgdFromDataP, SEXP initRandomP, SEXP initParsimonyP, SEXP clockP, SEXP emP, SEXP maxEmItsP, SEXP precisionP, SEXP gffP, SEXP ninfSitesP, SEXP quietP, SEXP noOptP, SEXP boundP, SEXP logFileP, SEXP selectionP) { struct phyloFit_struct *pf; int numProtect=0, i; double *doubleP; char *die_message=NULL; SEXP rv=R_NilValue; List *new_rate_consts = NULL; List *new_rate_weights = NULL; GetRNGstate(); //seed R's random number generator pf = phyloFit_struct_new(1); //sets appropriate defaults for RPHAST mode pf->msa = (MSA*)EXTPTR_PTR(msaP); if (treeStrP != R_NilValue) pf->tree = rph_tree_new(treeStrP); pf->use_em = LOGICAL_VALUE(emP); if (rateConstantsP != R_NilValue) { PROTECT(rateConstantsP = AS_NUMERIC(rateConstantsP)); numProtect++; doubleP = NUMERIC_POINTER(rateConstantsP); new_rate_consts = lst_new_dbl(LENGTH(rateConstantsP)); for (i=0; i < LENGTH(rateConstantsP); i++) lst_push_dbl(new_rate_consts, doubleP[i]); // pf->use_em = 1; } if (initModP != R_NilValue) { pf->input_mod = (TreeModel*)EXTPTR_PTR(initModP); pf->subst_mod = pf->input_mod->subst_mod; tm_register_protect(pf->input_mod); if (new_rate_consts == NULL && pf->input_mod->rK != NULL && pf->input_mod->nratecats > 1) { new_rate_consts = lst_new_dbl(pf->input_mod->nratecats); for (i=0; i < pf->input_mod->nratecats; i++) lst_push_dbl(new_rate_consts, pf->input_mod->rK[i]); // pf-> = 1; } if (pf->input_mod->empirical_rates && pf->input_mod->freqK != NULL && pf->input_mod->nratecats > 1) { new_rate_weights = lst_new_dbl(pf->input_mod->nratecats); for (i=0; i < pf->input_mod->nratecats; i++) lst_push_dbl(new_rate_weights, pf->input_mod->freqK[i]); } tm_reinit(pf->input_mod, rph_get_subst_mod(substModP), nratesP == R_NilValue ? pf->input_mod->nratecats : INTEGER_VALUE(nratesP), NUMERIC_VALUE(alphaP), new_rate_consts, new_rate_weights); } else { if (nratesP != R_NilValue) pf->nratecats = INTEGER_VALUE(nratesP); if (alphaP != R_NilValue) pf->alpha = NUMERIC_VALUE(alphaP); if (rateConstantsP != R_NilValue) { pf->rate_consts = new_rate_consts; if (nratesP == R_NilValue) pf->nratecats = lst_size(new_rate_consts); else if (lst_size(new_rate_consts) != pf->nratecats) die("length of new_rate_consts does not match nratecats\n"); } } pf->subst_mod = rph_get_subst_mod(substModP); pf->estimate_scale_only = LOGICAL_VALUE(scaleOnlyP); if (scaleSubtreeP != R_NilValue) { pf->subtree_name = smalloc((1+strlen(CHARACTER_VALUE(scaleSubtreeP)))*sizeof(char)); strcpy(pf->subtree_name, CHARACTER_VALUE(scaleSubtreeP)); } pf->random_init = LOGICAL_VALUE(initRandomP); pf->init_backgd_from_data = LOGICAL_VALUE(initBackgdFromDataP); pf->init_parsimony = LOGICAL_VALUE(initParsimonyP); pf->assume_clock = LOGICAL_VALUE(clockP); if (maxEmItsP != R_NilValue) pf->max_em_its = INTEGER_VALUE(maxEmItsP); pf->precision = get_precision(CHARACTER_VALUE(precisionP)); if (pf->precision == OPT_UNKNOWN_PREC) { die_message = "invalid precision"; goto rph_phyloFit_end; } if (gffP != R_NilValue) { pf->gff = (GFF_Set*)EXTPTR_PTR(gffP); gff_register_protect(pf->gff); } if (ninfSitesP != R_NilValue) pf->nsites_threshold = INTEGER_VALUE(ninfSitesP); pf->quiet = LOGICAL_VALUE(quietP); if (noOptP != R_NilValue) { int len=LENGTH(noOptP), pos=0; char *temp; for (i=0; i < LENGTH(noOptP); i++) len += strlen(CHARACTER_VALUE(STRING_ELT(noOptP, i))); temp = smalloc(len*sizeof(char)); for (i=0; i < LENGTH(noOptP); i++) { if (i != 0) temp[pos++] = ','; sprintf(&temp[pos], "%s", CHARACTER_VALUE(STRING_ELT(noOptP, i))); pos += strlen(CHARACTER_VALUE(STRING_ELT(noOptP, i))); } if (pos != len-1) die("ERROR parsing noOpt len=%i pos=%i\n", len, pos); temp[pos] = '\0'; pf->nooptstr = str_new_charstr(temp); } if (boundP != R_NilValue) { pf->bound_arg = lst_new_ptr(LENGTH(boundP)); for (i=0; i < LENGTH(boundP); i++) { String *temp = str_new_charstr(CHARACTER_VALUE(STRING_ELT(boundP, i))); lst_push_ptr(pf->bound_arg, temp); } } if (logFileP != R_NilValue) { if (IS_CHARACTER(logFileP)) pf->logf = phast_fopen(CHARACTER_VALUE(logFileP), "w+"); else if (IS_LOGICAL(logFileP) && LOGICAL_VALUE(logFileP)) { pf->logf = stdout; } } if (selectionP != R_NilValue) { pf->use_selection = TRUE; pf->selection = NUMERIC_VALUE(selectionP); } msa_register_protect(pf->msa); run_phyloFit(pf); rv = PROTECT(rph_listOfLists_to_SEXP(pf->results)); numProtect++; rph_phyloFit_end: if (pf->logf != NULL && pf->logf != stdout && pf->logf != stderr) phast_fclose(pf->logf); PutRNGstate(); if (die_message != NULL) die(die_message); if (numProtect > 0) UNPROTECT(numProtect); return rv; }
/* R interface to qgram distance */ SEXP R_qgram_tree(SEXP a, SEXP b, SEXP qq, SEXP distance){ PROTECT(a); PROTECT(b); PROTECT(qq); PROTECT(distance); // choose distance function int dist = INTEGER(distance)[0] , q = INTEGER(qq)[0] , na = length(a) , nb = length(b) , ml_a = max_length(a) , ml_b = max_length(b) , bytes = IS_CHARACTER(a); // set up a qtree; qtree *Q = new_qtree(q, 2L); unsigned int *s = NULL, *t = NULL; if ( bytes ){ s = (unsigned int *) malloc( (ml_a + ml_b) * sizeof(int) ); if ( s == NULL ){ UNPROTECT(4); error("Unable to allocate enough memory"); } t = s + ml_a; } // output int nt = (na > nb) ? na : nb; SEXP yy; PROTECT(yy = allocVector(REALSXP, nt)); double *y = REAL(yy); int i=0, j=0, len_s, len_t, isna_s, isna_t; for ( int k=0; k < nt; ++k , i = RECYCLE(i+1,na) , j = RECYCLE(j+1,nb) ){ s = get_elem(a, i, bytes, &len_s, &isna_s, s); t = get_elem(b, j, bytes, &len_t, &isna_t, t); if ( isna_s || isna_t ){ y[k] = NA_REAL; continue; } y[k] = qgram_tree(s, t, len_s, len_t, q, Q, dist); if (y[k] == -2.0){ UNPROTECT(5); error("Unable to allocate enough memory"); } if (y[k] == -1.0){ y[k] = R_PosInf; } } free_qtree(); if ( bytes ) free(s); UNPROTECT(5); return yy; }