SEXP R_hm(SEXP a, SEXP b, SEXP maxDistance){ PROTECT(a); PROTECT(b); PROTECT(maxDistance); int na = length(a); int nb = length(b); int nt = ( na > nb) ? na : nb; SEXP yy; PROTECT(yy = allocVector(REALSXP,nt)); double *y = REAL(yy); int i=0, j=0, k=0, nchar; int maxDist = INTEGER(maxDistance)[0]; for ( k=0; k<nt; ++k){ if ( INTEGER(VECTOR_ELT(a,i))[0] == NA_INTEGER || INTEGER(VECTOR_ELT(b,j))[0] == NA_INTEGER ){ y[k] = NA_REAL; continue; } nchar = length(VECTOR_ELT(a,i)); if ( nchar != length(VECTOR_ELT(b,j)) ){ y[k] = R_PosInf; continue; } y[k] = (double) hamming( (unsigned int *) INTEGER(VECTOR_ELT(a,i)), (unsigned int *) INTEGER(VECTOR_ELT(b,j)), nchar, maxDist ); if (y[k] < 0) y[k] = R_PosInf; i = RECYCLE(i+1,na); j = RECYCLE(j+1,nb); } UNPROTECT(4); return yy; }
SEXP R_dl(SEXP a, SEXP b, SEXP weight){ PROTECT(a); PROTECT(b); PROTECT(weight); int na = length(a) , nb = length(b) , nt = (na > nb) ? na : nb , bytes = IS_CHARACTER(a) , ml_a = max_length(a) , ml_b = max_length(b); double *w = REAL(weight); /* claim space for workhorse */ unsigned int *s=NULL, *t=NULL; dictionary *dict = new_dictionary( ml_a + ml_b + 1 ); double *scores = (double *) malloc( (ml_a + 3) * (ml_b + 2) * sizeof(double) ); int slen = (ml_a + ml_b + 2) * sizeof(int); s = (unsigned int *) malloc(slen); if ( (scores == NULL) | ( s == NULL ) ){ UNPROTECT(3); free(scores); free(s); error("Unable to allocate enough memory"); } t = s + ml_a + 1; memset(s, 0, slen); // output SEXP yy; PROTECT(yy = allocVector(REALSXP, nt)); double *y = REAL(yy); int i=0, j=0, len_s, len_t, isna_s, isna_t; unsigned int *s1, *t1; for ( int k=0; k < nt; ++k ){ if (bytes){ s = get_elem(a, i, bytes, &len_s, &isna_s, s); t = get_elem(b, j, bytes, &len_t, &isna_t, t); } else { // make sure there's an extra 0 at the end of the string. s1 = get_elem(a, i, bytes, &len_s, &isna_s, s); t1 = get_elem(b, j, bytes, &len_t, &isna_t, t); memcpy(s,s1,len_s*sizeof(int)); memcpy(t,t1,len_t*sizeof(int)); } if ( isna_s || isna_t ){ y[k] = NA_REAL; continue; } y[k] = distance( s, t, len_s, len_t, w, dict, scores ); if (y[k] < 0 ) y[k] = R_PosInf; i = RECYCLE(i+1,na); j = RECYCLE(j+1,nb); memset(s, 0, slen); } free_dictionary(dict); free(scores); free(s); UNPROTECT(4); return yy; }
/* R interface to qgram distance */ SEXP R_qgram_tree(SEXP a, SEXP b, SEXP qq, SEXP distance){ PROTECT(a); PROTECT(b); PROTECT(qq); PROTECT(distance); // choose distance function int dist = INTEGER(distance)[0] , q = INTEGER(qq)[0] , na = length(a) , nb = length(b) , ml_a = max_length(a) , ml_b = max_length(b) , bytes = IS_CHARACTER(a); // set up a qtree; qtree *Q = new_qtree(q, 2L); unsigned int *s = NULL, *t = NULL; if ( bytes ){ s = (unsigned int *) malloc( (ml_a + ml_b) * sizeof(int) ); if ( s == NULL ){ UNPROTECT(4); error("Unable to allocate enough memory"); } t = s + ml_a; } // output int nt = (na > nb) ? na : nb; SEXP yy; PROTECT(yy = allocVector(REALSXP, nt)); double *y = REAL(yy); int i=0, j=0, len_s, len_t, isna_s, isna_t; for ( int k=0; k < nt; ++k , i = RECYCLE(i+1,na) , j = RECYCLE(j+1,nb) ){ s = get_elem(a, i, bytes, &len_s, &isna_s, s); t = get_elem(b, j, bytes, &len_t, &isna_t, t); if ( isna_s || isna_t ){ y[k] = NA_REAL; continue; } y[k] = qgram_tree(s, t, len_s, len_t, q, Q, dist); if (y[k] == -2.0){ UNPROTECT(5); error("Unable to allocate enough memory"); } if (y[k] == -1.0){ y[k] = R_PosInf; } } free_qtree(); if ( bytes ) free(s); UNPROTECT(5); return yy; }