void free_qtree(qNode *qtree_node) {
    if(qtree_node) {
        if(!qtree_node->leaf){
            free_qtree(qtree_node->child_NW);
            free_qtree(qtree_node->child_NE);
            free_qtree(qtree_node->child_SE);
            free_qtree(qtree_node->child_SW);
        }
        free(qtree_node);
    }
}
Beispiel #2
0
/* push qgrams of a string into binary tree */
static qtree *push_string(unsigned int *str, int strlen, unsigned int q, qtree *Q, int iLoc, int nLoc){
  qtree *P;
  for ( int i=0; i < strlen - q + 1; ++i ){
    P = push(Q, str + i, q, iLoc, nLoc);
    if ( P == NULL ){ 
      free_qtree();
      return NULL;
    }
    Q = P;
  }
  return Q;
}
Beispiel #3
0
void close_stringdist(Stringdist *S){
  free(S->work);
  free(S->weight);

  if (S->distance == dl){
    free_dictionary(S->dict);
  }
  if (S->distance == qgram || S->distance == cosine || S->distance == jaccard){
    free_qtree(S->tree);
  }
  free(S);
}
Beispiel #4
0
SEXP R_get_qgrams(SEXP a, SEXP qq){
  PROTECT(a);
  PROTECT(qq);

  int q = INTEGER(qq)[0];

  if ( q < 0 ){
    UNPROTECT(2);
    error("q must be a nonnegative integer");
  }


  SEXP strlist;
  int nstr, nchar, nLoc = length(a);
  unsigned int *str;
  
  // set up a tree; push all the qgrams.
  qtree *Q = new_qtree( q, nLoc);
  
  for ( int iLoc = 0; iLoc < nLoc; ++iLoc ){
    strlist = VECTOR_ELT(a, iLoc);
    nstr    = length(strlist);
 
    for ( int i=0; i < nstr; ++i ){
      str   = (unsigned int *) INTEGER(VECTOR_ELT(strlist,i));
      nchar = length(VECTOR_ELT(strlist,i));
      if ( str[0] == NA_INTEGER 
          || q > nchar
          || ( q == 0 && nchar > 0 )
        ){
        continue ;
      }
      Q = push_string(str, nchar, q, Q, iLoc, nLoc);
      if ( Q == NULL ){
        UNPROTECT(2);
        error("could not allocate enough memory");
      }
    }
  }
  // pick and delete the tree

  int nqgram[1] = {0};

  // helper variable for get_counts 
  int index[1] = {0};

  count_qtree(Q,nqgram);  

  SEXP qgrams, qcount;
  PROTECT(qgrams = allocVector(INTSXP, q*nqgram[0]));
  PROTECT(qcount = allocVector(REALSXP, nLoc*nqgram[0]));

  get_counts(Q, q, INTEGER(qgrams), nLoc, index, REAL(qcount));
  
  setAttrib(qcount, install("qgrams"), qgrams);
  
  free_qtree();
  UNPROTECT(4);

  return(qcount);
}
Beispiel #5
0
SEXP R_match_qgram_tree(SEXP x, SEXP table, SEXP nomatch, SEXP matchNA, SEXP qq, SEXP maxDist, SEXP distance){
  PROTECT(x);
  PROTECT(table);
  PROTECT(nomatch);
  PROTECT(matchNA);
  PROTECT(qq);
  PROTECT(maxDist);
  PROTECT(distance);

  double max_dist = REAL(maxDist)[0] == 0.0 ? R_PosInf : REAL(maxDist)[0];
  
  
  int dist = INTEGER(distance)[0] // choose distance function
    , q = INTEGER(qq)[0]
    , nx = length(x)
    , ntable = length(table)
    , no_match = INTEGER(nomatch)[0]
    , match_na = INTEGER(matchNA)[0]
    , bytes = IS_CHARACTER(x)
    , ml_x = max_length(x)
    , ml_t = max_length(table);
  
  // set up a qtree;
  qtree *Q = new_qtree(q, 2);

  unsigned int *X = NULL, *T = NULL;
  if (bytes){
    X = (unsigned int *) malloc( (ml_x + ml_t) * sizeof(int));
    if ( X == NULL){
      UNPROTECT(7);
      error("Unable to allocate enough memory");
    }
    T = X + ml_x;
  }

  // output vector
  SEXP yy;
  PROTECT(yy = allocVector(INTSXP, nx));
  int *y = INTEGER(yy);


  double d = R_PosInf, d1 = R_PosInf;
  int index, isna_X, isna_T, len_X, len_T;

  for ( int i=0; i<nx; i++){
    index = no_match;
    X = get_elem(x, i, bytes, &len_X, &isna_X, X);
    d1 = R_PosInf;
    for ( int j=0; j<ntable; j++){

      T = get_elem(table, j, bytes, &len_T, &isna_T,T);

      if ( !isna_X && !isna_T ){        // both are char (usual case)
        d = qgram_tree(
          X, T, len_X, len_T, q, Q, dist
        );
        if ( d == -2.0 ){
          UNPROTECT(7);
          error("Unable to allocate enough memory for qgram storage");
        }
        if ( d > max_dist ){
          continue;
        } else if ( d > -1 && d < d1){ 
          index = j + 1;
          if ( abs(d) < 1e-14 ) break; 
          d1 = d;
        }
      } else if ( isna_X && isna_T ) {  // both are NA
        index = match_na ? j + 1 : no_match;
        break;
      }
    }
    
    y[i] = index;
  } 
  if ( bytes ) free(X);
  free_qtree();
  UNPROTECT(8);
  return(yy);
}
Beispiel #6
0
/* R interface to qgram distance */
SEXP R_qgram_tree(SEXP a, SEXP b, SEXP qq, SEXP distance){
  PROTECT(a);
  PROTECT(b);
  PROTECT(qq);
  PROTECT(distance);
  // choose distance function

  int dist = INTEGER(distance)[0]
    , q = INTEGER(qq)[0]
    , na = length(a)
    , nb = length(b)
    , ml_a = max_length(a)
    , ml_b = max_length(b)
    , bytes = IS_CHARACTER(a);

  // set up a qtree; 
  qtree *Q = new_qtree(q, 2L);
  unsigned int *s = NULL, *t = NULL;
  if ( bytes ){
    s = (unsigned int *) malloc( (ml_a + ml_b) * sizeof(int) );
    if ( s == NULL ){ 
      UNPROTECT(4);
      error("Unable to allocate enough memory");
    }
    t = s + ml_a;
  }

  // output
  int nt = (na > nb) ? na : nb;
  SEXP yy; 
  PROTECT(yy = allocVector(REALSXP, nt));
  double *y = REAL(yy);

  
 
  int i=0, j=0, len_s, len_t, isna_s, isna_t;
  for ( int k=0; k < nt; ++k 
      , i = RECYCLE(i+1,na)
      , j = RECYCLE(j+1,nb) ){

    s = get_elem(a, i, bytes, &len_s, &isna_s, s);
    t = get_elem(b, j, bytes, &len_t, &isna_t, t);

    if ( isna_s || isna_t ){
      y[k] = NA_REAL;
      continue;
    }
    y[k] = qgram_tree(s, t, len_s, len_t, q, Q, dist);
    if (y[k] == -2.0){
      UNPROTECT(5);
      error("Unable to allocate enough memory");
    }
    if (y[k] == -1.0){
      y[k] = R_PosInf;
    }
  }


  free_qtree();
  if ( bytes ) free(s);
  UNPROTECT(5);
  return yy;
}