/**
 * Release an n-gram table together with everything it owns.
 *
 * The memory pool is shut down first, then the table and heap arrays
 * are freed, and finally the table structure itself.
 * A NULL table is silently ignored.
 */
static void tabledone( table_t *t )
{
	if ( t != NULL ) {
		wgmempool_Done( t->pool );
		wg_free( t->table );
		wg_free( t->heap );
		wg_free( t );
	}
}
/**
 * Destroy a textcat handle.
 *
 * Every fingerprint stored in the handle is released with fp_Done(),
 * after which the fingerprint array and the handle itself are freed.
 *
 * @param handle  pointer to a textcat_t; a NULL handle is ignored so
 *                that cleanup paths may call this unconditionally.
 */
extern void textcat_Done( void *handle )
{
	textcat_t *h = (textcat_t *)handle;
	uint4 i;

	/*** Nothing to release ***/
	if ( !h ) {
		return;
	}

	for (i=0; i<h->size; i++) {
		fp_Done( h->fprint[i] );
	}
	wg_free( h->fprint );
	wg_free( h );
}
/**
 * Destroy a fingerprint handle.
 *
 * Frees the fingerprint's name and its n-gram array (each only when
 * set), then the handle itself.
 *
 * @param handle  pointer to an fp_t; a NULL handle is ignored so that
 *                cleanup paths may call this unconditionally.
 */
extern void fp_Done( void *handle )
{
	fp_t *h = (fp_t *)handle;

	/*** Nothing to release ***/
	if ( !h ) {
		return;
	}

	if ( h->name ) {
		/*** Cast to void * to hand the name to wg_free ***/
		wg_free( (void *)h->name );
	}
	if ( h->fprint ) {
		wg_free( h->fprint );
	}
	wg_free( h );
}
/**
 * Release a module node.
 *
 * The node's `out` member is released with wg_free(); its `parent`
 * and `child` pointers (when set) and the node itself are released
 * with free().
 *
 * NOTE(review): `out` goes through wg_free() while everything else
 * goes through free(), and the parent is freed from the child side --
 * confirm the allocator pairing and the ownership model with callers.
 */
void mnode_free(modulenode_t *node)
{
	wg_free(node->out);

	if (node->parent) {
		free(node->parent);
	}

	if (node->child) {
		free(node->child);
	}

	free(node);
}
/**
 * Build a fingerprint from a text buffer:
 * - count every unique n-gram of the cleaned text in a hash table
 * - keep at most `maxngrams` of them, taken from the heap
 * - store them sorted alphabetically, each remembering its rank
 *
 * @param handle     fingerprint handle (an fp_t) that receives the result
 * @param buffer     text to fingerprint
 * @param bufsize    size of the text, passed on to prepbuffer()
 * @param maxngrams  upper bound on the number of n-grams to keep
 * @return 1 on success; 0 when bufsize is below MINDOCSIZE or when
 *         prepbuffer() rejects the text
 */
extern int fp_Create( void *handle, const char *buffer, uint4 bufsize, uint4 maxngrams )
{
	fp_t *fp;
	table_t *counts;
	char *clean;
	sint4 slot;

	/*** Too little input to be worth fingerprinting ***/
	if ( bufsize < MINDOCSIZE ) {
		return 0;
	}

	/*** Strip all invalid characters ***/
	clean = prepbuffer( buffer, bufsize );
	if ( !clean ) {
		return 0;
	}

	fp = (fp_t *)handle;

	/*** Count the n-grams, then arrange the table as a heap ***/
	counts = inittable( maxngrams );
	createngramtable( counts, clean );
	table2heap( counts );

	/*** Never keep more n-grams than the table holds ***/
	maxngrams = WGMIN( maxngrams, counts->size );
	fp->fprint = (ngram_t *)wg_malloc( sizeof(ngram_t) * maxngrams );
	fp->size = maxngrams;

	/*** Fill the profile from its last slot down to the first ***/
	for ( slot = maxngrams - 1; slot >= 0; slot-- ) {
		entry_t top;
		ngram_t *dst = &fp->fprint[slot];

		heapextract( counts, &top );

		/*** Only the n-gram text and its rank are kept ***/
		strcpy( dst->str, top.str );
		dst->rank = slot;
	}

	tabledone( counts );
	wg_free( clean );

	/*** Alphabetical order makes fingerprints easy to compare ***/
	qsort( fp->fprint, fp->size, sizeof(ngram_t), ngramcmp_str );

	return 1;
}
extern void fp_Print( void *handle, FILE *fp ) { uint4 i; fp_t *h = (fp_t *)handle; ngram_t *tmp = wg_malloc( sizeof(ngram_t) * h->size ); /*** Make a temporary and sort it on rank ***/ memcpy( tmp, h->fprint, h->size * sizeof(ngram_t) ); qsort( tmp, h->size, sizeof(ngram_t), ngramcmp_rank ); for (i=0; i<h->size; i++) { fprintf( fp, "%s\n", tmp[i].str ); } wg_free( tmp ); }
/**
 * Read text from standard input, build its fingerprint (at most 400
 * n-grams) and print the fingerprint to standard output.
 *
 * Exits with status -1, after a message on stderr, when the
 * fingerprint cannot be created.
 */
int main()
{
	char *text = myread(stdin);
	void *fprint = fp_Init(NULL);
	int created;

	created = fp_Create( fprint, text, strlen(text), 400 );
	if ( !created ) {
		fprintf(stderr, "There was an error creating the fingerprint\n");
		exit(-1);
	}

	fp_Print( fprint, stdout );

	/*** Clean up ***/
	fp_Done( fprint );
	wg_free( text );

	return 0;
}
/**
 * Function that prepares buffer for n-grammification:
 * runs of invalid characters are collapsed to a single
 * underscore, and every word is delimited by an underscore
 * on both sides (including the first and the last word).
 *
 * Function is implemented as a finite state machine.
 *
 * @param src      text to clean; scanning stops only at a '\0' or when
 *                 the output limit is reached, so it must be
 *                 NUL-terminated
 * @param bufsize  size of the text; bounds the size of the output
 * @return newly allocated, NUL-terminated cleaned string that the
 *         caller releases with wg_free(), or NULL when the result is
 *         shorter than MINDOCSIZE
 */
static char *prepbuffer( const char *src, size_t bufsize )
{
	const char *p = src;
	/*** wlimit leaves two spare bytes for the closing "_" and '\0' ***/
	char *dest = (char *)wg_malloc( bufsize + 3 );
	char *w = dest;
	char *wlimit = dest + bufsize + 1;

	if ( INVALID(*p) ) {
		goto SPACE;
	}
	else if ( *p == '\0' ) {
		goto END;
	}

	/*** Text starts with a word: open it with an underscore ***/
	*w++ = '_';
	if ( w == wlimit ) {
		/*** An underscore was just written, don't add a second one ***/
		goto STOP;
	}
	goto WORD;

 SPACE:
	/*** Inside string of invalid characters ***/
	p++;
	if ( INVALID(*p) ) {
		goto SPACE;
	}
	else if ( *p == '\0' ) {
		goto END;
	}

	/*** The whole run collapses into this single underscore ***/
	*w++ = '_';
	if ( w == wlimit ) {
		/*** An underscore was just written, don't add a second one ***/
		goto STOP;
	}
	goto WORD;

 WORD:
	/*** Inside string of valid characters ***/
	*w++ = *p++;
	if ( w == wlimit ) {
		goto END;
	}
	else if ( INVALID(*p) ) {
		goto SPACE;
	}
	else if ( *p == '\0' ) {
		/***
		 * End of input inside a word: the word still needs its
		 * closing underscore, exactly as when the output limit is
		 * hit above. Skipping it made the result depend on whether
		 * earlier whitespace had been collapsed.
		 ***/
		goto END;
	}
	goto WORD;

 END:
	/*** Close the last word (or represent an all-invalid input) ***/
	*w++ = '_';

 STOP:
	*w++ = '\0';

	/*** Docs that are too small for a fingerprint, are refused ***/
	if ( w - dest < MINDOCSIZE ) {
		wg_free(dest);
		return NULL;
	}

	return dest;
}