/*
 * Shut down the document loader.
 *
 * Releases, in this order: the language-classifier handle TEXT_CAT_HANDLE,
 * libxml2's parser state, the tokenizer, the stemmer and the word normalizer.
 */
void close_document_loader() {
    /* language classifier */
    textcat_Done(TEXT_CAT_HANDLE);

    /* XML parser */
    xmlCleanupParser();

    /* text-processing helpers */
    free_tokenizer();
    close_stemmer();
    close_word_normalizer();
}
int main(int argc, char **argv) { char *conffile = "usr/share/libexttextcat/fpdb.conf"; //char *conffile = "fpdb.conf"; void *h = textcat_Init(conffile); /* s1c1(); s1c2(); s1c3(); s1c4(); s1c5(); */ hamming_test(); char *filename6 = "/home/uniscon169/matasano/src/challenge6keys.txt"; struct file_o *filebuffer6 = read_bytes(filename6); file_o_init(filebuffer6); char *b64string = xmalloc(1); char *wholestring = get_string(&filebuffer6); long length; char *STR_XOR = xmalloc(1); int keys_total = 3; struct histogram *hist = hist_o_init(keys_total); length = active_b64_decode_string(&b64string, wholestring, strlen(wholestring)); get_keylength(&hist, 20, b64string, length, keys_total); transpose(&hist, &hist->tdata, &hist->data, length, hist->ham->keylength[0]); //hist->scores->testkey = xrealloc(hist->scores->testkey, (hist->tdata->elements + 1) * sizeof(char)); int x; char ENCCHAR[] = "A\0"; for(long c = 0; c < hist->tdata->elements; c++) { for(x=0; x < 256; x++) { ENCCHAR[0] = x; memset(STR_XOR, hist->tdata->elements, 0); xor_bytes_to_string(&STR_XOR, hist->tdata->blocks[c], hist->tdata->lengths[c], ENCCHAR, 1); printf( "Language: %s\n", textcat_Classify(h, STR_XOR, hist->tdata->lengths[c])); //get_score(STR_XOR, hist->tdata->lengths[c], ENCCHAR, -100, 3.0f, 0); add_betterscore(&hist, c, STR_XOR, ENCCHAR[0]); } } printf("\n"); //STR_XOR = xrealloc(STR_XOR, (hist->inputlength + 1) * sizeof(char)); //xor_bytes_to_string(&STR_XOR, hist->data, length, hist->scores->testkey, 5); printf("\n\n\n"); //puts(STR_XOR); xfree(STR_XOR); xfree(wholestring); xfree(b64string); file_o_destroy(filebuffer6); hist_o_destroy(hist); double tempf = 0; for(x = 0; x < 26; x++) tempf += pow((double) (letterscore_en[x] / 100.0), 2.0); printf("\n%0.5f\n\n", tempf); textcat_Done(h); }
void run_slave(int myrank) { void *my_tc = llamapun_textcat_Init(); char filename[FILE_NAME_SIZE]; char message[RETURN_MESSAGE_SIZE]; MPI_Status status; while (1) { MPI_Recv(&filename, FILE_NAME_SIZE, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); if (status.MPI_TAG == 0) { printf("%2d - exiting\n", myrank); break; } else if (status.MPI_TAG == 1) { //do the actual job //printf("%2d - %s\n", myrank, filename); xmlDoc *doc = read_document(filename); if (doc == NULL) { snprintf(message, RETURN_MESSAGE_SIZE, "Couldn't load document %s\n", filename); MPI_Send(message, RETURN_MESSAGE_SIZE, MPI_CHAR, /*dest = */ 0, /*tag = message */ 1, MPI_COMM_WORLD); } dnmPtr dnm = create_DNM(xmlDocGetRootElement(doc), DNM_SKIP_TAGS); if (dnm == NULL) { fprintf(stderr, "%2d - Couldn't create DNM - exiting\n", myrank); exit(1); } char *result = textcat_Classify(my_tc, dnm->plaintext, dnm->size_plaintext); if (strncmp(result, "[english]", strlen("[english]"))) { //isn't primarily english snprintf(message, RETURN_MESSAGE_SIZE, "%s\t%s\n", filename, result); MPI_Send(message, RETURN_MESSAGE_SIZE, MPI_CHAR, /*dest = */ 0, /*tag = message */ 1, MPI_COMM_WORLD); } else { snprintf(message, RETURN_MESSAGE_SIZE, "%s\tenglish\n", filename); printf("%2d - %s", myrank, message); MPI_Send(message, RETURN_MESSAGE_SIZE, MPI_CHAR, /*dest = */ 0, /*tag = nothing special */ 0, MPI_COMM_WORLD); } //clean up free_DNM(dnm); xmlFreeDoc(doc); } else { fprintf(stderr, "%2d - Error: Unkown tag: %d - exiting\n", myrank, status.MPI_TAG); break; } } //clean up textcat_Done(my_tc); xmlCleanupParser(); }
/*
 * Walk a directory tree and hand every entry to parse().
 *
 * The tree root is argv[1] when given, otherwise the current directory.
 * The global my_tc is initialised before the walk and released afterwards.
 *
 * Returns 0 when ftw() reports success; 1 when ftw() returns non-zero (it
 * failed or the callback stopped the walk). Exits with status 1 if the
 * textcat handle cannot be loaded.
 */
int main(int argc, char *argv[]) {
    my_tc = llamapun_textcat_Init();
    if (my_tc == NULL) {
        fprintf(stderr, "Fatal: Couldn't load textcat handle\n");
        exit(1);
    }

    /* argc < 2 (rather than == 1) also covers an empty argv. */
    const char *root = (argc < 2) ? "." : argv[1];

    /* Third argument: ftw may keep at most one directory open at a time. */
    int walk_status = ftw(root, parse, 1);
    if (walk_status != 0) {
        /* Previously ignored: a failed or aborted walk looked like success. */
        fprintf(stderr, "Error: walking '%s' did not complete (ftw returned %d)\n",
                root, walk_status);
    }

    textcat_Done(my_tc);
    xmlCleanupParser();

    return (walk_status == 0) ? 0 : 1;
}
extern void *textcat_Init( const char *conffile ) { textcat_t *h; char line[1024]; FILE *fp; fp = fopen( conffile, "r" ); if ( !fp ) { #ifdef VERBOSE fprintf( stderr, "Failed to open config file '%s'\n", conffile); #endif return NULL; } h = (textcat_t *)wg_malloc(sizeof(textcat_t)); h->size = 0; h->maxsize = 16; h->fprint = (void **)wg_malloc( sizeof(void*) * h->maxsize ); while ( wg_getline( line, 1024, fp ) ) { char *p; char *segment[4]; int res; /*** Skip comments ***/ #ifdef HAVE_STRCHR if (( p = strchr(line,'#') )) { #else if (( p = index(line,'#') )) { #endif *p = '\0'; } if ((res = wg_split( segment, line, line, 4)) < 2 ) { continue; } /*** Ensure enough space ***/ if ( h->size == h->maxsize ) { h->maxsize *= 2; h->fprint = (void *)wg_realloc( h->fprint, sizeof(void*) * h->maxsize ); } /*** Load data ***/ if ((h->fprint[ h->size ] = fp_Init( segment[1] ))==NULL) { goto ERROR; } if ( fp_Read( h->fprint[h->size], segment[0], 400 ) == 0 ) { textcat_Done(h); goto ERROR; } h->size++; } fclose(fp); return h; ERROR: fclose(fp); return NULL; } extern char *textcat_Classify( void *handle, const char *buffer, size_t size ) { textcat_t *h = (textcat_t *)handle; uint4 i, cnt = 0; int minscore = MAXSCORE; int threshold = minscore; char *result = h->output; #ifdef HAVE_ALLOCA candidate_t *candidates = (candidate_t *)alloca( sizeof(candidate_t) * h->size ); #else candidate_t *candidates = (candidate_t *)malloc( sizeof(candidate_t) * h->size ); #define SHOULD_FREE 1 #endif void *unknown; unknown = fp_Init(NULL); if ( fp_Create( unknown, buffer, size, MAXNGRAMS ) == 0 ) { /*** Too little information ***/ result = _TEXTCAT_RESULT_SHORT; goto READY; } /*** Calculate the score for each category. 
***/ for (i=0; i<h->size; i++) { int score = fp_Compare( h->fprint[i], unknown, threshold ); candidates[i].score = score; candidates[i].name = fp_Name( h->fprint[i] ); if ( score < minscore ) { minscore = score; threshold = (int)( (double)score * THRESHOLDVALUE ); } } /*** Find the best performers ***/ for (i=0; i<h->size; i++) { if ( candidates[i].score < threshold ) { if ( ++cnt == MAXCANDIDATES+1 ) { break; } memcpy( &candidates[cnt-1], &candidates[i], sizeof(candidate_t) ); } } /*** The verdict ***/ if ( cnt == MAXCANDIDATES+1 ) { result = _TEXTCAT_RESULT_UNKOWN; } else { char *p = result; char *plimit = result+MAXOUTPUTSIZE; qsort( candidates, cnt, sizeof(candidate_t), cmpcandidates ); *p = '\0'; for (i=0; i<cnt; i++) { p = wg_strgmov( p, "[", plimit ); p = wg_strgmov( p, candidates[i].name, plimit ); p = wg_strgmov( p, "]", plimit ); } } READY: fp_Done(unknown); #ifdef SHOULD_FREE free(candidates); #undef SHOULD_FREE #endif return result; }