Exemplo n.º 1
0
/*
 * Tears down the document loader by invoking, in this fixed order, the
 * shutdown routine of each component it relies on.
 *
 * Takes no arguments and returns nothing. The order of the calls is kept
 * exactly as written; whether the components depend on that order cannot be
 * told from this function alone.
 */
void close_document_loader() {
  /* Release the textcat classifier held in TEXT_CAT_HANDLE. */
  textcat_Done(TEXT_CAT_HANDLE);
  /* Let libxml2 release its parser-wide state. */
  xmlCleanupParser();
  /* Shut down the remaining text-processing helpers. */
  free_tokenizer();
  close_stemmer();
  close_word_normalizer();
}
Exemplo n.º 2
0
/*
 * Driver for the repeating-key-XOR experiment.
 *
 * Steps:
 *   1. Load a textcat classifier from `conffile`.
 *   2. Run hamming_test().
 *   3. Read the key file, base64-decode it, estimate the key length and
 *      transpose the data into per-key-byte blocks.
 *   4. For every block, XOR it with each of the 256 possible single-byte
 *      keys, print the language textcat reports for the result and feed the
 *      result to add_betterscore().
 *   5. Print the sum of squared English letter frequencies.
 *
 * Returns 0 on success, 1 when the classifier could not be loaded.
 */
int main(int argc, char **argv)
{
	/* NOTE(review): relative path (no leading '/') - confirm this is intended. */
	char *conffile = "usr/share/libexttextcat/fpdb.conf";
	void *h = textcat_Init(conffile);
	char *filename6 = "/home/uniscon169/matasano/src/challenge6keys.txt";
	struct file_o *filebuffer6;
	struct histogram *hist;
	char *b64string;
	char *wholestring;
	char *STR_XOR;
	char ENCCHAR[] = "A\0";
	long length;
	int keys_total = 3;
	int x;
	double tempf = 0;

	(void)argc;
	(void)argv;

	/* textcat_Classify dereferences the handle right away, so a failed
	 * initialisation must stop here instead of crashing in the loop. */
	if (h == NULL) {
		fprintf(stderr, "Fatal: Couldn't load textcat handle from '%s'\n", conffile);
		return 1;
	}

	hamming_test();

	filebuffer6 = read_bytes(filename6);
	file_o_init(filebuffer6);
	b64string = xmalloc(1);
	wholestring = get_string(&filebuffer6);
	STR_XOR = xmalloc(1);
	hist = hist_o_init(keys_total);

	length = active_b64_decode_string(&b64string, wholestring, strlen(wholestring));
	get_keylength(&hist, 20, b64string, length, keys_total);
	transpose(&hist, &hist->tdata, &hist->data, length, hist->ham->keylength[0]);

	for (long c = 0; c < hist->tdata->elements; c++) {
		for (x = 0; x < 256; x++) {
			ENCCHAR[0] = x;
			/* A memset(STR_XOR, hist->tdata->elements, 0) used to sit here.
			 * Its fill value and length were swapped, so it wrote zero
			 * bytes. It is dropped rather than "corrected" because the
			 * size of STR_XOR at this point is not known here; the buffer
			 * is handed to xor_bytes_to_string by address, which
			 * presumably resizes and fills it - TODO confirm. */
			xor_bytes_to_string(&STR_XOR, hist->tdata->blocks[c], hist->tdata->lengths[c], ENCCHAR, 1);
			printf("Language: %s\n", textcat_Classify(h, STR_XOR, hist->tdata->lengths[c]));
			add_betterscore(&hist, c, STR_XOR, ENCCHAR[0]);
		}
	}
	printf("\n");
	printf("\n\n\n");

	xfree(STR_XOR);
	xfree(wholestring);
	xfree(b64string);
	file_o_destroy(filebuffer6);
	hist_o_destroy(hist);

	/* Sum of squared letter frequencies (letterscore_en holds percentages). */
	for (x = 0; x < 26; x++)
		tempf += pow((double) (letterscore_en[x] / 100.0), 2.0);
	printf("\n%0.5f\n\n", tempf);

	textcat_Done(h);
	return 0;
}
/*
 * Worker loop of the MPI language-classification job.
 *
 * Repeatedly receives a message from rank 0 and acts on its tag:
 *   tag 0 - stop: leave the loop and clean up.
 *   tag 1 - the payload is a file name: load the document, build its DNM,
 *           classify the plain text with textcat and send exactly one reply
 *           to rank 0 (tag 1 for "could not load" or "not primarily
 *           english", tag 0 for "english").
 *   other - report the unknown tag on stderr and leave the loop.
 *
 * myrank: rank of this process; used only as a prefix in log output.
 *
 * Terminates the process with exit(1) when the textcat handle or a DNM
 * cannot be created.
 */
void run_slave(int myrank) {
	void *my_tc = llamapun_textcat_Init();
	char filename[FILE_NAME_SIZE];
	char message[RETURN_MESSAGE_SIZE];
	MPI_Status status;

	/* textcat_Classify dereferences the handle, so a failed init is fatal. */
	if (my_tc == NULL) {
		fprintf(stderr, "%2d - Couldn't load textcat handle - exiting\n", myrank);
		exit(1);
	}

	while (1) {
		MPI_Recv(filename, FILE_NAME_SIZE, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
		if (status.MPI_TAG == 0) {
			printf("%2d - exiting\n", myrank);
			break;
		} else if (status.MPI_TAG == 1) {
			/* The payload is used as a C string below; make sure it is terminated. */
			filename[FILE_NAME_SIZE - 1] = '\0';

			xmlDoc *doc = read_document(filename);
			if (doc == NULL) {
				snprintf(message, RETURN_MESSAGE_SIZE, "Couldn't load document %s\n", filename);
				MPI_Send(message, RETURN_MESSAGE_SIZE, MPI_CHAR, /*dest = */ 0, /*tag = message */ 1, MPI_COMM_WORLD);
				/* The failure has been reported; without a document there
				 * is nothing to classify, so wait for the next job instead
				 * of passing a NULL document on. */
				continue;
			}

			dnmPtr dnm = create_DNM(xmlDocGetRootElement(doc), DNM_SKIP_TAGS);

			if (dnm == NULL) {
				fprintf(stderr, "%2d - Couldn't create DNM - exiting\n", myrank);
				exit(1);
			}
			char *result = textcat_Classify(my_tc, dnm->plaintext, dnm->size_plaintext);
			if (strncmp(result, "[english]", strlen("[english]"))) {  /* isn't primarily english */
				snprintf(message, RETURN_MESSAGE_SIZE, "%s\t%s\n", filename, result);
				MPI_Send(message, RETURN_MESSAGE_SIZE, MPI_CHAR, /*dest = */ 0, /*tag = message */ 1, MPI_COMM_WORLD);
			}
			else {
				snprintf(message, RETURN_MESSAGE_SIZE, "%s\tenglish\n", filename);
				printf("%2d - %s", myrank, message);
				MPI_Send(message, RETURN_MESSAGE_SIZE, MPI_CHAR, /*dest = */ 0, /*tag = nothing special */ 0, MPI_COMM_WORLD);
			}

			/* per-document clean up */
			free_DNM(dnm);
			xmlFreeDoc(doc);
		} else {
			fprintf(stderr, "%2d - Error: Unkown tag: %d - exiting\n", myrank, status.MPI_TAG);
			break;
		}
	}
	/* clean up */
	textcat_Done(my_tc);
	xmlCleanupParser();
}
Exemplo n.º 4
0
/*
 * Entry point: loads the textcat handle into my_tc, then walks a directory
 * tree with ftw(), invoking parse on every entry.
 *
 * The tree root is argv[1] when an argument is given and "." otherwise.
 * Exits with status 1 if the textcat handle cannot be loaded; returns 0
 * otherwise (the result of ftw() is not inspected).
 */
int main(int argc, char *argv[]) {
  const char *root;

  my_tc = llamapun_textcat_Init();
  if (!my_tc) {
    fprintf(stderr, "Fatal: Couldn't load textcat handle\n");
    exit(1);
  }

  /* Fall back to the current directory when no path was supplied. */
  root = (argc == 1) ? "." : argv[1];
  ftw(root, parse, 1);

  /* Release the classifier and libxml2's parser state before leaving. */
  textcat_Done(my_tc);
  xmlCleanupParser();
  return 0;
}
Exemplo n.º 5
0
/*
 * Builds a classifier handle from the configuration file `conffile`.
 *
 * The file is read line by line. Everything from a '#' to the end of the
 * line is ignored. A line that splits into fewer than two fields is skipped;
 * otherwise the second field is passed to fp_Init and the first field is
 * passed to fp_Read to populate the resulting fingerprint.
 *
 * Returns the new handle (to be released with textcat_Done), or NULL when
 * the file cannot be opened or any fingerprint fails to initialise or load.
 * On failure everything acquired so far is released.
 */
extern void *textcat_Init( const char *conffile )
{
	textcat_t *h;
	char line[1024];
	FILE *fp;

	fp = fopen( conffile, "r" );
	if ( !fp ) {
#ifdef VERBOSE
		fprintf( stderr, "Failed to open config file '%s'\n", conffile);
#endif
		return NULL;
	}

	h = (textcat_t *)wg_malloc(sizeof(textcat_t));
	h->size = 0;
	h->maxsize = 16;
	h->fprint = (void **)wg_malloc( sizeof(void*) * h->maxsize );

	while ( wg_getline( line, sizeof line, fp ) ) {
		char *p;
		char *segment[4];

		/*** Skip comments ***/
#ifdef HAVE_STRCHR
		if (( p = strchr(line,'#') )) {
#else
		if (( p = index(line,'#') )) {
#endif

			*p = '\0';
		}
		if ( wg_split( segment, line, line, 4) < 2 ) {
			continue;
		}

		/*** Ensure enough space ***/
		if ( h->size == h->maxsize ) {
			h->maxsize *= 2;
			h->fprint = (void *)wg_realloc( h->fprint, sizeof(void*) * h->maxsize );
		}

		/*** Load data ***/
		if ((h->fprint[ h->size ] = fp_Init( segment[1] ))==NULL) {
			/* Slot h->size holds NULL and is not counted, so the cleanup
			 * below only sees the fingerprints loaded before this one. */
			goto ERROR;
		}
		if ( fp_Read( h->fprint[h->size], segment[0], 400 ) == 0 ) {
			/* Count the half-loaded fingerprint so that it is released
			 * together with the others (assumes textcat_Done releases
			 * fprint[0..size-1] - TODO confirm). */
			h->size++;
			goto ERROR;
		}
		h->size++;
	}

	fclose(fp);
	return h;

 ERROR:
	/* Single failure exit: release the handle and whatever it owns, then
	 * the config file. */
	textcat_Done(h);
	fclose(fp);
	return NULL;

}


extern char *textcat_Classify( void *handle, const char *buffer, size_t size )
{
	textcat_t *h = (textcat_t *)handle;
	uint4 i, cnt = 0;
	int minscore = MAXSCORE;
	int threshold = minscore;
	char *result = h->output;

#ifdef HAVE_ALLOCA
	candidate_t *candidates = (candidate_t *)alloca( sizeof(candidate_t) * h->size );
#else
	candidate_t *candidates = (candidate_t *)malloc( sizeof(candidate_t) * h->size );
#define SHOULD_FREE 1
#endif

	void *unknown;

	unknown = fp_Init(NULL);
	if ( fp_Create( unknown, buffer, size, MAXNGRAMS ) == 0 ) {
		/*** Too little information ***/
		result = _TEXTCAT_RESULT_SHORT;
		goto READY;
	}
	
	/*** Calculate the score for each category. ***/
	for (i=0; i<h->size; i++) {
		int score = fp_Compare( h->fprint[i], unknown, threshold );
		candidates[i].score = score;
		candidates[i].name = fp_Name( h->fprint[i] );
		if ( score < minscore ) {
			minscore = score;
			threshold = (int)( (double)score * THRESHOLDVALUE );
		}
	}

	/*** Find the best performers ***/
	for (i=0; i<h->size; i++) {
		if ( candidates[i].score < threshold ) {

			if ( ++cnt == MAXCANDIDATES+1 ) {
				break;
			}

			memcpy( &candidates[cnt-1], &candidates[i], sizeof(candidate_t) );

		}
	}

	/*** The verdict ***/
	if ( cnt == MAXCANDIDATES+1 ) {
		result = _TEXTCAT_RESULT_UNKOWN;
	}
	else {
		char *p = result;
		char *plimit = result+MAXOUTPUTSIZE;
		
		qsort( candidates, cnt, sizeof(candidate_t), cmpcandidates );

		*p = '\0';
		for (i=0; i<cnt; i++) {
			p = wg_strgmov( p, "[", plimit );
			p = wg_strgmov( p, candidates[i].name, plimit );
			p = wg_strgmov( p, "]", plimit );
		}
	}
 READY:
	fp_Done(unknown);
#ifdef SHOULD_FREE 
	free(candidates);
#undef SHOULD_FREE
#endif
	return result;
}