Пример #1
0
/*
 * Program entry point for the wordlist utility (sets fBogoutil).
 *
 * Steps performed, in order:
 *   1. install signal handlers and the bf_exit() atexit hook;
 *   2. parse the command line and the configuration files;
 *   3. validate the argument count for the selected mode;
 *   4. build the bfpath for ds_file and verify it against the access mode
 *      derived from the global `flag`;
 *   5. dispatch on `flag` to the requested operation.
 *
 * Returns the ex_t status produced by the selected operation.  The process
 * exits with EX_ERROR when the argument count is wrong or when the wordlist
 * path does not satisfy the required mode, and abort()s when `flag` holds a
 * value that has no case in the switch.
 */
int main(int argc, char *argv[])
{
    ex_t rc = EX_OK;

    bfpath *bfp;
    bfpath_mode mode;

    fBogoutil = true;

    signal_setup();			/* setup to catch signals */
    atexit(bf_exit);

    progtype = build_progtype(progname, DB_TYPE);

    set_today();			/* compute current date for token age */

    process_arglist(argc, argv);
    process_config_files(false, longopts_bogoutil);	/* need to read lock sizes */

    /* Extra or missing parameters: only M_WORD and M_LIST_LOGFILES may be
     * followed by additional non-option arguments. */
    if (flag != M_WORD && flag != M_LIST_LOGFILES && argc != optind) {
	fprintf(stderr, "Missing or extraneous argument.\n");
	usage(stderr);
	exit(EX_ERROR);
    }

    bfp = bfpath_create(ds_file);
    if (bogohome == NULL)
	set_bogohome( "." );		/* set default */

    bfpath_set_bogohome(bfp);

    mode = get_mode(flag);

    /* When the path passes the mode check and is a directory, switch to the
     * WORDLIST file name; the check below is then repeated on the result. */
    if (bfpath_check_mode(bfp, mode)) {
	if (bfp->isdir)
	    bfpath_set_filename(bfp, WORDLIST);
    }

    if (!bfpath_check_mode(bfp, mode)) {
	fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);
	exit(EX_ERROR);
    }

    errno = 0;		/* clear error status */

    switch (flag) {
	case M_RECOVER:
	    ds_init(bfp);
	    rc = ds_recover(bfp, false);
	    break;
	case M_CRECOVER:
	    ds_init(bfp);
	    rc = ds_recover(bfp, true);
	    break;
	case M_CHECKPOINT:
	    ds_init(bfp);
	    rc = ds_checkpoint(bfp);
	    break;
	case M_LIST_LOGFILES:
	    dsm_init(bfp);
	    /* remaining command-line words are handed through unchanged */
	    rc = ds_list_logfiles(bfp, argc - optind, argv + optind);
	    break;
	case M_PURGELOGS:
	    ds_init(bfp);
	    rc = ds_purgelogs(bfp);
	    break;
	case M_REMOVEENV:
	    dsm_init(bfp);
	    rc = ds_remove(bfp);
	    break;
	case M_VERIFY:
	    dsm_init(bfp);
	    rc = ds_verify(bfp);
	    break;
	case M_LEAFPAGES:
	    {
		u_int32_t c;

		dsm_init(bfp);
		c = ds_leafpages(bfp);
		/* 0xffffffff is treated as the error value, 0 as "unknown" */
		if (c == 0xffffffff) {
		    fprintf(stderr, "%s: error getting leaf page count.\n", ds_file);
		    rc = EX_ERROR;
		} else if (c == 0) {
		    puts("UNKNOWN");
		} else {
		    printf("%lu\n", (unsigned long)c);
		}
	    }
	    break;
	case M_PAGESIZE:
	    {
		u_int32_t s;

		dsm_init(bfp);
		s = ds_pagesize(bfp);
		/* 0xffffffff is treated as the error value, 0 as "unknown" */
		if (s == 0xffffffff) {
		    fprintf(stderr, "%s: error getting page size.\n", ds_file);
		    /* report the failure through the exit status, exactly as
		     * the M_LEAFPAGES branch does */
		    rc = EX_ERROR;
		} else if (s == 0) {
		    puts("UNKNOWN");
		} else {
		    printf("%lu\n", (unsigned long)s);
		}
	    }
	    break;
	case M_DUMP:
	    rc = dump_wordlist(bfp);
	    break;
	case M_LOAD:
	    /* a non-zero result from load_wordlist() is mapped to EX_ERROR */
	    rc = load_wordlist(bfp) ? EX_ERROR : EX_OK;
	    break;
	case M_MAINTAIN:
	    maintain = true;
	    rc = maintain_wordlist_file(bfp);
	    break;
	case M_WORD:
	    /* skip the options so only the trailing words are passed on */
	    argc -= optind;
	    argv += optind;
	    rc = display_words(bfp, argc, argv, prob);
	    break;
	case M_HIST:
	    rc = histogram(bfp);
	    break;
	case M_ROBX:
	    rc = get_robx(bfp);
	    break;
	case M_NONE:
	default:
	    /* should have been handled above */
	    abort();
	    break;
    }

    bfpath_free(bfp);

    return rc;
}
Пример #2
0
int main(int argc, char **argv) /*@globals errno,stderr,stdout@*/
{
    ex_t exitcode = EX_OK;

    fBogotune = true;		/* for rob_compute_spamicity() */

    dbgout = stderr;

    progtype = build_progtype(progname, DB_TYPE);

    ham_files  = filelist_new("ham");
    spam_files = filelist_new("spam");

    /* process args and read mailboxes */
    process_arglist(argc, argv);

    /* directories from command line and config file are already handled */
    if (ds_flag == DS_DSK) {

	bfpath *bfp;

	if (ds_path == NULL)
	    ds_path = get_directory(PR_ENV_BOGO);
	if (ds_path == NULL)
	    ds_path = get_directory(PR_ENV_HOME);

	if (ds_path == NULL) {
	    fprintf(stderr, "Cannot derive bogofilter directory from environment, aborting.\n");
	    exit(EX_ERROR);
	}

	set_bogohome(ds_path);
	bfp = bfpath_create(ds_path);

	if (!bfpath_check_mode(bfp, BFP_MUST_EXIST)) {
	    fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);
	    exit(EX_ERROR);
	}

	if (bfp->exists && bfp->isdir) {
	    bfpath_free(bfp);
	    ds_path = mxcat(ds_path, DIRSEP_S, WORDLIST, NULL);	
	    bfp = bfpath_create(ds_path);
	    if (!bfpath_check_mode(bfp, BFP_MUST_EXIST)) {
		fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);
		exit(EX_ERROR);
	    }
	}

	env = ds_init(bfp);
	
	init_wordlist("word", ds_path, 0, WL_REGULAR);
    }

    bogotune_init();

    if (ds_flag == DS_DSK)
	load_wordlist(load_hook, train);

    /* if encoding not yet set, assume old style */
    if (encoding == E_UNKNOWN)
	encoding = E_RAW;

    if (bogolex_file != NULL)
	bogolex();
    else
	bogotune();

    bogotune_free();

    if (ds_flag == DS_DSK)
	ds_cleanup(env);

    exit(exitcode);
}
Пример #3
0
/*
 * Evaluate an existing machine on the test file.
 *
 * Reads the machine from CONF_mach_file, then for every sentence returned by
 * the CONLLReader on CONF_test_file:
 *   - builds one feature row per ordered word pair (both directions),
 *   - runs the machine on all rows at once,
 *   - turns the outputs into a score table and decodes it with
 *     decodeProjective(),
 *   - counts heads that differ from the ones already present in the input,
 *   - writes the sentence with the predicted heads to CONF_output_file.
 * Finally prints summary counters and calls DependencyEvaluator::evaluate()
 * on CONF_gold_file and CONF_output_file.
 *
 * NOTE(review): neither `mlp` nor `wl` is released in this function; confirm
 * who owns the objects returned by Mach::Read() and load_wordlist().
 */
void eval()
{
	const char *mach_fname_cstr = CONF_mach_file.c_str();
	Mach *mlp = NULL;	// stays NULL unless a machine is read below
	struct stat stat_struct;
	if (stat(mach_fname_cstr, &stat_struct)==0) {
		// read existing network
	    ifstream ifs;
	    ifs.open(mach_fname_cstr,ios::binary);
	    CHECK_FILE(ifs,mach_fname_cstr);
	    mlp = Mach::Read(ifs);
	    ifs.close();
	    cout << "Found existing machine" << endl;
	}
	else {
		Error("No such machine for eval.");
	}
	if (mlp == NULL) {
		// Only reachable if Error() returned or Mach::Read() gave back NULL;
		// stop here rather than dereference an invalid pointer further down.
		return;
	}

  	//evaluating the test files
  	cout << "Now evaluating the test file..." << endl;
  	CONLLReader* reader = new CONLLReader();
  	CONLLWriter* writer = new CONLLWriter();
  	reader->startReading(CONF_test_file.c_str());
  	writer->startWriting(CONF_output_file.c_str());
  	//the list
  	HashMap * wl = load_wordlist(CONF_vocab_file.c_str());
#ifdef INSANE_DEBUG
  	// debug dump goes to file descriptor 3
  	FILE* x_file = fdopen(3,"w");
#endif

  	//some variables
  	int oov_num = 0;	//out of vocabulary
  	int sen_num = 0;	//sentence number
  	int token_num = 0;	//token number
  	int miss_count = 0;	//only work if the testing file already have answers
  	// clock() ticks per millisecond; derived from CLOCKS_PER_SEC so the
  	// "ms" figures below are right on every platform
  	const double ticks_per_ms = CLOCKS_PER_SEC / 1000.0;
  	long TIME_start = (long)(clock() / ticks_per_ms);
  	long TIME_start_fine = 0;
  	//calculate
	DependencyInstance* x = reader->getNext();
	while(x != NULL){
		// progress report every 1000 sentences
		if(sen_num%1000 == 0){
			cout << "Having processed " << sen_num << ";period is "<<
					(long)(clock() / ticks_per_ms)-TIME_start_fine << "ms."<< endl;
			TIME_start_fine = (long)(clock() / ticks_per_ms);
		}
		sen_num++;
		int length = x->forms->size();
		token_num += length - 1;	// first entry is not counted as a token
		double *tmp_scores = new double[length*length*2];

		//construct scores using nn
		int num_pair = length*(length-1);	//2 * (0+(l-1))*l/2
		REAL *mach_x = new REAL[num_pair*IND_CONF_x_dim_final];
		REAL *mach_y = new REAL[num_pair*CONF_y_class_size];
		int *word_index = get_word_index(length,x,wl,&oov_num);

		// one input row per (ii,j,direction), written consecutively
		REAL* assign_x = mach_x;
		for(int ii=0;ii<length;ii++){
			for(int j=ii+1;j<length;j++){
				for(int lr=0;lr<2;lr++){
					//build mach_x
					if(lr==E_RIGHT)
						fill_feature(length,ii,j,word_index,assign_x);
					else
						fill_feature(length,j,ii,word_index,assign_x);
					assign_x += IND_CONF_x_dim_final;
				}
			}
		}
		//- give it to nn
		mlp->evaluate(mach_x,mach_y,num_pair,IND_CONF_x_dim_final,CONF_y_class_size);
		// outputs are consumed in the same (ii,j,direction) order as the rows
		REAL* assign_y = mach_y;
		for(int ii=0;ii<length;ii++){
			for(int j=ii+1;j<length;j++){
				for(int lr=0;lr<2;lr++){
					int index = get_index2(length,ii,j,lr);
					//important ...
					double temp = 0;
					if(CONF_if_y_calss){
						// score = sum over classes of output * class index
						for(int c=0;c<CONF_y_class_size;c++)
							temp += (*assign_y++)*c;
						tmp_scores[index] = temp;
					}
					else
						tmp_scores[index] = *assign_y++;
				}
			}
		}
		//- decode and write
		vector<int> *ret = decodeProjective(length,tmp_scores);
		for(int i2=1;i2<length;i2++){	//ignore root
			if((*ret)[i2] != (*(x->heads))[i2])
				miss_count ++;
		}
		// replace the original heads with the prediction before writing
		delete x->heads;
		x->heads = ret;
		writer->write(x);
		delete x;
		delete []tmp_scores;
#ifdef INSANE_DEBUG
		fprintf(x_file,"Sentence %d:\n",sen_num);
  		for(int i=0;i<num_pair;i++){
  			for(int j=0;j<IND_CONF_x_dim_final;j++)
  				fprintf(x_file,"%d ",(int)mach_x[i*IND_CONF_x_dim_final+j]);
  			fprintf(x_file,"\n");
  		}
#endif
		delete []mach_x;
		delete []mach_y;
		delete []word_index;
		x = reader->getNext();
	}
#ifdef INSANE_DEBUG
	fclose(x_file);
#endif
	reader->finishReading();
	writer->finishWriting();
	delete reader;
	delete writer;
	cout << "Finished testing in " << ((long)(clock() / ticks_per_ms)-TIME_start) << "ms" << endl;

	//conclude and evaluate
	cout << "Testing data description:\n"
			<< "Sentences: " << sen_num << '\n'
			<< "Tokens: " << token_num << '\n'
			<< "OOV token: " << oov_num << '\n'
			<< "Miss token: " << miss_count << endl;
	string t;
	DependencyEvaluator::evaluate(CONF_gold_file,CONF_output_file,t,false);
}