int main(int argc, char *argv[])
{
    ex_t rc = EX_OK;
    bfpath *bfp;
    bfpath_mode mode;

    fBogoutil = true;

    signal_setup();                     /* setup to catch signals */
    atexit(bf_exit);

    progtype = build_progtype(progname, DB_TYPE);

    set_today();                        /* compute current date for token age */

    process_arglist(argc, argv);
    process_config_files(false, longopts_bogoutil);     /* need to read lock sizes */

    /* Extra or missing parameters */
    if (flag != M_WORD && flag != M_LIST_LOGFILES && argc != optind) {
        fprintf(stderr, "Missing or extraneous argument.\n");
        usage(stderr);
        exit(EX_ERROR);
    }

    bfp = bfpath_create(ds_file);

    if (bogohome == NULL)
        set_bogohome(".");              /* set default */

    bfpath_set_bogohome(bfp);

    mode = get_mode(flag);

    if (bfpath_check_mode(bfp, mode)) {
        if (bfp->isdir)
            bfpath_set_filename(bfp, WORDLIST);
    }

    if (!bfpath_check_mode(bfp, mode)) {
        fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);
        exit(EX_ERROR);
    }

    errno = 0;                          /* clear error status */

    switch (flag) {
    case M_RECOVER:
        ds_init(bfp);
        rc = ds_recover(bfp, false);
        break;
    case M_CRECOVER:
        ds_init(bfp);
        rc = ds_recover(bfp, true);
        break;
    case M_CHECKPOINT:
        ds_init(bfp);
        rc = ds_checkpoint(bfp);
        break;
    case M_LIST_LOGFILES:
        dsm_init(bfp);
        rc = ds_list_logfiles(bfp, argc - optind, argv + optind);
        break;
    case M_PURGELOGS:
        ds_init(bfp);
        rc = ds_purgelogs(bfp);
        break;
    case M_REMOVEENV:
        dsm_init(bfp);
        rc = ds_remove(bfp);
        break;
    case M_VERIFY:
        dsm_init(bfp);
        rc = ds_verify(bfp);
        break;
    case M_LEAFPAGES:
        {
            u_int32_t c;
            dsm_init(bfp);
            c = ds_leafpages(bfp);
            if (c == 0xffffffff) {
                fprintf(stderr, "%s: error getting leaf page count.\n", ds_file);
                rc = EX_ERROR;
            } else if (c == 0) {
                puts("UNKNOWN");
            } else {
                printf("%lu\n", (unsigned long)c);
            }
        }
        break;
    case M_PAGESIZE:
        {
            u_int32_t s;
            dsm_init(bfp);
            s = ds_pagesize(bfp);
            if (s == 0xffffffff) {
                fprintf(stderr, "%s: error getting page size.\n", ds_file);
            } else if (s == 0) {
                puts("UNKNOWN");
            } else {
                printf("%lu\n", (unsigned long)s);
            }
        }
        break;
    case M_DUMP:
        rc = dump_wordlist(bfp);
        break;
    case M_LOAD:
        rc = load_wordlist(bfp) ? EX_ERROR : EX_OK;
        break;
    case M_MAINTAIN:
        maintain = true;
        rc = maintain_wordlist_file(bfp);
        break;
    case M_WORD:
        argc -= optind;
        argv += optind;
        rc = display_words(bfp, argc, argv, prob);
        break;
    case M_HIST:
        rc = histogram(bfp);
        break;
    case M_ROBX:
        rc = get_robx(bfp);
        break;
    case M_NONE:
    default:
        /* should have been handled above */
        abort();
        break;
    }

    bfpath_free(bfp);

    return rc;
}
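/*
 * Standalone sketch, not bogofilter's bfpath API: it illustrates the
 * "directory or file" resolution performed above, where a directory argument
 * gets the default wordlist filename appended and a file argument is used
 * as-is.  WORDLIST_NAME and resolve_wordlist_path() are hypothetical names
 * introduced only for this illustration; the real code goes through
 * bfpath_create(), bfpath_check_mode() and bfpath_set_filename().
 */
#include <stdio.h>
#include <sys/stat.h>

#define WORDLIST_NAME "wordlist.db"     /* assumption: typical default name */

static void resolve_wordlist_path(const char *path, char *out, size_t outlen)
{
    struct stat st;

    if (stat(path, &st) == 0 && S_ISDIR(st.st_mode))
        snprintf(out, outlen, "%s/%s", path, WORDLIST_NAME);  /* directory given */
    else
        snprintf(out, outlen, "%s", path);                    /* file given */
}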
int main(int argc, char **argv) /*@globals errno,stderr,stdout@*/
{
    ex_t exitcode = EX_OK;

    fBogotune = true;                   /* for rob_compute_spamicity() */

    dbgout = stderr;

    progtype = build_progtype(progname, DB_TYPE);

    ham_files  = filelist_new("ham");
    spam_files = filelist_new("spam");

    /* process args and read mailboxes */
    process_arglist(argc, argv);

    /* directories from command line and config file are already handled */

    if (ds_flag == DS_DSK) {
        bfpath *bfp;

        if (ds_path == NULL)
            ds_path = get_directory(PR_ENV_BOGO);
        if (ds_path == NULL)
            ds_path = get_directory(PR_ENV_HOME);
        if (ds_path == NULL) {
            fprintf(stderr, "Cannot derive bogofilter directory from environment, aborting.\n");
            exit(EX_ERROR);
        }

        set_bogohome(ds_path);

        bfp = bfpath_create(ds_path);
        if (!bfpath_check_mode(bfp, BFP_MUST_EXIST)) {
            fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);
            exit(EX_ERROR);
        }

        if (bfp->exists && bfp->isdir) {
            bfpath_free(bfp);
            ds_path = mxcat(ds_path, DIRSEP_S, WORDLIST, NULL);
            bfp = bfpath_create(ds_path);
            if (!bfpath_check_mode(bfp, BFP_MUST_EXIST)) {
                fprintf(stderr, "Can't open wordlist '%s'\n", bfp->filepath);
                exit(EX_ERROR);
            }
        }

        env = ds_init(bfp);
        init_wordlist("word", ds_path, 0, WL_REGULAR);
    }

    bogotune_init();

    if (ds_flag == DS_DSK)
        load_wordlist(load_hook, train);

    /* if encoding not yet set, assume old style */
    if (encoding == E_UNKNOWN)
        encoding = E_RAW;

    if (bogolex_file != NULL)
        bogolex();
    else
        bogotune();

    bogotune_free();

    if (ds_flag == DS_DSK)
        ds_cleanup(env);

    exit(exitcode);
}
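/*
 * Standalone sketch, not the real get_directory(): it mirrors the fallback
 * chain used above -- try a bogofilter-specific environment variable first,
 * then fall back to the user's home directory.  The variable names below are
 * assumptions made for this illustration only; the actual lookups are hidden
 * behind get_directory(PR_ENV_BOGO) and get_directory(PR_ENV_HOME).
 */
#include <stdlib.h>

static const char *guess_bogofilter_dir(void)
{
    const char *dir = getenv("BOGOFILTER_DIR");   /* assumption: primary override */
    if (dir == NULL)
        dir = getenv("HOME");                     /* fallback to the home directory */
    return dir;                                   /* may still be NULL */
}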
void eval()
{
    const char *mach_fname_cstr = CONF_mach_file.c_str();
    Mach *mlp;
    struct stat stat_struct;

    if (stat(mach_fname_cstr, &stat_struct) == 0) {
        // read existing network
        ifstream ifs;
        ifs.open(mach_fname_cstr, ios::binary);
        CHECK_FILE(ifs, mach_fname_cstr);
        mlp = Mach::Read(ifs);
        ifs.close();
        cout << "Found existing machine" << endl;
    }
    else {
        Error("No such machine for eval.");
    }

    // evaluating the test files
    cout << "Now evaluating the test file..." << endl;
    CONLLReader *reader = new CONLLReader();
    CONLLWriter *writer = new CONLLWriter();
    reader->startReading(CONF_test_file.c_str());
    writer->startWriting(CONF_output_file.c_str());

    // the vocabulary list
    HashMap *wl = load_wordlist(CONF_vocab_file.c_str());

#ifdef INSANE_DEBUG
    FILE *x_file = fdopen(3, "w");
#endif

    // some variables
    int oov_num = 0;        // out-of-vocabulary tokens
    int sen_num = 0;        // sentence count
    int token_num = 0;      // token count
    int miss_count = 0;     // only meaningful if the test file already has gold heads
    int TIME_start = clock() / 1000;
    int TIME_start_fine = 0;

    // main loop: score, decode and write each sentence
    DependencyInstance *x = reader->getNext();
    while (x != NULL) {
        if (sen_num % 1000 == 0) {
            cout << "Having processed " << sen_num << "; period is "
                 << (clock() / 1000) - TIME_start_fine << "ms." << endl;
            TIME_start_fine = clock() / 1000;
        }
        sen_num++;

        int length = x->forms->size();
        token_num += length - 1;
        double *tmp_scores = new double[length * length * 2];

        // construct scores using nn
        int num_pair = length * (length - 1);   // 2 * (0+(l-1))*l/2: two directed arcs per unordered pair
        REAL *mach_x = new REAL[num_pair * IND_CONF_x_dim_final];
        REAL *mach_y = new REAL[num_pair * CONF_y_class_size];
        int *word_index = get_word_index(length, x, wl, &oov_num);

        // build one feature row per directed head/dependent pair
        int pair_count = 0;
        REAL *assign_x = mach_x;
        for (int ii = 0; ii < length; ii++) {
            for (int j = ii + 1; j < length; j++) {
                for (int lr = 0; lr < 2; lr++) {
                    // build mach_x
                    if (lr == E_RIGHT)
                        fill_feature(length, ii, j, word_index, assign_x);
                    else
                        fill_feature(length, j, ii, word_index, assign_x);
                    assign_x += IND_CONF_x_dim_final;
                }
            }
        }

        // - give it to nn
        mlp->evaluate(mach_x, mach_y, num_pair, IND_CONF_x_dim_final, CONF_y_class_size);

        // collect the network outputs into the score table
        REAL *assign_y = mach_y;
        for (int ii = 0; ii < length; ii++) {
            for (int j = ii + 1; j < length; j++) {
                for (int lr = 0; lr < 2; lr++) {
                    int index = get_index2(length, ii, j, lr);   // important ...
                    double temp = 0;
                    if (CONF_if_y_calss) {
                        // score is the expected value over the output classes
                        for (int c = 0; c < CONF_y_class_size; c++)
                            temp += (*assign_y++) * c;
                        tmp_scores[index] = temp;
                    }
                    else
                        tmp_scores[index] = *assign_y++;
                }
            }
        }

        // - decode and write
        vector<int> *ret = decodeProjective(length, tmp_scores);
        for (int i2 = 1; i2 < length; i2++) {   // ignore root
            if ((*ret)[i2] != (*(x->heads))[i2])
                miss_count++;
        }
        delete x->heads;
        x->heads = ret;
        writer->write(x);
        delete x;
        delete[] tmp_scores;

#ifdef INSANE_DEBUG
        fprintf(x_file, "Sentence %d:\n", sen_num);
        for (int i = 0; i < num_pair; i++) {
            for (int j = 0; j < IND_CONF_x_dim_final; j++)
                fprintf(x_file, "%d ", (int)mach_x[i * IND_CONF_x_dim_final + j]);
            fprintf(x_file, "\n");
        }
#endif
        delete[] mach_x;
        delete[] mach_y;
        delete[] word_index;

        x = reader->getNext();
    }

#ifdef INSANE_DEBUG
    fclose(x_file);
#endif

    reader->finishReading();
    writer->finishWriting();
    delete reader;
    delete writer;

    cout << "Finished testing in " << (clock() / 1000 - TIME_start) << "ms" << endl;

    // conclude and evaluate
    cout << "Testing data description:\n"
         << "Sentences: " << sen_num << '\n'
         << "Tokens: " << token_num << '\n'
         << "OOV token: " << oov_num << '\n'
         << "Miss token: " << miss_count << endl;

    string t;
    DependencyEvaluator::evaluate(CONF_gold_file, CONF_output_file, t, false);
}
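// ---------------------------------------------------------------------------
// Standalone sketch (not part of the parser): illustrates the directed-pair
// enumeration that eval() uses when sizing and filling mach_x / mach_y.  For
// a sentence of `length` tokens (root included), every unordered pair
// (ii, j) with ii < j is expanded into two directed arcs (left/right), so the
// number of rows is length * (length - 1), matching num_pair above.  The
// helper name count_directed_pairs() and the PAIR_SKETCH_MAIN guard are
// hypothetical and exist only for this illustration.
// ---------------------------------------------------------------------------
#include <cassert>
#include <cstdio>

static int count_directed_pairs(int length)
{
    int pairs = 0;
    for (int ii = 0; ii < length; ii++)
        for (int j = ii + 1; j < length; j++)
            for (int lr = 0; lr < 2; lr++)   // one row per direction
                pairs++;
    return pairs;
}

#ifdef PAIR_SKETCH_MAIN
int main()
{
    for (int n = 2; n <= 6; n++) {
        assert(count_directed_pairs(n) == n * (n - 1));
        printf("length=%d -> %d directed pairs\n", n, count_directed_pairs(n));
    }
    return 0;
}
#endif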